diff --git "a/evals/core_9mcqa/task-004-boolq:mc-predictions.jsonl" "b/evals/core_9mcqa/task-004-boolq:mc-predictions.jsonl" new file mode 100644--- /dev/null +++ "b/evals/core_9mcqa/task-004-boolq:mc-predictions.jsonl" @@ -0,0 +1,1000 @@ +{"doc_id": 0, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22265371680259705, "incorrect_loss_raw": 1.6528948545455933, "correct_loss_per_char": 0.11132685840129852, "incorrect_loss_per_char": 0.8264474272727966, "correct_loss_per_token": 0.22265371680259705, "incorrect_loss_per_token": 1.6528948545455933, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22265371680259705, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -0.22265371680259705, "logits_per_char": -0.11132685840129852, "num_chars": 2}, {"sum_logits": -1.6528948545455933, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.6528948545455933, "logits_per_char": -0.8264474272727966, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6628215312957764, "incorrect_loss_raw": 0.22091442346572876, "correct_loss_per_char": 0.8314107656478882, "incorrect_loss_per_char": 0.11045721173286438, "correct_loss_per_token": 1.6628215312957764, "incorrect_loss_per_token": 0.22091442346572876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22091442346572876, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.22091442346572876, "logits_per_char": -0.11045721173286438, "num_chars": 2}, {"sum_logits": -1.6628215312957764, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.6628215312957764, "logits_per_char": -0.8314107656478882, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 2, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2683212757110596, "incorrect_loss_raw": 0.3489574193954468, "correct_loss_per_char": 0.6341606378555298, "incorrect_loss_per_char": 0.1744787096977234, "correct_loss_per_token": 1.2683212757110596, "incorrect_loss_per_token": 0.3489574193954468, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3489574193954468, "num_tokens": 1, "num_tokens_all": 1030, "is_greedy": true, "logits_per_token": -0.3489574193954468, "logits_per_char": -0.1744787096977234, "num_chars": 2}, {"sum_logits": -1.2683212757110596, "num_tokens": 1, "num_tokens_all": 1030, "is_greedy": false, "logits_per_token": -1.2683212757110596, "logits_per_char": -0.6341606378555298, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 3, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22619712352752686, "incorrect_loss_raw": 1.6485193967819214, "correct_loss_per_char": 0.11309856176376343, "incorrect_loss_per_char": 0.8242596983909607, "correct_loss_per_token": 0.22619712352752686, "incorrect_loss_per_token": 1.6485193967819214, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22619712352752686, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": true, "logits_per_token": -0.22619712352752686, "logits_per_char": -0.11309856176376343, "num_chars": 2}, {"sum_logits": -1.6485193967819214, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": false, "logits_per_token": -1.6485193967819214, "logits_per_char": -0.8242596983909607, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 4, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18337425589561462, "incorrect_loss_raw": 1.8430867195129395, "correct_loss_per_char": 0.09168712794780731, "incorrect_loss_per_char": 0.9215433597564697, "correct_loss_per_token": 0.18337425589561462, "incorrect_loss_per_token": 1.8430867195129395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18337425589561462, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.18337425589561462, "logits_per_char": -0.09168712794780731, "num_chars": 2}, {"sum_logits": -1.8430867195129395, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.8430867195129395, "logits_per_char": -0.9215433597564697, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 5, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23960553109645844, "incorrect_loss_raw": 1.6008185148239136, "correct_loss_per_char": 0.11980276554822922, "incorrect_loss_per_char": 0.8004092574119568, "correct_loss_per_token": 0.23960553109645844, "incorrect_loss_per_token": 1.6008185148239136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23960553109645844, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.23960553109645844, "logits_per_char": -0.11980276554822922, "num_chars": 2}, {"sum_logits": -1.6008185148239136, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.6008185148239136, "logits_per_char": -0.8004092574119568, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 6, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2641911506652832, "incorrect_loss_raw": 1.504990577697754, "correct_loss_per_char": 0.1320955753326416, "incorrect_loss_per_char": 0.752495288848877, "correct_loss_per_token": 0.2641911506652832, "incorrect_loss_per_token": 1.504990577697754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2641911506652832, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.2641911506652832, "logits_per_char": -0.1320955753326416, "num_chars": 2}, {"sum_logits": -1.504990577697754, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.504990577697754, "logits_per_char": -0.752495288848877, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 7, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21053443849086761, "incorrect_loss_raw": 1.7284349203109741, "correct_loss_per_char": 0.10526721924543381, "incorrect_loss_per_char": 0.8642174601554871, "correct_loss_per_token": 0.21053443849086761, "incorrect_loss_per_token": 1.7284349203109741, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21053443849086761, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -0.21053443849086761, "logits_per_char": -0.10526721924543381, "num_chars": 2}, {"sum_logits": -1.7284349203109741, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.7284349203109741, "logits_per_char": -0.8642174601554871, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 8, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.587033748626709, "incorrect_loss_raw": 0.24203675985336304, "correct_loss_per_char": 0.7935168743133545, "incorrect_loss_per_char": 0.12101837992668152, "correct_loss_per_token": 1.587033748626709, "incorrect_loss_per_token": 0.24203675985336304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24203675985336304, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": true, "logits_per_token": -0.24203675985336304, "logits_per_char": -0.12101837992668152, "num_chars": 2}, {"sum_logits": -1.587033748626709, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": false, "logits_per_token": -1.587033748626709, "logits_per_char": -0.7935168743133545, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 9, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2714293301105499, "incorrect_loss_raw": 1.4786901473999023, "correct_loss_per_char": 0.13571466505527496, "incorrect_loss_per_char": 0.7393450736999512, "correct_loss_per_token": 0.2714293301105499, "incorrect_loss_per_token": 1.4786901473999023, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2714293301105499, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": true, "logits_per_token": -0.2714293301105499, "logits_per_char": -0.13571466505527496, "num_chars": 2}, {"sum_logits": -1.4786901473999023, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.4786901473999023, "logits_per_char": -0.7393450736999512, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 10, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8140480518341064, "incorrect_loss_raw": 0.18994727730751038, "correct_loss_per_char": 0.9070240259170532, "incorrect_loss_per_char": 0.09497363865375519, "correct_loss_per_token": 1.8140480518341064, "incorrect_loss_per_token": 0.18994727730751038, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18994727730751038, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.18994727730751038, "logits_per_char": -0.09497363865375519, "num_chars": 2}, {"sum_logits": -1.8140480518341064, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.8140480518341064, "logits_per_char": -0.9070240259170532, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 11, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23028892278671265, "incorrect_loss_raw": 1.6238088607788086, "correct_loss_per_char": 0.11514446139335632, "incorrect_loss_per_char": 0.8119044303894043, "correct_loss_per_token": 0.23028892278671265, "incorrect_loss_per_token": 1.6238088607788086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23028892278671265, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.23028892278671265, "logits_per_char": -0.11514446139335632, "num_chars": 2}, {"sum_logits": -1.6238088607788086, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.6238088607788086, "logits_per_char": -0.8119044303894043, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 12, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20705761015415192, "incorrect_loss_raw": 1.7115126848220825, "correct_loss_per_char": 0.10352880507707596, "incorrect_loss_per_char": 0.8557563424110413, "correct_loss_per_token": 0.20705761015415192, "incorrect_loss_per_token": 1.7115126848220825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20705761015415192, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.20705761015415192, "logits_per_char": -0.10352880507707596, "num_chars": 2}, {"sum_logits": -1.7115126848220825, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.7115126848220825, "logits_per_char": -0.8557563424110413, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 13, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20540307462215424, "incorrect_loss_raw": 1.7255398035049438, "correct_loss_per_char": 0.10270153731107712, "incorrect_loss_per_char": 0.8627699017524719, "correct_loss_per_token": 0.20540307462215424, "incorrect_loss_per_token": 1.7255398035049438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20540307462215424, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.20540307462215424, "logits_per_char": -0.10270153731107712, "num_chars": 2}, {"sum_logits": -1.7255398035049438, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.7255398035049438, "logits_per_char": -0.8627699017524719, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 14, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2598530948162079, "incorrect_loss_raw": 1.523846983909607, "correct_loss_per_char": 0.12992654740810394, "incorrect_loss_per_char": 0.7619234919548035, "correct_loss_per_token": 0.2598530948162079, "incorrect_loss_per_token": 1.523846983909607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2598530948162079, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": true, "logits_per_token": -0.2598530948162079, "logits_per_char": -0.12992654740810394, "num_chars": 2}, {"sum_logits": -1.523846983909607, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -1.523846983909607, "logits_per_char": -0.7619234919548035, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 15, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6006951332092285, "incorrect_loss_raw": 0.23174984753131866, "correct_loss_per_char": 0.8003475666046143, "incorrect_loss_per_char": 0.11587492376565933, "correct_loss_per_token": 1.6006951332092285, "incorrect_loss_per_token": 0.23174984753131866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23174984753131866, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.23174984753131866, "logits_per_char": -0.11587492376565933, "num_chars": 2}, {"sum_logits": -1.6006951332092285, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.6006951332092285, "logits_per_char": -0.8003475666046143, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 16, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21852953732013702, "incorrect_loss_raw": 1.6517670154571533, "correct_loss_per_char": 0.10926476866006851, "incorrect_loss_per_char": 0.8258835077285767, "correct_loss_per_token": 0.21852953732013702, "incorrect_loss_per_token": 1.6517670154571533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21852953732013702, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.21852953732013702, "logits_per_char": -0.10926476866006851, "num_chars": 2}, {"sum_logits": -1.6517670154571533, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.6517670154571533, "logits_per_char": -0.8258835077285767, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 17, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2268429696559906, "incorrect_loss_raw": 1.6508997678756714, "correct_loss_per_char": 0.1134214848279953, "incorrect_loss_per_char": 0.8254498839378357, "correct_loss_per_token": 0.2268429696559906, "incorrect_loss_per_token": 1.6508997678756714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2268429696559906, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.2268429696559906, "logits_per_char": -0.1134214848279953, "num_chars": 2}, {"sum_logits": -1.6508997678756714, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.6508997678756714, "logits_per_char": -0.8254498839378357, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 18, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3195928931236267, "incorrect_loss_raw": 1.3260016441345215, "correct_loss_per_char": 0.15979644656181335, "incorrect_loss_per_char": 0.6630008220672607, "correct_loss_per_token": 0.3195928931236267, "incorrect_loss_per_token": 1.3260016441345215, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3195928931236267, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.3195928931236267, "logits_per_char": -0.15979644656181335, "num_chars": 2}, {"sum_logits": -1.3260016441345215, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.3260016441345215, "logits_per_char": -0.6630008220672607, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 19, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2637498676776886, "incorrect_loss_raw": 1.4938881397247314, "correct_loss_per_char": 0.1318749338388443, "incorrect_loss_per_char": 0.7469440698623657, "correct_loss_per_token": 0.2637498676776886, "incorrect_loss_per_token": 1.4938881397247314, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2637498676776886, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.2637498676776886, "logits_per_char": -0.1318749338388443, "num_chars": 2}, {"sum_logits": -1.4938881397247314, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.4938881397247314, "logits_per_char": -0.7469440698623657, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 20, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25714439153671265, "incorrect_loss_raw": 1.5166783332824707, "correct_loss_per_char": 0.12857219576835632, "incorrect_loss_per_char": 0.7583391666412354, "correct_loss_per_token": 0.25714439153671265, "incorrect_loss_per_token": 1.5166783332824707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25714439153671265, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.25714439153671265, "logits_per_char": -0.12857219576835632, "num_chars": 2}, {"sum_logits": -1.5166783332824707, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.5166783332824707, "logits_per_char": -0.7583391666412354, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 21, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19573712348937988, "incorrect_loss_raw": 1.787501573562622, "correct_loss_per_char": 0.09786856174468994, "incorrect_loss_per_char": 0.893750786781311, "correct_loss_per_token": 0.19573712348937988, "incorrect_loss_per_token": 1.787501573562622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19573712348937988, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.19573712348937988, "logits_per_char": -0.09786856174468994, "num_chars": 2}, {"sum_logits": -1.787501573562622, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.787501573562622, "logits_per_char": -0.893750786781311, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 22, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2841815948486328, "incorrect_loss_raw": 1.4261608123779297, "correct_loss_per_char": 0.1420907974243164, "incorrect_loss_per_char": 0.7130804061889648, "correct_loss_per_token": 0.2841815948486328, "incorrect_loss_per_token": 1.4261608123779297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2841815948486328, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.2841815948486328, "logits_per_char": -0.1420907974243164, "num_chars": 2}, {"sum_logits": -1.4261608123779297, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.4261608123779297, "logits_per_char": -0.7130804061889648, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 23, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23431642353534698, "incorrect_loss_raw": 1.603018879890442, "correct_loss_per_char": 0.11715821176767349, "incorrect_loss_per_char": 0.801509439945221, "correct_loss_per_token": 0.23431642353534698, "incorrect_loss_per_token": 1.603018879890442, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23431642353534698, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.23431642353534698, "logits_per_char": -0.11715821176767349, "num_chars": 2}, {"sum_logits": -1.603018879890442, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.603018879890442, "logits_per_char": -0.801509439945221, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 24, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27950575947761536, "incorrect_loss_raw": 1.4628806114196777, "correct_loss_per_char": 0.13975287973880768, "incorrect_loss_per_char": 0.7314403057098389, "correct_loss_per_token": 0.27950575947761536, "incorrect_loss_per_token": 1.4628806114196777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27950575947761536, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": true, "logits_per_token": -0.27950575947761536, "logits_per_char": -0.13975287973880768, "num_chars": 2}, {"sum_logits": -1.4628806114196777, "num_tokens": 1, "num_tokens_all": 1063, "is_greedy": false, "logits_per_token": -1.4628806114196777, "logits_per_char": -0.7314403057098389, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 25, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9002370834350586, "incorrect_loss_raw": 0.16958406567573547, "correct_loss_per_char": 0.9501185417175293, "incorrect_loss_per_char": 0.08479203283786774, "correct_loss_per_token": 1.9002370834350586, "incorrect_loss_per_token": 0.16958406567573547, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16958406567573547, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.16958406567573547, "logits_per_char": -0.08479203283786774, "num_chars": 2}, {"sum_logits": -1.9002370834350586, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.9002370834350586, "logits_per_char": -0.9501185417175293, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 26, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2800866365432739, "incorrect_loss_raw": 1.4484723806381226, "correct_loss_per_char": 0.14004331827163696, "incorrect_loss_per_char": 0.7242361903190613, "correct_loss_per_token": 0.2800866365432739, "incorrect_loss_per_token": 1.4484723806381226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2800866365432739, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.2800866365432739, "logits_per_char": -0.14004331827163696, "num_chars": 2}, {"sum_logits": -1.4484723806381226, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.4484723806381226, "logits_per_char": -0.7242361903190613, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 27, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2533321678638458, "incorrect_loss_raw": 1.5444055795669556, "correct_loss_per_char": 0.1266660839319229, "incorrect_loss_per_char": 0.7722027897834778, "correct_loss_per_token": 0.2533321678638458, "incorrect_loss_per_token": 1.5444055795669556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2533321678638458, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.2533321678638458, "logits_per_char": -0.1266660839319229, "num_chars": 2}, {"sum_logits": -1.5444055795669556, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.5444055795669556, "logits_per_char": -0.7722027897834778, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 28, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25319141149520874, "incorrect_loss_raw": 1.5393688678741455, "correct_loss_per_char": 0.12659570574760437, "incorrect_loss_per_char": 0.7696844339370728, "correct_loss_per_token": 0.25319141149520874, "incorrect_loss_per_token": 1.5393688678741455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25319141149520874, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.25319141149520874, "logits_per_char": -0.12659570574760437, "num_chars": 2}, {"sum_logits": -1.5393688678741455, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.5393688678741455, "logits_per_char": -0.7696844339370728, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 29, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1934065967798233, "incorrect_loss_raw": 1.7858504056930542, "correct_loss_per_char": 0.09670329838991165, "incorrect_loss_per_char": 0.8929252028465271, "correct_loss_per_token": 0.1934065967798233, "incorrect_loss_per_token": 1.7858504056930542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1934065967798233, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.1934065967798233, "logits_per_char": -0.09670329838991165, "num_chars": 2}, {"sum_logits": -1.7858504056930542, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.7858504056930542, "logits_per_char": -0.8929252028465271, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 30, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.605971097946167, "incorrect_loss_raw": 0.23416098952293396, "correct_loss_per_char": 0.8029855489730835, "incorrect_loss_per_char": 0.11708049476146698, "correct_loss_per_token": 1.605971097946167, "incorrect_loss_per_token": 0.23416098952293396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23416098952293396, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.23416098952293396, "logits_per_char": -0.11708049476146698, "num_chars": 2}, {"sum_logits": -1.605971097946167, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -1.605971097946167, "logits_per_char": -0.8029855489730835, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 31, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21075181663036346, "incorrect_loss_raw": 1.699397325515747, "correct_loss_per_char": 0.10537590831518173, "incorrect_loss_per_char": 0.8496986627578735, "correct_loss_per_token": 0.21075181663036346, "incorrect_loss_per_token": 1.699397325515747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21075181663036346, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.21075181663036346, "logits_per_char": -0.10537590831518173, "num_chars": 2}, {"sum_logits": -1.699397325515747, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.699397325515747, "logits_per_char": -0.8496986627578735, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 32, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2596054971218109, "incorrect_loss_raw": 1.5240558385849, "correct_loss_per_char": 0.12980274856090546, "incorrect_loss_per_char": 0.76202791929245, "correct_loss_per_token": 0.2596054971218109, "incorrect_loss_per_token": 1.5240558385849, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2596054971218109, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.2596054971218109, "logits_per_char": -0.12980274856090546, "num_chars": 2}, {"sum_logits": -1.5240558385849, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.5240558385849, "logits_per_char": -0.76202791929245, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 33, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19803063571453094, "incorrect_loss_raw": 1.7599400281906128, "correct_loss_per_char": 0.09901531785726547, "incorrect_loss_per_char": 0.8799700140953064, "correct_loss_per_token": 0.19803063571453094, "incorrect_loss_per_token": 1.7599400281906128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19803063571453094, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.19803063571453094, "logits_per_char": -0.09901531785726547, "num_chars": 2}, {"sum_logits": -1.7599400281906128, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.7599400281906128, "logits_per_char": -0.8799700140953064, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 34, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5399043560028076, "incorrect_loss_raw": 0.25795477628707886, "correct_loss_per_char": 0.7699521780014038, "incorrect_loss_per_char": 0.12897738814353943, "correct_loss_per_token": 1.5399043560028076, "incorrect_loss_per_token": 0.25795477628707886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25795477628707886, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -0.25795477628707886, "logits_per_char": -0.12897738814353943, "num_chars": 2}, {"sum_logits": -1.5399043560028076, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.5399043560028076, "logits_per_char": -0.7699521780014038, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 35, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4987075328826904, "incorrect_loss_raw": 0.26662886142730713, "correct_loss_per_char": 0.7493537664413452, "incorrect_loss_per_char": 0.13331443071365356, "correct_loss_per_token": 1.4987075328826904, "incorrect_loss_per_token": 0.26662886142730713, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26662886142730713, "num_tokens": 1, "num_tokens_all": 1179, "is_greedy": true, "logits_per_token": -0.26662886142730713, "logits_per_char": -0.13331443071365356, "num_chars": 2}, {"sum_logits": -1.4987075328826904, "num_tokens": 1, "num_tokens_all": 1179, "is_greedy": false, "logits_per_token": -1.4987075328826904, "logits_per_char": -0.7493537664413452, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 36, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2637813687324524, "incorrect_loss_raw": 1.5026230812072754, "correct_loss_per_char": 0.1318906843662262, "incorrect_loss_per_char": 0.7513115406036377, "correct_loss_per_token": 0.2637813687324524, "incorrect_loss_per_token": 1.5026230812072754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2637813687324524, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.2637813687324524, "logits_per_char": -0.1318906843662262, "num_chars": 2}, {"sum_logits": -1.5026230812072754, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.5026230812072754, "logits_per_char": -0.7513115406036377, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 37, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2665194571018219, "incorrect_loss_raw": 1.4858068227767944, "correct_loss_per_char": 0.13325972855091095, "incorrect_loss_per_char": 0.7429034113883972, "correct_loss_per_token": 0.2665194571018219, "incorrect_loss_per_token": 1.4858068227767944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2665194571018219, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": true, "logits_per_token": -0.2665194571018219, "logits_per_char": -0.13325972855091095, "num_chars": 2}, {"sum_logits": -1.4858068227767944, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.4858068227767944, "logits_per_char": -0.7429034113883972, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 38, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19299614429473877, "incorrect_loss_raw": 1.7660080194473267, "correct_loss_per_char": 0.09649807214736938, "incorrect_loss_per_char": 0.8830040097236633, "correct_loss_per_token": 0.19299614429473877, "incorrect_loss_per_token": 1.7660080194473267, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19299614429473877, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.19299614429473877, "logits_per_char": -0.09649807214736938, "num_chars": 2}, {"sum_logits": -1.7660080194473267, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.7660080194473267, "logits_per_char": -0.8830040097236633, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 39, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2288992553949356, "incorrect_loss_raw": 1.6230003833770752, "correct_loss_per_char": 0.1144496276974678, "incorrect_loss_per_char": 0.8115001916885376, "correct_loss_per_token": 0.2288992553949356, "incorrect_loss_per_token": 1.6230003833770752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2288992553949356, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.2288992553949356, "logits_per_char": -0.1144496276974678, "num_chars": 2}, {"sum_logits": -1.6230003833770752, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.6230003833770752, "logits_per_char": -0.8115001916885376, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 40, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27581506967544556, "incorrect_loss_raw": 1.45011305809021, "correct_loss_per_char": 0.13790753483772278, "incorrect_loss_per_char": 0.725056529045105, "correct_loss_per_token": 0.27581506967544556, "incorrect_loss_per_token": 1.45011305809021, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27581506967544556, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.27581506967544556, "logits_per_char": -0.13790753483772278, "num_chars": 2}, {"sum_logits": -1.45011305809021, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -1.45011305809021, "logits_per_char": -0.725056529045105, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 41, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2393127977848053, "incorrect_loss_raw": 1.6231842041015625, "correct_loss_per_char": 0.11965639889240265, "incorrect_loss_per_char": 0.8115921020507812, "correct_loss_per_token": 0.2393127977848053, "incorrect_loss_per_token": 1.6231842041015625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2393127977848053, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": true, "logits_per_token": -0.2393127977848053, "logits_per_char": -0.11965639889240265, "num_chars": 2}, {"sum_logits": -1.6231842041015625, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -1.6231842041015625, "logits_per_char": -0.8115921020507812, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 42, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26819732785224915, "incorrect_loss_raw": 1.4861586093902588, "correct_loss_per_char": 0.13409866392612457, "incorrect_loss_per_char": 0.7430793046951294, "correct_loss_per_token": 0.26819732785224915, "incorrect_loss_per_token": 1.4861586093902588, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26819732785224915, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": true, "logits_per_token": -0.26819732785224915, "logits_per_char": -0.13409866392612457, "num_chars": 2}, {"sum_logits": -1.4861586093902588, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.4861586093902588, "logits_per_char": -0.7430793046951294, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 43, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20717428624629974, "incorrect_loss_raw": 1.723198413848877, "correct_loss_per_char": 0.10358714312314987, "incorrect_loss_per_char": 0.8615992069244385, "correct_loss_per_token": 0.20717428624629974, "incorrect_loss_per_token": 1.723198413848877, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20717428624629974, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": true, "logits_per_token": -0.20717428624629974, "logits_per_char": -0.10358714312314987, "num_chars": 2}, {"sum_logits": -1.723198413848877, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.723198413848877, "logits_per_char": -0.8615992069244385, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 44, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7561707496643066, "incorrect_loss_raw": 0.20301453769207, "correct_loss_per_char": 0.8780853748321533, "incorrect_loss_per_char": 0.101507268846035, "correct_loss_per_token": 1.7561707496643066, "incorrect_loss_per_token": 0.20301453769207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20301453769207, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": true, "logits_per_token": -0.20301453769207, "logits_per_char": -0.101507268846035, "num_chars": 2}, {"sum_logits": -1.7561707496643066, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": false, "logits_per_token": -1.7561707496643066, "logits_per_char": -0.8780853748321533, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 45, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25725454092025757, "incorrect_loss_raw": 1.5445516109466553, "correct_loss_per_char": 0.12862727046012878, "incorrect_loss_per_char": 0.7722758054733276, "correct_loss_per_token": 0.25725454092025757, "incorrect_loss_per_token": 1.5445516109466553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25725454092025757, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": true, "logits_per_token": -0.25725454092025757, "logits_per_char": -0.12862727046012878, "num_chars": 2}, {"sum_logits": -1.5445516109466553, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -1.5445516109466553, "logits_per_char": -0.7722758054733276, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 46, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6137239933013916, "incorrect_loss_raw": 0.23780924081802368, "correct_loss_per_char": 0.8068619966506958, "incorrect_loss_per_char": 0.11890462040901184, "correct_loss_per_token": 1.6137239933013916, "incorrect_loss_per_token": 0.23780924081802368, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23780924081802368, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.23780924081802368, "logits_per_char": -0.11890462040901184, "num_chars": 2}, {"sum_logits": -1.6137239933013916, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.6137239933013916, "logits_per_char": -0.8068619966506958, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 47, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6116043329238892, "incorrect_loss_raw": 0.23130781948566437, "correct_loss_per_char": 0.8058021664619446, "incorrect_loss_per_char": 0.11565390974283218, "correct_loss_per_token": 1.6116043329238892, "incorrect_loss_per_token": 0.23130781948566437, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23130781948566437, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.23130781948566437, "logits_per_char": -0.11565390974283218, "num_chars": 2}, {"sum_logits": -1.6116043329238892, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.6116043329238892, "logits_per_char": -0.8058021664619446, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 48, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2787530720233917, "incorrect_loss_raw": 1.4578816890716553, "correct_loss_per_char": 0.13937653601169586, "incorrect_loss_per_char": 0.7289408445358276, "correct_loss_per_token": 0.2787530720233917, "incorrect_loss_per_token": 1.4578816890716553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2787530720233917, "num_tokens": 1, "num_tokens_all": 1018, "is_greedy": true, "logits_per_token": -0.2787530720233917, "logits_per_char": -0.13937653601169586, "num_chars": 2}, {"sum_logits": -1.4578816890716553, "num_tokens": 1, "num_tokens_all": 1018, "is_greedy": false, "logits_per_token": -1.4578816890716553, "logits_per_char": -0.7289408445358276, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 49, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5641498565673828, "incorrect_loss_raw": 0.24788697063922882, "correct_loss_per_char": 0.7820749282836914, "incorrect_loss_per_char": 0.12394348531961441, "correct_loss_per_token": 1.5641498565673828, "incorrect_loss_per_token": 0.24788697063922882, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24788697063922882, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": true, "logits_per_token": -0.24788697063922882, "logits_per_char": -0.12394348531961441, "num_chars": 2}, {"sum_logits": -1.5641498565673828, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": false, "logits_per_token": -1.5641498565673828, "logits_per_char": -0.7820749282836914, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 50, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4107341766357422, "incorrect_loss_raw": 0.28684142231941223, "correct_loss_per_char": 0.7053670883178711, "incorrect_loss_per_char": 0.14342071115970612, "correct_loss_per_token": 1.4107341766357422, "incorrect_loss_per_token": 0.28684142231941223, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28684142231941223, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.28684142231941223, "logits_per_char": -0.14342071115970612, "num_chars": 2}, {"sum_logits": -1.4107341766357422, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.4107341766357422, "logits_per_char": -0.7053670883178711, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 51, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5635714530944824, "incorrect_loss_raw": 0.24466083943843842, "correct_loss_per_char": 0.7817857265472412, "incorrect_loss_per_char": 0.12233041971921921, "correct_loss_per_token": 1.5635714530944824, "incorrect_loss_per_token": 0.24466083943843842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24466083943843842, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -0.24466083943843842, "logits_per_char": -0.12233041971921921, "num_chars": 2}, {"sum_logits": -1.5635714530944824, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.5635714530944824, "logits_per_char": -0.7817857265472412, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 52, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4376115798950195, "incorrect_loss_raw": 0.2867686450481415, "correct_loss_per_char": 0.7188057899475098, "incorrect_loss_per_char": 0.14338432252407074, "correct_loss_per_token": 1.4376115798950195, "incorrect_loss_per_token": 0.2867686450481415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2867686450481415, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": true, "logits_per_token": -0.2867686450481415, "logits_per_char": -0.14338432252407074, "num_chars": 2}, {"sum_logits": -1.4376115798950195, "num_tokens": 1, "num_tokens_all": 1121, "is_greedy": false, "logits_per_token": -1.4376115798950195, "logits_per_char": -0.7188057899475098, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 53, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4002606868743896, "incorrect_loss_raw": 0.29497581720352173, "correct_loss_per_char": 0.7001303434371948, "incorrect_loss_per_char": 0.14748790860176086, "correct_loss_per_token": 1.4002606868743896, "incorrect_loss_per_token": 0.29497581720352173, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29497581720352173, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": true, "logits_per_token": -0.29497581720352173, "logits_per_char": -0.14748790860176086, "num_chars": 2}, {"sum_logits": -1.4002606868743896, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": false, "logits_per_token": -1.4002606868743896, "logits_per_char": -0.7001303434371948, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 54, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3180534541606903, "incorrect_loss_raw": 1.3603830337524414, "correct_loss_per_char": 0.15902672708034515, "incorrect_loss_per_char": 0.6801915168762207, "correct_loss_per_token": 0.3180534541606903, "incorrect_loss_per_token": 1.3603830337524414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3180534541606903, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -0.3180534541606903, "logits_per_char": -0.15902672708034515, "num_chars": 2}, {"sum_logits": -1.3603830337524414, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.3603830337524414, "logits_per_char": -0.6801915168762207, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 55, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22620882093906403, "incorrect_loss_raw": 1.6397805213928223, "correct_loss_per_char": 0.11310441046953201, "incorrect_loss_per_char": 0.8198902606964111, "correct_loss_per_token": 0.22620882093906403, "incorrect_loss_per_token": 1.6397805213928223, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22620882093906403, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.22620882093906403, "logits_per_char": -0.11310441046953201, "num_chars": 2}, {"sum_logits": -1.6397805213928223, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.6397805213928223, "logits_per_char": -0.8198902606964111, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 56, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.493530511856079, "incorrect_loss_raw": 0.26795151829719543, "correct_loss_per_char": 0.7467652559280396, "incorrect_loss_per_char": 0.13397575914859772, "correct_loss_per_token": 1.493530511856079, "incorrect_loss_per_token": 0.26795151829719543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26795151829719543, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": true, "logits_per_token": -0.26795151829719543, "logits_per_char": -0.13397575914859772, "num_chars": 2}, {"sum_logits": -1.493530511856079, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.493530511856079, "logits_per_char": -0.7467652559280396, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 57, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34657788276672363, "incorrect_loss_raw": 1.2701033353805542, "correct_loss_per_char": 0.17328894138336182, "incorrect_loss_per_char": 0.6350516676902771, "correct_loss_per_token": 0.34657788276672363, "incorrect_loss_per_token": 1.2701033353805542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34657788276672363, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": true, "logits_per_token": -0.34657788276672363, "logits_per_char": -0.17328894138336182, "num_chars": 2}, {"sum_logits": -1.2701033353805542, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": false, "logits_per_token": -1.2701033353805542, "logits_per_char": -0.6350516676902771, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 58, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6268541812896729, "incorrect_loss_raw": 0.22591233253479004, "correct_loss_per_char": 0.8134270906448364, "incorrect_loss_per_char": 0.11295616626739502, "correct_loss_per_token": 1.6268541812896729, "incorrect_loss_per_token": 0.22591233253479004, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22591233253479004, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.22591233253479004, "logits_per_char": -0.11295616626739502, "num_chars": 2}, {"sum_logits": -1.6268541812896729, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.6268541812896729, "logits_per_char": -0.8134270906448364, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 59, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2029920518398285, "incorrect_loss_raw": 1.762919545173645, "correct_loss_per_char": 0.10149602591991425, "incorrect_loss_per_char": 0.8814597725868225, "correct_loss_per_token": 0.2029920518398285, "incorrect_loss_per_token": 1.762919545173645, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2029920518398285, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.2029920518398285, "logits_per_char": -0.10149602591991425, "num_chars": 2}, {"sum_logits": -1.762919545173645, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.762919545173645, "logits_per_char": -0.8814597725868225, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 60, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6466524600982666, "incorrect_loss_raw": 0.22944878041744232, "correct_loss_per_char": 0.8233262300491333, "incorrect_loss_per_char": 0.11472439020872116, "correct_loss_per_token": 1.6466524600982666, "incorrect_loss_per_token": 0.22944878041744232, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22944878041744232, "num_tokens": 1, "num_tokens_all": 1159, "is_greedy": true, "logits_per_token": -0.22944878041744232, "logits_per_char": -0.11472439020872116, "num_chars": 2}, {"sum_logits": -1.6466524600982666, "num_tokens": 1, "num_tokens_all": 1159, "is_greedy": false, "logits_per_token": -1.6466524600982666, "logits_per_char": -0.8233262300491333, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 61, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2326286882162094, "incorrect_loss_raw": 1.6327567100524902, "correct_loss_per_char": 0.1163143441081047, "incorrect_loss_per_char": 0.8163783550262451, "correct_loss_per_token": 0.2326286882162094, "incorrect_loss_per_token": 1.6327567100524902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2326286882162094, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.2326286882162094, "logits_per_char": -0.1163143441081047, "num_chars": 2}, {"sum_logits": -1.6327567100524902, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.6327567100524902, "logits_per_char": -0.8163783550262451, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 62, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5589308738708496, "incorrect_loss_raw": 0.24568897485733032, "correct_loss_per_char": 0.7794654369354248, "incorrect_loss_per_char": 0.12284448742866516, "correct_loss_per_token": 1.5589308738708496, "incorrect_loss_per_token": 0.24568897485733032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24568897485733032, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -0.24568897485733032, "logits_per_char": -0.12284448742866516, "num_chars": 2}, {"sum_logits": -1.5589308738708496, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -1.5589308738708496, "logits_per_char": -0.7794654369354248, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 63, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6479886770248413, "incorrect_loss_raw": 0.23037812113761902, "correct_loss_per_char": 0.8239943385124207, "incorrect_loss_per_char": 0.11518906056880951, "correct_loss_per_token": 1.6479886770248413, "incorrect_loss_per_token": 0.23037812113761902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23037812113761902, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -0.23037812113761902, "logits_per_char": -0.11518906056880951, "num_chars": 2}, {"sum_logits": -1.6479886770248413, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.6479886770248413, "logits_per_char": -0.8239943385124207, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 64, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7100543975830078, "incorrect_loss_raw": 0.20998379588127136, "correct_loss_per_char": 0.8550271987915039, "incorrect_loss_per_char": 0.10499189794063568, "correct_loss_per_token": 1.7100543975830078, "incorrect_loss_per_token": 0.20998379588127136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20998379588127136, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.20998379588127136, "logits_per_char": -0.10499189794063568, "num_chars": 2}, {"sum_logits": -1.7100543975830078, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.7100543975830078, "logits_per_char": -0.8550271987915039, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 65, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22916452586650848, "incorrect_loss_raw": 1.631730318069458, "correct_loss_per_char": 0.11458226293325424, "incorrect_loss_per_char": 0.815865159034729, "correct_loss_per_token": 0.22916452586650848, "incorrect_loss_per_token": 1.631730318069458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22916452586650848, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -0.22916452586650848, "logits_per_char": -0.11458226293325424, "num_chars": 2}, {"sum_logits": -1.631730318069458, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.631730318069458, "logits_per_char": -0.815865159034729, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 66, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26497718691825867, "incorrect_loss_raw": 1.5047409534454346, "correct_loss_per_char": 0.13248859345912933, "incorrect_loss_per_char": 0.7523704767227173, "correct_loss_per_token": 0.26497718691825867, "incorrect_loss_per_token": 1.5047409534454346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26497718691825867, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.26497718691825867, "logits_per_char": -0.13248859345912933, "num_chars": 2}, {"sum_logits": -1.5047409534454346, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.5047409534454346, "logits_per_char": -0.7523704767227173, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 67, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3044125437736511, "incorrect_loss_raw": 1.3805158138275146, "correct_loss_per_char": 0.15220627188682556, "incorrect_loss_per_char": 0.6902579069137573, "correct_loss_per_token": 0.3044125437736511, "incorrect_loss_per_token": 1.3805158138275146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3044125437736511, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": true, "logits_per_token": -0.3044125437736511, "logits_per_char": -0.15220627188682556, "num_chars": 2}, {"sum_logits": -1.3805158138275146, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": false, "logits_per_token": -1.3805158138275146, "logits_per_char": -0.6902579069137573, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 68, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8711189031600952, "incorrect_loss_raw": 0.1754523664712906, "correct_loss_per_char": 0.9355594515800476, "incorrect_loss_per_char": 0.0877261832356453, "correct_loss_per_token": 1.8711189031600952, "incorrect_loss_per_token": 0.1754523664712906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1754523664712906, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.1754523664712906, "logits_per_char": -0.0877261832356453, "num_chars": 2}, {"sum_logits": -1.8711189031600952, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.8711189031600952, "logits_per_char": -0.9355594515800476, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 69, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5665661096572876, "incorrect_loss_raw": 0.24768050014972687, "correct_loss_per_char": 0.7832830548286438, "incorrect_loss_per_char": 0.12384025007486343, "correct_loss_per_token": 1.5665661096572876, "incorrect_loss_per_token": 0.24768050014972687, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24768050014972687, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": true, "logits_per_token": -0.24768050014972687, "logits_per_char": -0.12384025007486343, "num_chars": 2}, {"sum_logits": -1.5665661096572876, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": false, "logits_per_token": -1.5665661096572876, "logits_per_char": -0.7832830548286438, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 70, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6269346475601196, "incorrect_loss_raw": 0.2298794984817505, "correct_loss_per_char": 0.8134673237800598, "incorrect_loss_per_char": 0.11493974924087524, "correct_loss_per_token": 1.6269346475601196, "incorrect_loss_per_token": 0.2298794984817505, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2298794984817505, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.2298794984817505, "logits_per_char": -0.11493974924087524, "num_chars": 2}, {"sum_logits": -1.6269346475601196, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.6269346475601196, "logits_per_char": -0.8134673237800598, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 71, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22563639283180237, "incorrect_loss_raw": 1.6558705568313599, "correct_loss_per_char": 0.11281819641590118, "incorrect_loss_per_char": 0.8279352784156799, "correct_loss_per_token": 0.22563639283180237, "incorrect_loss_per_token": 1.6558705568313599, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22563639283180237, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.22563639283180237, "logits_per_char": -0.11281819641590118, "num_chars": 2}, {"sum_logits": -1.6558705568313599, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.6558705568313599, "logits_per_char": -0.8279352784156799, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 72, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18796786665916443, "incorrect_loss_raw": 1.8149921894073486, "correct_loss_per_char": 0.09398393332958221, "incorrect_loss_per_char": 0.9074960947036743, "correct_loss_per_token": 0.18796786665916443, "incorrect_loss_per_token": 1.8149921894073486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18796786665916443, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.18796786665916443, "logits_per_char": -0.09398393332958221, "num_chars": 2}, {"sum_logits": -1.8149921894073486, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.8149921894073486, "logits_per_char": -0.9074960947036743, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 73, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23790377378463745, "incorrect_loss_raw": 1.5980618000030518, "correct_loss_per_char": 0.11895188689231873, "incorrect_loss_per_char": 0.7990309000015259, "correct_loss_per_token": 0.23790377378463745, "incorrect_loss_per_token": 1.5980618000030518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23790377378463745, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": true, "logits_per_token": -0.23790377378463745, "logits_per_char": -0.11895188689231873, "num_chars": 2}, {"sum_logits": -1.5980618000030518, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.5980618000030518, "logits_per_char": -0.7990309000015259, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 74, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6964240074157715, "incorrect_loss_raw": 0.21197780966758728, "correct_loss_per_char": 0.8482120037078857, "incorrect_loss_per_char": 0.10598890483379364, "correct_loss_per_token": 1.6964240074157715, "incorrect_loss_per_token": 0.21197780966758728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21197780966758728, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.21197780966758728, "logits_per_char": -0.10598890483379364, "num_chars": 2}, {"sum_logits": -1.6964240074157715, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.6964240074157715, "logits_per_char": -0.8482120037078857, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 75, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6556791067123413, "incorrect_loss_raw": 0.22012396156787872, "correct_loss_per_char": 0.8278395533561707, "incorrect_loss_per_char": 0.11006198078393936, "correct_loss_per_token": 1.6556791067123413, "incorrect_loss_per_token": 0.22012396156787872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22012396156787872, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.22012396156787872, "logits_per_char": -0.11006198078393936, "num_chars": 2}, {"sum_logits": -1.6556791067123413, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.6556791067123413, "logits_per_char": -0.8278395533561707, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 76, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.31433117389678955, "incorrect_loss_raw": 1.3662447929382324, "correct_loss_per_char": 0.15716558694839478, "incorrect_loss_per_char": 0.6831223964691162, "correct_loss_per_token": 0.31433117389678955, "incorrect_loss_per_token": 1.3662447929382324, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.31433117389678955, "num_tokens": 1, "num_tokens_all": 1018, "is_greedy": true, "logits_per_token": -0.31433117389678955, "logits_per_char": -0.15716558694839478, "num_chars": 2}, {"sum_logits": -1.3662447929382324, "num_tokens": 1, "num_tokens_all": 1018, "is_greedy": false, "logits_per_token": -1.3662447929382324, "logits_per_char": -0.6831223964691162, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 77, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2293415069580078, "incorrect_loss_raw": 1.6354817152023315, "correct_loss_per_char": 0.1146707534790039, "incorrect_loss_per_char": 0.8177408576011658, "correct_loss_per_token": 0.2293415069580078, "incorrect_loss_per_token": 1.6354817152023315, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2293415069580078, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.2293415069580078, "logits_per_char": -0.1146707534790039, "num_chars": 2}, {"sum_logits": -1.6354817152023315, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.6354817152023315, "logits_per_char": -0.8177408576011658, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 78, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22963546216487885, "incorrect_loss_raw": 1.6328638792037964, "correct_loss_per_char": 0.11481773108243942, "incorrect_loss_per_char": 0.8164319396018982, "correct_loss_per_token": 0.22963546216487885, "incorrect_loss_per_token": 1.6328638792037964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22963546216487885, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -0.22963546216487885, "logits_per_char": -0.11481773108243942, "num_chars": 2}, {"sum_logits": -1.6328638792037964, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.6328638792037964, "logits_per_char": -0.8164319396018982, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 79, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1782829910516739, "incorrect_loss_raw": 1.8648818731307983, "correct_loss_per_char": 0.08914149552583694, "incorrect_loss_per_char": 0.9324409365653992, "correct_loss_per_token": 0.1782829910516739, "incorrect_loss_per_token": 1.8648818731307983, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1782829910516739, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.1782829910516739, "logits_per_char": -0.08914149552583694, "num_chars": 2}, {"sum_logits": -1.8648818731307983, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.8648818731307983, "logits_per_char": -0.9324409365653992, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 80, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22725562751293182, "incorrect_loss_raw": 1.6339343786239624, "correct_loss_per_char": 0.11362781375646591, "incorrect_loss_per_char": 0.8169671893119812, "correct_loss_per_token": 0.22725562751293182, "incorrect_loss_per_token": 1.6339343786239624, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22725562751293182, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.22725562751293182, "logits_per_char": -0.11362781375646591, "num_chars": 2}, {"sum_logits": -1.6339343786239624, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.6339343786239624, "logits_per_char": -0.8169671893119812, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 81, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19804872572422028, "incorrect_loss_raw": 1.755902886390686, "correct_loss_per_char": 0.09902436286211014, "incorrect_loss_per_char": 0.877951443195343, "correct_loss_per_token": 0.19804872572422028, "incorrect_loss_per_token": 1.755902886390686, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19804872572422028, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.19804872572422028, "logits_per_char": -0.09902436286211014, "num_chars": 2}, {"sum_logits": -1.755902886390686, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.755902886390686, "logits_per_char": -0.877951443195343, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 82, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5763349533081055, "incorrect_loss_raw": 0.24069547653198242, "correct_loss_per_char": 0.7881674766540527, "incorrect_loss_per_char": 0.12034773826599121, "correct_loss_per_token": 1.5763349533081055, "incorrect_loss_per_token": 0.24069547653198242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24069547653198242, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.24069547653198242, "logits_per_char": -0.12034773826599121, "num_chars": 2}, {"sum_logits": -1.5763349533081055, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.5763349533081055, "logits_per_char": -0.7881674766540527, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 83, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6673312187194824, "incorrect_loss_raw": 0.2213580161333084, "correct_loss_per_char": 0.8336656093597412, "incorrect_loss_per_char": 0.1106790080666542, "correct_loss_per_token": 1.6673312187194824, "incorrect_loss_per_token": 0.2213580161333084, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2213580161333084, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.2213580161333084, "logits_per_char": -0.1106790080666542, "num_chars": 2}, {"sum_logits": -1.6673312187194824, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.6673312187194824, "logits_per_char": -0.8336656093597412, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 84, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26987794041633606, "incorrect_loss_raw": 1.4894814491271973, "correct_loss_per_char": 0.13493897020816803, "incorrect_loss_per_char": 0.7447407245635986, "correct_loss_per_token": 0.26987794041633606, "incorrect_loss_per_token": 1.4894814491271973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26987794041633606, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -0.26987794041633606, "logits_per_char": -0.13493897020816803, "num_chars": 2}, {"sum_logits": -1.4894814491271973, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.4894814491271973, "logits_per_char": -0.7447407245635986, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 85, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20802965760231018, "incorrect_loss_raw": 1.7293782234191895, "correct_loss_per_char": 0.10401482880115509, "incorrect_loss_per_char": 0.8646891117095947, "correct_loss_per_token": 0.20802965760231018, "incorrect_loss_per_token": 1.7293782234191895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20802965760231018, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.20802965760231018, "logits_per_char": -0.10401482880115509, "num_chars": 2}, {"sum_logits": -1.7293782234191895, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.7293782234191895, "logits_per_char": -0.8646891117095947, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 86, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19566389918327332, "incorrect_loss_raw": 1.758442997932434, "correct_loss_per_char": 0.09783194959163666, "incorrect_loss_per_char": 0.879221498966217, "correct_loss_per_token": 0.19566389918327332, "incorrect_loss_per_token": 1.758442997932434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19566389918327332, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.19566389918327332, "logits_per_char": -0.09783194959163666, "num_chars": 2}, {"sum_logits": -1.758442997932434, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.758442997932434, "logits_per_char": -0.879221498966217, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 87, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2564406394958496, "incorrect_loss_raw": 1.5229730606079102, "correct_loss_per_char": 0.1282203197479248, "incorrect_loss_per_char": 0.7614865303039551, "correct_loss_per_token": 0.2564406394958496, "incorrect_loss_per_token": 1.5229730606079102, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2564406394958496, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.2564406394958496, "logits_per_char": -0.1282203197479248, "num_chars": 2}, {"sum_logits": -1.5229730606079102, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.5229730606079102, "logits_per_char": -0.7614865303039551, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 88, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24552737176418304, "incorrect_loss_raw": 1.5706251859664917, "correct_loss_per_char": 0.12276368588209152, "incorrect_loss_per_char": 0.7853125929832458, "correct_loss_per_token": 0.24552737176418304, "incorrect_loss_per_token": 1.5706251859664917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24552737176418304, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.24552737176418304, "logits_per_char": -0.12276368588209152, "num_chars": 2}, {"sum_logits": -1.5706251859664917, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.5706251859664917, "logits_per_char": -0.7853125929832458, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 89, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24798241257667542, "incorrect_loss_raw": 1.5558422803878784, "correct_loss_per_char": 0.12399120628833771, "incorrect_loss_per_char": 0.7779211401939392, "correct_loss_per_token": 0.24798241257667542, "incorrect_loss_per_token": 1.5558422803878784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24798241257667542, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": true, "logits_per_token": -0.24798241257667542, "logits_per_char": -0.12399120628833771, "num_chars": 2}, {"sum_logits": -1.5558422803878784, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": false, "logits_per_token": -1.5558422803878784, "logits_per_char": -0.7779211401939392, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 90, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25036147236824036, "incorrect_loss_raw": 1.5508655309677124, "correct_loss_per_char": 0.12518073618412018, "incorrect_loss_per_char": 0.7754327654838562, "correct_loss_per_token": 0.25036147236824036, "incorrect_loss_per_token": 1.5508655309677124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25036147236824036, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": true, "logits_per_token": -0.25036147236824036, "logits_per_char": -0.12518073618412018, "num_chars": 2}, {"sum_logits": -1.5508655309677124, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.5508655309677124, "logits_per_char": -0.7754327654838562, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 91, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.676257610321045, "incorrect_loss_raw": 0.21327050030231476, "correct_loss_per_char": 0.8381288051605225, "incorrect_loss_per_char": 0.10663525015115738, "correct_loss_per_token": 1.676257610321045, "incorrect_loss_per_token": 0.21327050030231476, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21327050030231476, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.21327050030231476, "logits_per_char": -0.10663525015115738, "num_chars": 2}, {"sum_logits": -1.676257610321045, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.676257610321045, "logits_per_char": -0.8381288051605225, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 92, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28148186206817627, "incorrect_loss_raw": 1.4312465190887451, "correct_loss_per_char": 0.14074093103408813, "incorrect_loss_per_char": 0.7156232595443726, "correct_loss_per_token": 0.28148186206817627, "incorrect_loss_per_token": 1.4312465190887451, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28148186206817627, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.28148186206817627, "logits_per_char": -0.14074093103408813, "num_chars": 2}, {"sum_logits": -1.4312465190887451, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.4312465190887451, "logits_per_char": -0.7156232595443726, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 93, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24719229340553284, "incorrect_loss_raw": 1.5501526594161987, "correct_loss_per_char": 0.12359614670276642, "incorrect_loss_per_char": 0.7750763297080994, "correct_loss_per_token": 0.24719229340553284, "incorrect_loss_per_token": 1.5501526594161987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24719229340553284, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -0.24719229340553284, "logits_per_char": -0.12359614670276642, "num_chars": 2}, {"sum_logits": -1.5501526594161987, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.5501526594161987, "logits_per_char": -0.7750763297080994, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 94, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4810473918914795, "incorrect_loss_raw": 0.2740626037120819, "correct_loss_per_char": 0.7405236959457397, "incorrect_loss_per_char": 0.13703130185604095, "correct_loss_per_token": 1.4810473918914795, "incorrect_loss_per_token": 0.2740626037120819, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2740626037120819, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": true, "logits_per_token": -0.2740626037120819, "logits_per_char": -0.13703130185604095, "num_chars": 2}, {"sum_logits": -1.4810473918914795, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.4810473918914795, "logits_per_char": -0.7405236959457397, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 95, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23283535242080688, "incorrect_loss_raw": 1.6074748039245605, "correct_loss_per_char": 0.11641767621040344, "incorrect_loss_per_char": 0.8037374019622803, "correct_loss_per_token": 0.23283535242080688, "incorrect_loss_per_token": 1.6074748039245605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23283535242080688, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.23283535242080688, "logits_per_char": -0.11641767621040344, "num_chars": 2}, {"sum_logits": -1.6074748039245605, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.6074748039245605, "logits_per_char": -0.8037374019622803, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 96, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22348091006278992, "incorrect_loss_raw": 1.6673684120178223, "correct_loss_per_char": 0.11174045503139496, "incorrect_loss_per_char": 0.8336842060089111, "correct_loss_per_token": 0.22348091006278992, "incorrect_loss_per_token": 1.6673684120178223, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22348091006278992, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.22348091006278992, "logits_per_char": -0.11174045503139496, "num_chars": 2}, {"sum_logits": -1.6673684120178223, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.6673684120178223, "logits_per_char": -0.8336842060089111, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 97, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5488660335540771, "incorrect_loss_raw": 0.25025343894958496, "correct_loss_per_char": 0.7744330167770386, "incorrect_loss_per_char": 0.12512671947479248, "correct_loss_per_token": 1.5488660335540771, "incorrect_loss_per_token": 0.25025343894958496, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25025343894958496, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": true, "logits_per_token": -0.25025343894958496, "logits_per_char": -0.12512671947479248, "num_chars": 2}, {"sum_logits": -1.5488660335540771, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": false, "logits_per_token": -1.5488660335540771, "logits_per_char": -0.7744330167770386, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 98, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3695838451385498, "incorrect_loss_raw": 0.30507123470306396, "correct_loss_per_char": 0.6847919225692749, "incorrect_loss_per_char": 0.15253561735153198, "correct_loss_per_token": 1.3695838451385498, "incorrect_loss_per_token": 0.30507123470306396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30507123470306396, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.30507123470306396, "logits_per_char": -0.15253561735153198, "num_chars": 2}, {"sum_logits": -1.3695838451385498, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.3695838451385498, "logits_per_char": -0.6847919225692749, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 99, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21061652898788452, "incorrect_loss_raw": 1.6996150016784668, "correct_loss_per_char": 0.10530826449394226, "incorrect_loss_per_char": 0.8498075008392334, "correct_loss_per_token": 0.21061652898788452, "incorrect_loss_per_token": 1.6996150016784668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21061652898788452, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.21061652898788452, "logits_per_char": -0.10530826449394226, "num_chars": 2}, {"sum_logits": -1.6996150016784668, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -1.6996150016784668, "logits_per_char": -0.8498075008392334, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 100, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21530459821224213, "incorrect_loss_raw": 1.6845250129699707, "correct_loss_per_char": 0.10765229910612106, "incorrect_loss_per_char": 0.8422625064849854, "correct_loss_per_token": 0.21530459821224213, "incorrect_loss_per_token": 1.6845250129699707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21530459821224213, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.21530459821224213, "logits_per_char": -0.10765229910612106, "num_chars": 2}, {"sum_logits": -1.6845250129699707, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.6845250129699707, "logits_per_char": -0.8422625064849854, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 101, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8719608783721924, "incorrect_loss_raw": 0.17976327240467072, "correct_loss_per_char": 0.9359804391860962, "incorrect_loss_per_char": 0.08988163620233536, "correct_loss_per_token": 1.8719608783721924, "incorrect_loss_per_token": 0.17976327240467072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17976327240467072, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.17976327240467072, "logits_per_char": -0.08988163620233536, "num_chars": 2}, {"sum_logits": -1.8719608783721924, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.8719608783721924, "logits_per_char": -0.9359804391860962, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 102, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.226483553647995, "incorrect_loss_raw": 1.6207573413848877, "correct_loss_per_char": 0.1132417768239975, "incorrect_loss_per_char": 0.8103786706924438, "correct_loss_per_token": 0.226483553647995, "incorrect_loss_per_token": 1.6207573413848877, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.226483553647995, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.226483553647995, "logits_per_char": -0.1132417768239975, "num_chars": 2}, {"sum_logits": -1.6207573413848877, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.6207573413848877, "logits_per_char": -0.8103786706924438, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 103, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29847991466522217, "incorrect_loss_raw": 1.400789499282837, "correct_loss_per_char": 0.14923995733261108, "incorrect_loss_per_char": 0.7003947496414185, "correct_loss_per_token": 0.29847991466522217, "incorrect_loss_per_token": 1.400789499282837, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29847991466522217, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": true, "logits_per_token": -0.29847991466522217, "logits_per_char": -0.14923995733261108, "num_chars": 2}, {"sum_logits": -1.400789499282837, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": false, "logits_per_token": -1.400789499282837, "logits_per_char": -0.7003947496414185, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 104, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.552295446395874, "incorrect_loss_raw": 0.2512328326702118, "correct_loss_per_char": 0.776147723197937, "incorrect_loss_per_char": 0.1256164163351059, "correct_loss_per_token": 1.552295446395874, "incorrect_loss_per_token": 0.2512328326702118, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2512328326702118, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.2512328326702118, "logits_per_char": -0.1256164163351059, "num_chars": 2}, {"sum_logits": -1.552295446395874, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.552295446395874, "logits_per_char": -0.776147723197937, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 105, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6712416410446167, "incorrect_loss_raw": 0.21995386481285095, "correct_loss_per_char": 0.8356208205223083, "incorrect_loss_per_char": 0.10997693240642548, "correct_loss_per_token": 1.6712416410446167, "incorrect_loss_per_token": 0.21995386481285095, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21995386481285095, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.21995386481285095, "logits_per_char": -0.10997693240642548, "num_chars": 2}, {"sum_logits": -1.6712416410446167, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.6712416410446167, "logits_per_char": -0.8356208205223083, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 106, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17797942459583282, "incorrect_loss_raw": 1.8469362258911133, "correct_loss_per_char": 0.08898971229791641, "incorrect_loss_per_char": 0.9234681129455566, "correct_loss_per_token": 0.17797942459583282, "incorrect_loss_per_token": 1.8469362258911133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17797942459583282, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.17797942459583282, "logits_per_char": -0.08898971229791641, "num_chars": 2}, {"sum_logits": -1.8469362258911133, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.8469362258911133, "logits_per_char": -0.9234681129455566, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 107, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.682823896408081, "incorrect_loss_raw": 0.21586596965789795, "correct_loss_per_char": 0.8414119482040405, "incorrect_loss_per_char": 0.10793298482894897, "correct_loss_per_token": 1.682823896408081, "incorrect_loss_per_token": 0.21586596965789795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21586596965789795, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.21586596965789795, "logits_per_char": -0.10793298482894897, "num_chars": 2}, {"sum_logits": -1.682823896408081, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.682823896408081, "logits_per_char": -0.8414119482040405, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 108, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21108102798461914, "incorrect_loss_raw": 1.7054369449615479, "correct_loss_per_char": 0.10554051399230957, "incorrect_loss_per_char": 0.8527184724807739, "correct_loss_per_token": 0.21108102798461914, "incorrect_loss_per_token": 1.7054369449615479, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21108102798461914, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -0.21108102798461914, "logits_per_char": -0.10554051399230957, "num_chars": 2}, {"sum_logits": -1.7054369449615479, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.7054369449615479, "logits_per_char": -0.8527184724807739, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 109, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1749991476535797, "incorrect_loss_raw": 1.8772454261779785, "correct_loss_per_char": 0.08749957382678986, "incorrect_loss_per_char": 0.9386227130889893, "correct_loss_per_token": 0.1749991476535797, "incorrect_loss_per_token": 1.8772454261779785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1749991476535797, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -0.1749991476535797, "logits_per_char": -0.08749957382678986, "num_chars": 2}, {"sum_logits": -1.8772454261779785, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.8772454261779785, "logits_per_char": -0.9386227130889893, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 110, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2380690574645996, "incorrect_loss_raw": 1.5788389444351196, "correct_loss_per_char": 0.1190345287322998, "incorrect_loss_per_char": 0.7894194722175598, "correct_loss_per_token": 0.2380690574645996, "incorrect_loss_per_token": 1.5788389444351196, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2380690574645996, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": true, "logits_per_token": -0.2380690574645996, "logits_per_char": -0.1190345287322998, "num_chars": 2}, {"sum_logits": -1.5788389444351196, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": false, "logits_per_token": -1.5788389444351196, "logits_per_char": -0.7894194722175598, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 111, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22407466173171997, "incorrect_loss_raw": 1.6552848815917969, "correct_loss_per_char": 0.11203733086585999, "incorrect_loss_per_char": 0.8276424407958984, "correct_loss_per_token": 0.22407466173171997, "incorrect_loss_per_token": 1.6552848815917969, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22407466173171997, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -0.22407466173171997, "logits_per_char": -0.11203733086585999, "num_chars": 2}, {"sum_logits": -1.6552848815917969, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.6552848815917969, "logits_per_char": -0.8276424407958984, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 112, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.270504355430603, "incorrect_loss_raw": 1.4903178215026855, "correct_loss_per_char": 0.1352521777153015, "incorrect_loss_per_char": 0.7451589107513428, "correct_loss_per_token": 0.270504355430603, "incorrect_loss_per_token": 1.4903178215026855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.270504355430603, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -0.270504355430603, "logits_per_char": -0.1352521777153015, "num_chars": 2}, {"sum_logits": -1.4903178215026855, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.4903178215026855, "logits_per_char": -0.7451589107513428, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 113, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.493825912475586, "incorrect_loss_raw": 0.26396915316581726, "correct_loss_per_char": 0.746912956237793, "incorrect_loss_per_char": 0.13198457658290863, "correct_loss_per_token": 1.493825912475586, "incorrect_loss_per_token": 0.26396915316581726, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26396915316581726, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.26396915316581726, "logits_per_char": -0.13198457658290863, "num_chars": 2}, {"sum_logits": -1.493825912475586, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.493825912475586, "logits_per_char": -0.746912956237793, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 114, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2627384662628174, "incorrect_loss_raw": 1.5009925365447998, "correct_loss_per_char": 0.1313692331314087, "incorrect_loss_per_char": 0.7504962682723999, "correct_loss_per_token": 0.2627384662628174, "incorrect_loss_per_token": 1.5009925365447998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2627384662628174, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": true, "logits_per_token": -0.2627384662628174, "logits_per_char": -0.1313692331314087, "num_chars": 2}, {"sum_logits": -1.5009925365447998, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.5009925365447998, "logits_per_char": -0.7504962682723999, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 115, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2885793447494507, "incorrect_loss_raw": 1.4205100536346436, "correct_loss_per_char": 0.14428967237472534, "incorrect_loss_per_char": 0.7102550268173218, "correct_loss_per_token": 0.2885793447494507, "incorrect_loss_per_token": 1.4205100536346436, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2885793447494507, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.2885793447494507, "logits_per_char": -0.14428967237472534, "num_chars": 2}, {"sum_logits": -1.4205100536346436, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.4205100536346436, "logits_per_char": -0.7102550268173218, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 116, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7418794631958008, "incorrect_loss_raw": 0.2025344967842102, "correct_loss_per_char": 0.8709397315979004, "incorrect_loss_per_char": 0.1012672483921051, "correct_loss_per_token": 1.7418794631958008, "incorrect_loss_per_token": 0.2025344967842102, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2025344967842102, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.2025344967842102, "logits_per_char": -0.1012672483921051, "num_chars": 2}, {"sum_logits": -1.7418794631958008, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.7418794631958008, "logits_per_char": -0.8709397315979004, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 117, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5606498718261719, "incorrect_loss_raw": 0.25069236755371094, "correct_loss_per_char": 0.7803249359130859, "incorrect_loss_per_char": 0.12534618377685547, "correct_loss_per_token": 1.5606498718261719, "incorrect_loss_per_token": 0.25069236755371094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25069236755371094, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -0.25069236755371094, "logits_per_char": -0.12534618377685547, "num_chars": 2}, {"sum_logits": -1.5606498718261719, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.5606498718261719, "logits_per_char": -0.7803249359130859, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 118, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.497300148010254, "incorrect_loss_raw": 0.2683975100517273, "correct_loss_per_char": 0.748650074005127, "incorrect_loss_per_char": 0.13419875502586365, "correct_loss_per_token": 1.497300148010254, "incorrect_loss_per_token": 0.2683975100517273, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2683975100517273, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.2683975100517273, "logits_per_char": -0.13419875502586365, "num_chars": 2}, {"sum_logits": -1.497300148010254, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -1.497300148010254, "logits_per_char": -0.748650074005127, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 119, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1936827301979065, "incorrect_loss_raw": 1.7832963466644287, "correct_loss_per_char": 0.09684136509895325, "incorrect_loss_per_char": 0.8916481733322144, "correct_loss_per_token": 0.1936827301979065, "incorrect_loss_per_token": 1.7832963466644287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1936827301979065, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": true, "logits_per_token": -0.1936827301979065, "logits_per_char": -0.09684136509895325, "num_chars": 2}, {"sum_logits": -1.7832963466644287, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -1.7832963466644287, "logits_per_char": -0.8916481733322144, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 120, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2224620133638382, "incorrect_loss_raw": 1.6742032766342163, "correct_loss_per_char": 0.1112310066819191, "incorrect_loss_per_char": 0.8371016383171082, "correct_loss_per_token": 0.2224620133638382, "incorrect_loss_per_token": 1.6742032766342163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2224620133638382, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": true, "logits_per_token": -0.2224620133638382, "logits_per_char": -0.1112310066819191, "num_chars": 2}, {"sum_logits": -1.6742032766342163, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": false, "logits_per_token": -1.6742032766342163, "logits_per_char": -0.8371016383171082, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 121, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22993500530719757, "incorrect_loss_raw": 1.628453016281128, "correct_loss_per_char": 0.11496750265359879, "incorrect_loss_per_char": 0.814226508140564, "correct_loss_per_token": 0.22993500530719757, "incorrect_loss_per_token": 1.628453016281128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22993500530719757, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.22993500530719757, "logits_per_char": -0.11496750265359879, "num_chars": 2}, {"sum_logits": -1.628453016281128, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.628453016281128, "logits_per_char": -0.814226508140564, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 122, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7014912366867065, "incorrect_loss_raw": 0.2128196507692337, "correct_loss_per_char": 0.8507456183433533, "incorrect_loss_per_char": 0.10640982538461685, "correct_loss_per_token": 1.7014912366867065, "incorrect_loss_per_token": 0.2128196507692337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2128196507692337, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.2128196507692337, "logits_per_char": -0.10640982538461685, "num_chars": 2}, {"sum_logits": -1.7014912366867065, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.7014912366867065, "logits_per_char": -0.8507456183433533, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 123, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6265217065811157, "incorrect_loss_raw": 0.23204262554645538, "correct_loss_per_char": 0.8132608532905579, "incorrect_loss_per_char": 0.11602131277322769, "correct_loss_per_token": 1.6265217065811157, "incorrect_loss_per_token": 0.23204262554645538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23204262554645538, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.23204262554645538, "logits_per_char": -0.11602131277322769, "num_chars": 2}, {"sum_logits": -1.6265217065811157, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.6265217065811157, "logits_per_char": -0.8132608532905579, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 124, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4522325992584229, "incorrect_loss_raw": 0.27831384539604187, "correct_loss_per_char": 0.7261162996292114, "incorrect_loss_per_char": 0.13915692269802094, "correct_loss_per_token": 1.4522325992584229, "incorrect_loss_per_token": 0.27831384539604187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27831384539604187, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": true, "logits_per_token": -0.27831384539604187, "logits_per_char": -0.13915692269802094, "num_chars": 2}, {"sum_logits": -1.4522325992584229, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -1.4522325992584229, "logits_per_char": -0.7261162996292114, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 125, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19348901510238647, "incorrect_loss_raw": 1.7776787281036377, "correct_loss_per_char": 0.09674450755119324, "incorrect_loss_per_char": 0.8888393640518188, "correct_loss_per_token": 0.19348901510238647, "incorrect_loss_per_token": 1.7776787281036377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19348901510238647, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.19348901510238647, "logits_per_char": -0.09674450755119324, "num_chars": 2}, {"sum_logits": -1.7776787281036377, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.7776787281036377, "logits_per_char": -0.8888393640518188, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 126, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4417269229888916, "incorrect_loss_raw": 0.2876935601234436, "correct_loss_per_char": 0.7208634614944458, "incorrect_loss_per_char": 0.1438467800617218, "correct_loss_per_token": 1.4417269229888916, "incorrect_loss_per_token": 0.2876935601234436, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2876935601234436, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -0.2876935601234436, "logits_per_char": -0.1438467800617218, "num_chars": 2}, {"sum_logits": -1.4417269229888916, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.4417269229888916, "logits_per_char": -0.7208634614944458, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 127, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4604172706604004, "incorrect_loss_raw": 0.27366000413894653, "correct_loss_per_char": 0.7302086353302002, "incorrect_loss_per_char": 0.13683000206947327, "correct_loss_per_token": 1.4604172706604004, "incorrect_loss_per_token": 0.27366000413894653, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27366000413894653, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": true, "logits_per_token": -0.27366000413894653, "logits_per_char": -0.13683000206947327, "num_chars": 2}, {"sum_logits": -1.4604172706604004, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": false, "logits_per_token": -1.4604172706604004, "logits_per_char": -0.7302086353302002, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 128, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21140818297863007, "incorrect_loss_raw": 1.7116190195083618, "correct_loss_per_char": 0.10570409148931503, "incorrect_loss_per_char": 0.8558095097541809, "correct_loss_per_token": 0.21140818297863007, "incorrect_loss_per_token": 1.7116190195083618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21140818297863007, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.21140818297863007, "logits_per_char": -0.10570409148931503, "num_chars": 2}, {"sum_logits": -1.7116190195083618, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -1.7116190195083618, "logits_per_char": -0.8558095097541809, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 129, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.468736171722412, "incorrect_loss_raw": 0.27575671672821045, "correct_loss_per_char": 0.734368085861206, "incorrect_loss_per_char": 0.13787835836410522, "correct_loss_per_token": 1.468736171722412, "incorrect_loss_per_token": 0.27575671672821045, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27575671672821045, "num_tokens": 1, "num_tokens_all": 1094, "is_greedy": true, "logits_per_token": -0.27575671672821045, "logits_per_char": -0.13787835836410522, "num_chars": 2}, {"sum_logits": -1.468736171722412, "num_tokens": 1, "num_tokens_all": 1094, "is_greedy": false, "logits_per_token": -1.468736171722412, "logits_per_char": -0.734368085861206, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 130, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8173165321350098, "incorrect_loss_raw": 0.18721002340316772, "correct_loss_per_char": 0.9086582660675049, "incorrect_loss_per_char": 0.09360501170158386, "correct_loss_per_token": 1.8173165321350098, "incorrect_loss_per_token": 0.18721002340316772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18721002340316772, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": true, "logits_per_token": -0.18721002340316772, "logits_per_char": -0.09360501170158386, "num_chars": 2}, {"sum_logits": -1.8173165321350098, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.8173165321350098, "logits_per_char": -0.9086582660675049, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 131, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18510879576206207, "incorrect_loss_raw": 1.829103946685791, "correct_loss_per_char": 0.09255439788103104, "incorrect_loss_per_char": 0.9145519733428955, "correct_loss_per_token": 0.18510879576206207, "incorrect_loss_per_token": 1.829103946685791, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18510879576206207, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.18510879576206207, "logits_per_char": -0.09255439788103104, "num_chars": 2}, {"sum_logits": -1.829103946685791, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.829103946685791, "logits_per_char": -0.9145519733428955, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 132, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2068929374217987, "incorrect_loss_raw": 1.7052687406539917, "correct_loss_per_char": 0.10344646871089935, "incorrect_loss_per_char": 0.8526343703269958, "correct_loss_per_token": 0.2068929374217987, "incorrect_loss_per_token": 1.7052687406539917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2068929374217987, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.2068929374217987, "logits_per_char": -0.10344646871089935, "num_chars": 2}, {"sum_logits": -1.7052687406539917, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.7052687406539917, "logits_per_char": -0.8526343703269958, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 133, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3079262971878052, "incorrect_loss_raw": 1.4089666604995728, "correct_loss_per_char": 0.1539631485939026, "incorrect_loss_per_char": 0.7044833302497864, "correct_loss_per_token": 0.3079262971878052, "incorrect_loss_per_token": 1.4089666604995728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3079262971878052, "num_tokens": 1, "num_tokens_all": 1250, "is_greedy": true, "logits_per_token": -0.3079262971878052, "logits_per_char": -0.1539631485939026, "num_chars": 2}, {"sum_logits": -1.4089666604995728, "num_tokens": 1, "num_tokens_all": 1250, "is_greedy": false, "logits_per_token": -1.4089666604995728, "logits_per_char": -0.7044833302497864, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 134, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5582391023635864, "incorrect_loss_raw": 0.24586760997772217, "correct_loss_per_char": 0.7791195511817932, "incorrect_loss_per_char": 0.12293380498886108, "correct_loss_per_token": 1.5582391023635864, "incorrect_loss_per_token": 0.24586760997772217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24586760997772217, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.24586760997772217, "logits_per_char": -0.12293380498886108, "num_chars": 2}, {"sum_logits": -1.5582391023635864, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.5582391023635864, "logits_per_char": -0.7791195511817932, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 135, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2481166422367096, "incorrect_loss_raw": 1.569935917854309, "correct_loss_per_char": 0.1240583211183548, "incorrect_loss_per_char": 0.7849679589271545, "correct_loss_per_token": 0.2481166422367096, "incorrect_loss_per_token": 1.569935917854309, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2481166422367096, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.2481166422367096, "logits_per_char": -0.1240583211183548, "num_chars": 2}, {"sum_logits": -1.569935917854309, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.569935917854309, "logits_per_char": -0.7849679589271545, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 136, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3110561668872833, "incorrect_loss_raw": 1.3504652976989746, "correct_loss_per_char": 0.15552808344364166, "incorrect_loss_per_char": 0.6752326488494873, "correct_loss_per_token": 0.3110561668872833, "incorrect_loss_per_token": 1.3504652976989746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3110561668872833, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.3110561668872833, "logits_per_char": -0.15552808344364166, "num_chars": 2}, {"sum_logits": -1.3504652976989746, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.3504652976989746, "logits_per_char": -0.6752326488494873, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 137, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7029212713241577, "incorrect_loss_raw": 0.21093757450580597, "correct_loss_per_char": 0.8514606356620789, "incorrect_loss_per_char": 0.10546878725290298, "correct_loss_per_token": 1.7029212713241577, "incorrect_loss_per_token": 0.21093757450580597, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21093757450580597, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.21093757450580597, "logits_per_char": -0.10546878725290298, "num_chars": 2}, {"sum_logits": -1.7029212713241577, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.7029212713241577, "logits_per_char": -0.8514606356620789, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 138, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2567949295043945, "incorrect_loss_raw": 0.3510739803314209, "correct_loss_per_char": 0.6283974647521973, "incorrect_loss_per_char": 0.17553699016571045, "correct_loss_per_token": 1.2567949295043945, "incorrect_loss_per_token": 0.3510739803314209, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3510739803314209, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": true, "logits_per_token": -0.3510739803314209, "logits_per_char": -0.17553699016571045, "num_chars": 2}, {"sum_logits": -1.2567949295043945, "num_tokens": 1, "num_tokens_all": 1074, "is_greedy": false, "logits_per_token": -1.2567949295043945, "logits_per_char": -0.6283974647521973, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 139, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30338358879089355, "incorrect_loss_raw": 1.3783243894577026, "correct_loss_per_char": 0.15169179439544678, "incorrect_loss_per_char": 0.6891621947288513, "correct_loss_per_token": 0.30338358879089355, "incorrect_loss_per_token": 1.3783243894577026, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30338358879089355, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": true, "logits_per_token": -0.30338358879089355, "logits_per_char": -0.15169179439544678, "num_chars": 2}, {"sum_logits": -1.3783243894577026, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -1.3783243894577026, "logits_per_char": -0.6891621947288513, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 140, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6194548606872559, "incorrect_loss_raw": 0.23151741921901703, "correct_loss_per_char": 0.8097274303436279, "incorrect_loss_per_char": 0.11575870960950851, "correct_loss_per_token": 1.6194548606872559, "incorrect_loss_per_token": 0.23151741921901703, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23151741921901703, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.23151741921901703, "logits_per_char": -0.11575870960950851, "num_chars": 2}, {"sum_logits": -1.6194548606872559, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.6194548606872559, "logits_per_char": -0.8097274303436279, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 141, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19169290363788605, "incorrect_loss_raw": 1.8308132886886597, "correct_loss_per_char": 0.09584645181894302, "incorrect_loss_per_char": 0.9154066443443298, "correct_loss_per_token": 0.19169290363788605, "incorrect_loss_per_token": 1.8308132886886597, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19169290363788605, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.19169290363788605, "logits_per_char": -0.09584645181894302, "num_chars": 2}, {"sum_logits": -1.8308132886886597, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.8308132886886597, "logits_per_char": -0.9154066443443298, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 142, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22898165881633759, "incorrect_loss_raw": 1.643568754196167, "correct_loss_per_char": 0.11449082940816879, "incorrect_loss_per_char": 0.8217843770980835, "correct_loss_per_token": 0.22898165881633759, "incorrect_loss_per_token": 1.643568754196167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22898165881633759, "num_tokens": 1, "num_tokens_all": 1140, "is_greedy": true, "logits_per_token": -0.22898165881633759, "logits_per_char": -0.11449082940816879, "num_chars": 2}, {"sum_logits": -1.643568754196167, "num_tokens": 1, "num_tokens_all": 1140, "is_greedy": false, "logits_per_token": -1.643568754196167, "logits_per_char": -0.8217843770980835, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 143, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2507583200931549, "incorrect_loss_raw": 1.5388129949569702, "correct_loss_per_char": 0.12537916004657745, "incorrect_loss_per_char": 0.7694064974784851, "correct_loss_per_token": 0.2507583200931549, "incorrect_loss_per_token": 1.5388129949569702, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2507583200931549, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.2507583200931549, "logits_per_char": -0.12537916004657745, "num_chars": 2}, {"sum_logits": -1.5388129949569702, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.5388129949569702, "logits_per_char": -0.7694064974784851, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 144, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1967983841896057, "incorrect_loss_raw": 1.7623796463012695, "correct_loss_per_char": 0.09839919209480286, "incorrect_loss_per_char": 0.8811898231506348, "correct_loss_per_token": 0.1967983841896057, "incorrect_loss_per_token": 1.7623796463012695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1967983841896057, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.1967983841896057, "logits_per_char": -0.09839919209480286, "num_chars": 2}, {"sum_logits": -1.7623796463012695, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.7623796463012695, "logits_per_char": -0.8811898231506348, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 145, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7057768106460571, "incorrect_loss_raw": 0.20866556465625763, "correct_loss_per_char": 0.8528884053230286, "incorrect_loss_per_char": 0.10433278232812881, "correct_loss_per_token": 1.7057768106460571, "incorrect_loss_per_token": 0.20866556465625763, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20866556465625763, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.20866556465625763, "logits_per_char": -0.10433278232812881, "num_chars": 2}, {"sum_logits": -1.7057768106460571, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.7057768106460571, "logits_per_char": -0.8528884053230286, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 146, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1996184140443802, "incorrect_loss_raw": 1.737560749053955, "correct_loss_per_char": 0.0998092070221901, "incorrect_loss_per_char": 0.8687803745269775, "correct_loss_per_token": 0.1996184140443802, "incorrect_loss_per_token": 1.737560749053955, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1996184140443802, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": true, "logits_per_token": -0.1996184140443802, "logits_per_char": -0.0998092070221901, "num_chars": 2}, {"sum_logits": -1.737560749053955, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": false, "logits_per_token": -1.737560749053955, "logits_per_char": -0.8687803745269775, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 147, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3255797028541565, "incorrect_loss_raw": 1.3543028831481934, "correct_loss_per_char": 0.16278985142707825, "incorrect_loss_per_char": 0.6771514415740967, "correct_loss_per_token": 0.3255797028541565, "incorrect_loss_per_token": 1.3543028831481934, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3255797028541565, "num_tokens": 1, "num_tokens_all": 1318, "is_greedy": true, "logits_per_token": -0.3255797028541565, "logits_per_char": -0.16278985142707825, "num_chars": 2}, {"sum_logits": -1.3543028831481934, "num_tokens": 1, "num_tokens_all": 1318, "is_greedy": false, "logits_per_token": -1.3543028831481934, "logits_per_char": -0.6771514415740967, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 148, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6273598670959473, "incorrect_loss_raw": 0.2293691784143448, "correct_loss_per_char": 0.8136799335479736, "incorrect_loss_per_char": 0.1146845892071724, "correct_loss_per_token": 1.6273598670959473, "incorrect_loss_per_token": 0.2293691784143448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2293691784143448, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.2293691784143448, "logits_per_char": -0.1146845892071724, "num_chars": 2}, {"sum_logits": -1.6273598670959473, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.6273598670959473, "logits_per_char": -0.8136799335479736, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 149, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21436470746994019, "incorrect_loss_raw": 1.7031834125518799, "correct_loss_per_char": 0.10718235373497009, "incorrect_loss_per_char": 0.8515917062759399, "correct_loss_per_token": 0.21436470746994019, "incorrect_loss_per_token": 1.7031834125518799, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21436470746994019, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": true, "logits_per_token": -0.21436470746994019, "logits_per_char": -0.10718235373497009, "num_chars": 2}, {"sum_logits": -1.7031834125518799, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": false, "logits_per_token": -1.7031834125518799, "logits_per_char": -0.8515917062759399, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 150, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2980774939060211, "incorrect_loss_raw": 1.409290075302124, "correct_loss_per_char": 0.14903874695301056, "incorrect_loss_per_char": 0.704645037651062, "correct_loss_per_token": 0.2980774939060211, "incorrect_loss_per_token": 1.409290075302124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2980774939060211, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.2980774939060211, "logits_per_char": -0.14903874695301056, "num_chars": 2}, {"sum_logits": -1.409290075302124, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.409290075302124, "logits_per_char": -0.704645037651062, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 151, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7516436576843262, "incorrect_loss_raw": 0.19732990860939026, "correct_loss_per_char": 0.8758218288421631, "incorrect_loss_per_char": 0.09866495430469513, "correct_loss_per_token": 1.7516436576843262, "incorrect_loss_per_token": 0.19732990860939026, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19732990860939026, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.19732990860939026, "logits_per_char": -0.09866495430469513, "num_chars": 2}, {"sum_logits": -1.7516436576843262, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.7516436576843262, "logits_per_char": -0.8758218288421631, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 152, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6195913553237915, "incorrect_loss_raw": 0.23255027830600739, "correct_loss_per_char": 0.8097956776618958, "incorrect_loss_per_char": 0.11627513915300369, "correct_loss_per_token": 1.6195913553237915, "incorrect_loss_per_token": 0.23255027830600739, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23255027830600739, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.23255027830600739, "logits_per_char": -0.11627513915300369, "num_chars": 2}, {"sum_logits": -1.6195913553237915, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.6195913553237915, "logits_per_char": -0.8097956776618958, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 153, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2349487543106079, "incorrect_loss_raw": 1.6058378219604492, "correct_loss_per_char": 0.11747437715530396, "incorrect_loss_per_char": 0.8029189109802246, "correct_loss_per_token": 0.2349487543106079, "incorrect_loss_per_token": 1.6058378219604492, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2349487543106079, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.2349487543106079, "logits_per_char": -0.11747437715530396, "num_chars": 2}, {"sum_logits": -1.6058378219604492, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.6058378219604492, "logits_per_char": -0.8029189109802246, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 154, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20115911960601807, "incorrect_loss_raw": 1.7344304323196411, "correct_loss_per_char": 0.10057955980300903, "incorrect_loss_per_char": 0.8672152161598206, "correct_loss_per_token": 0.20115911960601807, "incorrect_loss_per_token": 1.7344304323196411, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20115911960601807, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.20115911960601807, "logits_per_char": -0.10057955980300903, "num_chars": 2}, {"sum_logits": -1.7344304323196411, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.7344304323196411, "logits_per_char": -0.8672152161598206, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 155, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22554706037044525, "incorrect_loss_raw": 1.6444485187530518, "correct_loss_per_char": 0.11277353018522263, "incorrect_loss_per_char": 0.8222242593765259, "correct_loss_per_token": 0.22554706037044525, "incorrect_loss_per_token": 1.6444485187530518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22554706037044525, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.22554706037044525, "logits_per_char": -0.11277353018522263, "num_chars": 2}, {"sum_logits": -1.6444485187530518, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.6444485187530518, "logits_per_char": -0.8222242593765259, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 156, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21563731133937836, "incorrect_loss_raw": 1.683470368385315, "correct_loss_per_char": 0.10781865566968918, "incorrect_loss_per_char": 0.8417351841926575, "correct_loss_per_token": 0.21563731133937836, "incorrect_loss_per_token": 1.683470368385315, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21563731133937836, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.21563731133937836, "logits_per_char": -0.10781865566968918, "num_chars": 2}, {"sum_logits": -1.683470368385315, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.683470368385315, "logits_per_char": -0.8417351841926575, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 157, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.695122480392456, "incorrect_loss_raw": 0.21256399154663086, "correct_loss_per_char": 0.847561240196228, "incorrect_loss_per_char": 0.10628199577331543, "correct_loss_per_token": 1.695122480392456, "incorrect_loss_per_token": 0.21256399154663086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21256399154663086, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.21256399154663086, "logits_per_char": -0.10628199577331543, "num_chars": 2}, {"sum_logits": -1.695122480392456, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.695122480392456, "logits_per_char": -0.847561240196228, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 158, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23263539373874664, "incorrect_loss_raw": 1.6036046743392944, "correct_loss_per_char": 0.11631769686937332, "incorrect_loss_per_char": 0.8018023371696472, "correct_loss_per_token": 0.23263539373874664, "incorrect_loss_per_token": 1.6036046743392944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23263539373874664, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.23263539373874664, "logits_per_char": -0.11631769686937332, "num_chars": 2}, {"sum_logits": -1.6036046743392944, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.6036046743392944, "logits_per_char": -0.8018023371696472, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 159, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3004649877548218, "incorrect_loss_raw": 1.383231282234192, "correct_loss_per_char": 0.1502324938774109, "incorrect_loss_per_char": 0.691615641117096, "correct_loss_per_token": 0.3004649877548218, "incorrect_loss_per_token": 1.383231282234192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3004649877548218, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -0.3004649877548218, "logits_per_char": -0.1502324938774109, "num_chars": 2}, {"sum_logits": -1.383231282234192, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.383231282234192, "logits_per_char": -0.691615641117096, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 160, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30257466435432434, "incorrect_loss_raw": 1.3683406114578247, "correct_loss_per_char": 0.15128733217716217, "incorrect_loss_per_char": 0.6841703057289124, "correct_loss_per_token": 0.30257466435432434, "incorrect_loss_per_token": 1.3683406114578247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30257466435432434, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.30257466435432434, "logits_per_char": -0.15128733217716217, "num_chars": 2}, {"sum_logits": -1.3683406114578247, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.3683406114578247, "logits_per_char": -0.6841703057289124, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 161, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2713724374771118, "incorrect_loss_raw": 1.484756588935852, "correct_loss_per_char": 0.1356862187385559, "incorrect_loss_per_char": 0.742378294467926, "correct_loss_per_token": 0.2713724374771118, "incorrect_loss_per_token": 1.484756588935852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2713724374771118, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": true, "logits_per_token": -0.2713724374771118, "logits_per_char": -0.1356862187385559, "num_chars": 2}, {"sum_logits": -1.484756588935852, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": false, "logits_per_token": -1.484756588935852, "logits_per_char": -0.742378294467926, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 162, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2040744125843048, "incorrect_loss_raw": 1.7313514947891235, "correct_loss_per_char": 0.1020372062921524, "incorrect_loss_per_char": 0.8656757473945618, "correct_loss_per_token": 0.2040744125843048, "incorrect_loss_per_token": 1.7313514947891235, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2040744125843048, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": true, "logits_per_token": -0.2040744125843048, "logits_per_char": -0.1020372062921524, "num_chars": 2}, {"sum_logits": -1.7313514947891235, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": false, "logits_per_token": -1.7313514947891235, "logits_per_char": -0.8656757473945618, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 163, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21179929375648499, "incorrect_loss_raw": 1.7083336114883423, "correct_loss_per_char": 0.10589964687824249, "incorrect_loss_per_char": 0.8541668057441711, "correct_loss_per_token": 0.21179929375648499, "incorrect_loss_per_token": 1.7083336114883423, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21179929375648499, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -0.21179929375648499, "logits_per_char": -0.10589964687824249, "num_chars": 2}, {"sum_logits": -1.7083336114883423, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.7083336114883423, "logits_per_char": -0.8541668057441711, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 164, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4793277978897095, "incorrect_loss_raw": 0.27313244342803955, "correct_loss_per_char": 0.7396638989448547, "incorrect_loss_per_char": 0.13656622171401978, "correct_loss_per_token": 1.4793277978897095, "incorrect_loss_per_token": 0.27313244342803955, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27313244342803955, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": true, "logits_per_token": -0.27313244342803955, "logits_per_char": -0.13656622171401978, "num_chars": 2}, {"sum_logits": -1.4793277978897095, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": false, "logits_per_token": -1.4793277978897095, "logits_per_char": -0.7396638989448547, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 165, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21961551904678345, "incorrect_loss_raw": 1.6734061241149902, "correct_loss_per_char": 0.10980775952339172, "incorrect_loss_per_char": 0.8367030620574951, "correct_loss_per_token": 0.21961551904678345, "incorrect_loss_per_token": 1.6734061241149902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21961551904678345, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.21961551904678345, "logits_per_char": -0.10980775952339172, "num_chars": 2}, {"sum_logits": -1.6734061241149902, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.6734061241149902, "logits_per_char": -0.8367030620574951, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 166, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4639872312545776, "incorrect_loss_raw": 0.2738696038722992, "correct_loss_per_char": 0.7319936156272888, "incorrect_loss_per_char": 0.1369348019361496, "correct_loss_per_token": 1.4639872312545776, "incorrect_loss_per_token": 0.2738696038722992, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2738696038722992, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.2738696038722992, "logits_per_char": -0.1369348019361496, "num_chars": 2}, {"sum_logits": -1.4639872312545776, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.4639872312545776, "logits_per_char": -0.7319936156272888, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 167, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2639411389827728, "incorrect_loss_raw": 1.5023151636123657, "correct_loss_per_char": 0.1319705694913864, "incorrect_loss_per_char": 0.7511575818061829, "correct_loss_per_token": 0.2639411389827728, "incorrect_loss_per_token": 1.5023151636123657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2639411389827728, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.2639411389827728, "logits_per_char": -0.1319705694913864, "num_chars": 2}, {"sum_logits": -1.5023151636123657, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.5023151636123657, "logits_per_char": -0.7511575818061829, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 168, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4016952514648438, "incorrect_loss_raw": 0.2904324233531952, "correct_loss_per_char": 0.7008476257324219, "incorrect_loss_per_char": 0.1452162116765976, "correct_loss_per_token": 1.4016952514648438, "incorrect_loss_per_token": 0.2904324233531952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2904324233531952, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.2904324233531952, "logits_per_char": -0.1452162116765976, "num_chars": 2}, {"sum_logits": -1.4016952514648438, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.4016952514648438, "logits_per_char": -0.7008476257324219, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 169, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5552338361740112, "incorrect_loss_raw": 0.2542129456996918, "correct_loss_per_char": 0.7776169180870056, "incorrect_loss_per_char": 0.1271064728498459, "correct_loss_per_token": 1.5552338361740112, "incorrect_loss_per_token": 0.2542129456996918, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2542129456996918, "num_tokens": 1, "num_tokens_all": 1178, "is_greedy": true, "logits_per_token": -0.2542129456996918, "logits_per_char": -0.1271064728498459, "num_chars": 2}, {"sum_logits": -1.5552338361740112, "num_tokens": 1, "num_tokens_all": 1178, "is_greedy": false, "logits_per_token": -1.5552338361740112, "logits_per_char": -0.7776169180870056, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 170, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25047263503074646, "incorrect_loss_raw": 1.5576969385147095, "correct_loss_per_char": 0.12523631751537323, "incorrect_loss_per_char": 0.7788484692573547, "correct_loss_per_token": 0.25047263503074646, "incorrect_loss_per_token": 1.5576969385147095, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25047263503074646, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -0.25047263503074646, "logits_per_char": -0.12523631751537323, "num_chars": 2}, {"sum_logits": -1.5576969385147095, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.5576969385147095, "logits_per_char": -0.7788484692573547, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 171, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2427022010087967, "incorrect_loss_raw": 1.5725183486938477, "correct_loss_per_char": 0.12135110050439835, "incorrect_loss_per_char": 0.7862591743469238, "correct_loss_per_token": 0.2427022010087967, "incorrect_loss_per_token": 1.5725183486938477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2427022010087967, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.2427022010087967, "logits_per_char": -0.12135110050439835, "num_chars": 2}, {"sum_logits": -1.5725183486938477, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.5725183486938477, "logits_per_char": -0.7862591743469238, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 172, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4823768138885498, "incorrect_loss_raw": 0.26840880513191223, "correct_loss_per_char": 0.7411884069442749, "incorrect_loss_per_char": 0.13420440256595612, "correct_loss_per_token": 1.4823768138885498, "incorrect_loss_per_token": 0.26840880513191223, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26840880513191223, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.26840880513191223, "logits_per_char": -0.13420440256595612, "num_chars": 2}, {"sum_logits": -1.4823768138885498, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.4823768138885498, "logits_per_char": -0.7411884069442749, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 173, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24864669144153595, "incorrect_loss_raw": 1.572282314300537, "correct_loss_per_char": 0.12432334572076797, "incorrect_loss_per_char": 0.7861411571502686, "correct_loss_per_token": 0.24864669144153595, "incorrect_loss_per_token": 1.572282314300537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24864669144153595, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.24864669144153595, "logits_per_char": -0.12432334572076797, "num_chars": 2}, {"sum_logits": -1.572282314300537, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.572282314300537, "logits_per_char": -0.7861411571502686, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 174, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1755656898021698, "incorrect_loss_raw": 1.8576501607894897, "correct_loss_per_char": 0.0877828449010849, "incorrect_loss_per_char": 0.9288250803947449, "correct_loss_per_token": 0.1755656898021698, "incorrect_loss_per_token": 1.8576501607894897, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1755656898021698, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.1755656898021698, "logits_per_char": -0.0877828449010849, "num_chars": 2}, {"sum_logits": -1.8576501607894897, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.8576501607894897, "logits_per_char": -0.9288250803947449, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 175, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4606432914733887, "incorrect_loss_raw": 0.27852606773376465, "correct_loss_per_char": 0.7303216457366943, "incorrect_loss_per_char": 0.13926303386688232, "correct_loss_per_token": 1.4606432914733887, "incorrect_loss_per_token": 0.27852606773376465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27852606773376465, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.27852606773376465, "logits_per_char": -0.13926303386688232, "num_chars": 2}, {"sum_logits": -1.4606432914733887, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.4606432914733887, "logits_per_char": -0.7303216457366943, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 176, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22240713238716125, "incorrect_loss_raw": 1.660649061203003, "correct_loss_per_char": 0.11120356619358063, "incorrect_loss_per_char": 0.8303245306015015, "correct_loss_per_token": 0.22240713238716125, "incorrect_loss_per_token": 1.660649061203003, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22240713238716125, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -0.22240713238716125, "logits_per_char": -0.11120356619358063, "num_chars": 2}, {"sum_logits": -1.660649061203003, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.660649061203003, "logits_per_char": -0.8303245306015015, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 177, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26375579833984375, "incorrect_loss_raw": 1.511297345161438, "correct_loss_per_char": 0.13187789916992188, "incorrect_loss_per_char": 0.755648672580719, "correct_loss_per_token": 0.26375579833984375, "incorrect_loss_per_token": 1.511297345161438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26375579833984375, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.26375579833984375, "logits_per_char": -0.13187789916992188, "num_chars": 2}, {"sum_logits": -1.511297345161438, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.511297345161438, "logits_per_char": -0.755648672580719, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 178, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7766090631484985, "incorrect_loss_raw": 0.19794194400310516, "correct_loss_per_char": 0.8883045315742493, "incorrect_loss_per_char": 0.09897097200155258, "correct_loss_per_token": 1.7766090631484985, "incorrect_loss_per_token": 0.19794194400310516, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19794194400310516, "num_tokens": 1, "num_tokens_all": 1150, "is_greedy": true, "logits_per_token": -0.19794194400310516, "logits_per_char": -0.09897097200155258, "num_chars": 2}, {"sum_logits": -1.7766090631484985, "num_tokens": 1, "num_tokens_all": 1150, "is_greedy": false, "logits_per_token": -1.7766090631484985, "logits_per_char": -0.8883045315742493, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 179, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6057758331298828, "incorrect_loss_raw": 0.23399591445922852, "correct_loss_per_char": 0.8028879165649414, "incorrect_loss_per_char": 0.11699795722961426, "correct_loss_per_token": 1.6057758331298828, "incorrect_loss_per_token": 0.23399591445922852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23399591445922852, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.23399591445922852, "logits_per_char": -0.11699795722961426, "num_chars": 2}, {"sum_logits": -1.6057758331298828, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.6057758331298828, "logits_per_char": -0.8028879165649414, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 180, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27076947689056396, "incorrect_loss_raw": 1.4880812168121338, "correct_loss_per_char": 0.13538473844528198, "incorrect_loss_per_char": 0.7440406084060669, "correct_loss_per_token": 0.27076947689056396, "incorrect_loss_per_token": 1.4880812168121338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27076947689056396, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": true, "logits_per_token": -0.27076947689056396, "logits_per_char": -0.13538473844528198, "num_chars": 2}, {"sum_logits": -1.4880812168121338, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -1.4880812168121338, "logits_per_char": -0.7440406084060669, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 181, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19827060401439667, "incorrect_loss_raw": 1.757840871810913, "correct_loss_per_char": 0.09913530200719833, "incorrect_loss_per_char": 0.8789204359054565, "correct_loss_per_token": 0.19827060401439667, "incorrect_loss_per_token": 1.757840871810913, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19827060401439667, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.19827060401439667, "logits_per_char": -0.09913530200719833, "num_chars": 2}, {"sum_logits": -1.757840871810913, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.757840871810913, "logits_per_char": -0.8789204359054565, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 182, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16692239046096802, "incorrect_loss_raw": 1.9104561805725098, "correct_loss_per_char": 0.08346119523048401, "incorrect_loss_per_char": 0.9552280902862549, "correct_loss_per_token": 0.16692239046096802, "incorrect_loss_per_token": 1.9104561805725098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16692239046096802, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.16692239046096802, "logits_per_char": -0.08346119523048401, "num_chars": 2}, {"sum_logits": -1.9104561805725098, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.9104561805725098, "logits_per_char": -0.9552280902862549, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 183, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20387881994247437, "incorrect_loss_raw": 1.7333168983459473, "correct_loss_per_char": 0.10193940997123718, "incorrect_loss_per_char": 0.8666584491729736, "correct_loss_per_token": 0.20387881994247437, "incorrect_loss_per_token": 1.7333168983459473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20387881994247437, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": true, "logits_per_token": -0.20387881994247437, "logits_per_char": -0.10193940997123718, "num_chars": 2}, {"sum_logits": -1.7333168983459473, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.7333168983459473, "logits_per_char": -0.8666584491729736, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 184, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2738038897514343, "incorrect_loss_raw": 1.4799683094024658, "correct_loss_per_char": 0.13690194487571716, "incorrect_loss_per_char": 0.7399841547012329, "correct_loss_per_token": 0.2738038897514343, "incorrect_loss_per_token": 1.4799683094024658, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2738038897514343, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.2738038897514343, "logits_per_char": -0.13690194487571716, "num_chars": 2}, {"sum_logits": -1.4799683094024658, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.4799683094024658, "logits_per_char": -0.7399841547012329, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 185, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24943621456623077, "incorrect_loss_raw": 1.5546122789382935, "correct_loss_per_char": 0.12471810728311539, "incorrect_loss_per_char": 0.7773061394691467, "correct_loss_per_token": 0.24943621456623077, "incorrect_loss_per_token": 1.5546122789382935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24943621456623077, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.24943621456623077, "logits_per_char": -0.12471810728311539, "num_chars": 2}, {"sum_logits": -1.5546122789382935, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.5546122789382935, "logits_per_char": -0.7773061394691467, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 186, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19672054052352905, "incorrect_loss_raw": 1.7678182125091553, "correct_loss_per_char": 0.09836027026176453, "incorrect_loss_per_char": 0.8839091062545776, "correct_loss_per_token": 0.19672054052352905, "incorrect_loss_per_token": 1.7678182125091553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19672054052352905, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.19672054052352905, "logits_per_char": -0.09836027026176453, "num_chars": 2}, {"sum_logits": -1.7678182125091553, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.7678182125091553, "logits_per_char": -0.8839091062545776, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 187, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2583203613758087, "incorrect_loss_raw": 1.515869379043579, "correct_loss_per_char": 0.12916018068790436, "incorrect_loss_per_char": 0.7579346895217896, "correct_loss_per_token": 0.2583203613758087, "incorrect_loss_per_token": 1.515869379043579, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2583203613758087, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.2583203613758087, "logits_per_char": -0.12916018068790436, "num_chars": 2}, {"sum_logits": -1.515869379043579, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -1.515869379043579, "logits_per_char": -0.7579346895217896, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 188, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5453206300735474, "incorrect_loss_raw": 0.25615787506103516, "correct_loss_per_char": 0.7726603150367737, "incorrect_loss_per_char": 0.12807893753051758, "correct_loss_per_token": 1.5453206300735474, "incorrect_loss_per_token": 0.25615787506103516, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25615787506103516, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": true, "logits_per_token": -0.25615787506103516, "logits_per_char": -0.12807893753051758, "num_chars": 2}, {"sum_logits": -1.5453206300735474, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": false, "logits_per_token": -1.5453206300735474, "logits_per_char": -0.7726603150367737, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 189, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.554499864578247, "incorrect_loss_raw": 0.24694880843162537, "correct_loss_per_char": 0.7772499322891235, "incorrect_loss_per_char": 0.12347440421581268, "correct_loss_per_token": 1.554499864578247, "incorrect_loss_per_token": 0.24694880843162537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24694880843162537, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.24694880843162537, "logits_per_char": -0.12347440421581268, "num_chars": 2}, {"sum_logits": -1.554499864578247, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.554499864578247, "logits_per_char": -0.7772499322891235, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 190, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7049274444580078, "incorrect_loss_raw": 0.2106005996465683, "correct_loss_per_char": 0.8524637222290039, "incorrect_loss_per_char": 0.10530029982328415, "correct_loss_per_token": 1.7049274444580078, "incorrect_loss_per_token": 0.2106005996465683, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2106005996465683, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -0.2106005996465683, "logits_per_char": -0.10530029982328415, "num_chars": 2}, {"sum_logits": -1.7049274444580078, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.7049274444580078, "logits_per_char": -0.8524637222290039, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 191, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2267102152109146, "incorrect_loss_raw": 1.6417523622512817, "correct_loss_per_char": 0.1133551076054573, "incorrect_loss_per_char": 0.8208761811256409, "correct_loss_per_token": 0.2267102152109146, "incorrect_loss_per_token": 1.6417523622512817, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2267102152109146, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.2267102152109146, "logits_per_char": -0.1133551076054573, "num_chars": 2}, {"sum_logits": -1.6417523622512817, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.6417523622512817, "logits_per_char": -0.8208761811256409, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 192, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3419179916381836, "incorrect_loss_raw": 0.3132157325744629, "correct_loss_per_char": 0.6709589958190918, "incorrect_loss_per_char": 0.15660786628723145, "correct_loss_per_token": 1.3419179916381836, "incorrect_loss_per_token": 0.3132157325744629, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3132157325744629, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": true, "logits_per_token": -0.3132157325744629, "logits_per_char": -0.15660786628723145, "num_chars": 2}, {"sum_logits": -1.3419179916381836, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.3419179916381836, "logits_per_char": -0.6709589958190918, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 193, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.758561134338379, "incorrect_loss_raw": 0.19713063538074493, "correct_loss_per_char": 0.8792805671691895, "incorrect_loss_per_char": 0.09856531769037247, "correct_loss_per_token": 1.758561134338379, "incorrect_loss_per_token": 0.19713063538074493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19713063538074493, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.19713063538074493, "logits_per_char": -0.09856531769037247, "num_chars": 2}, {"sum_logits": -1.758561134338379, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.758561134338379, "logits_per_char": -0.8792805671691895, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 194, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3728276491165161, "incorrect_loss_raw": 0.3058924376964569, "correct_loss_per_char": 0.6864138245582581, "incorrect_loss_per_char": 0.15294621884822845, "correct_loss_per_token": 1.3728276491165161, "incorrect_loss_per_token": 0.3058924376964569, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3058924376964569, "num_tokens": 1, "num_tokens_all": 1022, "is_greedy": true, "logits_per_token": -0.3058924376964569, "logits_per_char": -0.15294621884822845, "num_chars": 2}, {"sum_logits": -1.3728276491165161, "num_tokens": 1, "num_tokens_all": 1022, "is_greedy": false, "logits_per_token": -1.3728276491165161, "logits_per_char": -0.6864138245582581, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 195, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1952141672372818, "incorrect_loss_raw": 1.7629951238632202, "correct_loss_per_char": 0.0976070836186409, "incorrect_loss_per_char": 0.8814975619316101, "correct_loss_per_token": 0.1952141672372818, "incorrect_loss_per_token": 1.7629951238632202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1952141672372818, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.1952141672372818, "logits_per_char": -0.0976070836186409, "num_chars": 2}, {"sum_logits": -1.7629951238632202, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.7629951238632202, "logits_per_char": -0.8814975619316101, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 196, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17430521547794342, "incorrect_loss_raw": 1.8818397521972656, "correct_loss_per_char": 0.08715260773897171, "incorrect_loss_per_char": 0.9409198760986328, "correct_loss_per_token": 0.17430521547794342, "incorrect_loss_per_token": 1.8818397521972656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17430521547794342, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.17430521547794342, "logits_per_char": -0.08715260773897171, "num_chars": 2}, {"sum_logits": -1.8818397521972656, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.8818397521972656, "logits_per_char": -0.9409198760986328, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 197, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2841807007789612, "incorrect_loss_raw": 1.4216821193695068, "correct_loss_per_char": 0.1420903503894806, "incorrect_loss_per_char": 0.7108410596847534, "correct_loss_per_token": 0.2841807007789612, "incorrect_loss_per_token": 1.4216821193695068, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2841807007789612, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.2841807007789612, "logits_per_char": -0.1420903503894806, "num_chars": 2}, {"sum_logits": -1.4216821193695068, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.4216821193695068, "logits_per_char": -0.7108410596847534, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 198, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21648356318473816, "incorrect_loss_raw": 1.688399076461792, "correct_loss_per_char": 0.10824178159236908, "incorrect_loss_per_char": 0.844199538230896, "correct_loss_per_token": 0.21648356318473816, "incorrect_loss_per_token": 1.688399076461792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21648356318473816, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.21648356318473816, "logits_per_char": -0.10824178159236908, "num_chars": 2}, {"sum_logits": -1.688399076461792, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.688399076461792, "logits_per_char": -0.844199538230896, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 199, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2884089946746826, "incorrect_loss_raw": 1.4330391883850098, "correct_loss_per_char": 0.1442044973373413, "incorrect_loss_per_char": 0.7165195941925049, "correct_loss_per_token": 0.2884089946746826, "incorrect_loss_per_token": 1.4330391883850098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2884089946746826, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.2884089946746826, "logits_per_char": -0.1442044973373413, "num_chars": 2}, {"sum_logits": -1.4330391883850098, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.4330391883850098, "logits_per_char": -0.7165195941925049, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 200, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5212829113006592, "incorrect_loss_raw": 0.2584575414657593, "correct_loss_per_char": 0.7606414556503296, "incorrect_loss_per_char": 0.12922877073287964, "correct_loss_per_token": 1.5212829113006592, "incorrect_loss_per_token": 0.2584575414657593, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2584575414657593, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.2584575414657593, "logits_per_char": -0.12922877073287964, "num_chars": 2}, {"sum_logits": -1.5212829113006592, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.5212829113006592, "logits_per_char": -0.7606414556503296, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 201, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25706952810287476, "incorrect_loss_raw": 1.5412592887878418, "correct_loss_per_char": 0.12853476405143738, "incorrect_loss_per_char": 0.7706296443939209, "correct_loss_per_token": 0.25706952810287476, "incorrect_loss_per_token": 1.5412592887878418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25706952810287476, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -0.25706952810287476, "logits_per_char": -0.12853476405143738, "num_chars": 2}, {"sum_logits": -1.5412592887878418, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.5412592887878418, "logits_per_char": -0.7706296443939209, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 202, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21949473023414612, "incorrect_loss_raw": 1.6608351469039917, "correct_loss_per_char": 0.10974736511707306, "incorrect_loss_per_char": 0.8304175734519958, "correct_loss_per_token": 0.21949473023414612, "incorrect_loss_per_token": 1.6608351469039917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21949473023414612, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.21949473023414612, "logits_per_char": -0.10974736511707306, "num_chars": 2}, {"sum_logits": -1.6608351469039917, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.6608351469039917, "logits_per_char": -0.8304175734519958, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 203, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21673651039600372, "incorrect_loss_raw": 1.6832891702651978, "correct_loss_per_char": 0.10836825519800186, "incorrect_loss_per_char": 0.8416445851325989, "correct_loss_per_token": 0.21673651039600372, "incorrect_loss_per_token": 1.6832891702651978, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21673651039600372, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.21673651039600372, "logits_per_char": -0.10836825519800186, "num_chars": 2}, {"sum_logits": -1.6832891702651978, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.6832891702651978, "logits_per_char": -0.8416445851325989, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 204, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23191218078136444, "incorrect_loss_raw": 1.6214462518692017, "correct_loss_per_char": 0.11595609039068222, "incorrect_loss_per_char": 0.8107231259346008, "correct_loss_per_token": 0.23191218078136444, "incorrect_loss_per_token": 1.6214462518692017, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23191218078136444, "num_tokens": 1, "num_tokens_all": 1025, "is_greedy": true, "logits_per_token": -0.23191218078136444, "logits_per_char": -0.11595609039068222, "num_chars": 2}, {"sum_logits": -1.6214462518692017, "num_tokens": 1, "num_tokens_all": 1025, "is_greedy": false, "logits_per_token": -1.6214462518692017, "logits_per_char": -0.8107231259346008, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 205, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21825654804706573, "incorrect_loss_raw": 1.652321696281433, "correct_loss_per_char": 0.10912827402353287, "incorrect_loss_per_char": 0.8261608481407166, "correct_loss_per_token": 0.21825654804706573, "incorrect_loss_per_token": 1.652321696281433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21825654804706573, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.21825654804706573, "logits_per_char": -0.10912827402353287, "num_chars": 2}, {"sum_logits": -1.652321696281433, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.652321696281433, "logits_per_char": -0.8261608481407166, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 206, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27725186944007874, "incorrect_loss_raw": 1.453661561012268, "correct_loss_per_char": 0.13862593472003937, "incorrect_loss_per_char": 0.726830780506134, "correct_loss_per_token": 0.27725186944007874, "incorrect_loss_per_token": 1.453661561012268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27725186944007874, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": true, "logits_per_token": -0.27725186944007874, "logits_per_char": -0.13862593472003937, "num_chars": 2}, {"sum_logits": -1.453661561012268, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": false, "logits_per_token": -1.453661561012268, "logits_per_char": -0.726830780506134, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 207, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23571619391441345, "incorrect_loss_raw": 1.591042160987854, "correct_loss_per_char": 0.11785809695720673, "incorrect_loss_per_char": 0.795521080493927, "correct_loss_per_token": 0.23571619391441345, "incorrect_loss_per_token": 1.591042160987854, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23571619391441345, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": true, "logits_per_token": -0.23571619391441345, "logits_per_char": -0.11785809695720673, "num_chars": 2}, {"sum_logits": -1.591042160987854, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.591042160987854, "logits_per_char": -0.795521080493927, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 208, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24171750247478485, "incorrect_loss_raw": 1.5974849462509155, "correct_loss_per_char": 0.12085875123739243, "incorrect_loss_per_char": 0.7987424731254578, "correct_loss_per_token": 0.24171750247478485, "incorrect_loss_per_token": 1.5974849462509155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24171750247478485, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": true, "logits_per_token": -0.24171750247478485, "logits_per_char": -0.12085875123739243, "num_chars": 2}, {"sum_logits": -1.5974849462509155, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.5974849462509155, "logits_per_char": -0.7987424731254578, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 209, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6040080785751343, "incorrect_loss_raw": 0.23336564004421234, "correct_loss_per_char": 0.8020040392875671, "incorrect_loss_per_char": 0.11668282002210617, "correct_loss_per_token": 1.6040080785751343, "incorrect_loss_per_token": 0.23336564004421234, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23336564004421234, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": true, "logits_per_token": -0.23336564004421234, "logits_per_char": -0.11668282002210617, "num_chars": 2}, {"sum_logits": -1.6040080785751343, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": false, "logits_per_token": -1.6040080785751343, "logits_per_char": -0.8020040392875671, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 210, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6937134265899658, "incorrect_loss_raw": 0.21088756620883942, "correct_loss_per_char": 0.8468567132949829, "incorrect_loss_per_char": 0.10544378310441971, "correct_loss_per_token": 1.6937134265899658, "incorrect_loss_per_token": 0.21088756620883942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21088756620883942, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.21088756620883942, "logits_per_char": -0.10544378310441971, "num_chars": 2}, {"sum_logits": -1.6937134265899658, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.6937134265899658, "logits_per_char": -0.8468567132949829, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 211, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6552809476852417, "incorrect_loss_raw": 0.22659124433994293, "correct_loss_per_char": 0.8276404738426208, "incorrect_loss_per_char": 0.11329562216997147, "correct_loss_per_token": 1.6552809476852417, "incorrect_loss_per_token": 0.22659124433994293, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22659124433994293, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.22659124433994293, "logits_per_char": -0.11329562216997147, "num_chars": 2}, {"sum_logits": -1.6552809476852417, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.6552809476852417, "logits_per_char": -0.8276404738426208, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 212, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5314292907714844, "incorrect_loss_raw": 0.2528951168060303, "correct_loss_per_char": 0.7657146453857422, "incorrect_loss_per_char": 0.12644755840301514, "correct_loss_per_token": 1.5314292907714844, "incorrect_loss_per_token": 0.2528951168060303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2528951168060303, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.2528951168060303, "logits_per_char": -0.12644755840301514, "num_chars": 2}, {"sum_logits": -1.5314292907714844, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.5314292907714844, "logits_per_char": -0.7657146453857422, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 213, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4511901140213013, "incorrect_loss_raw": 0.28025877475738525, "correct_loss_per_char": 0.7255950570106506, "incorrect_loss_per_char": 0.14012938737869263, "correct_loss_per_token": 1.4511901140213013, "incorrect_loss_per_token": 0.28025877475738525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28025877475738525, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": true, "logits_per_token": -0.28025877475738525, "logits_per_char": -0.14012938737869263, "num_chars": 2}, {"sum_logits": -1.4511901140213013, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.4511901140213013, "logits_per_char": -0.7255950570106506, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 214, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19087247550487518, "incorrect_loss_raw": 1.8442412614822388, "correct_loss_per_char": 0.09543623775243759, "incorrect_loss_per_char": 0.9221206307411194, "correct_loss_per_token": 0.19087247550487518, "incorrect_loss_per_token": 1.8442412614822388, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19087247550487518, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.19087247550487518, "logits_per_char": -0.09543623775243759, "num_chars": 2}, {"sum_logits": -1.8442412614822388, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.8442412614822388, "logits_per_char": -0.9221206307411194, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 215, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.831109642982483, "incorrect_loss_raw": 0.18515832722187042, "correct_loss_per_char": 0.9155548214912415, "incorrect_loss_per_char": 0.09257916361093521, "correct_loss_per_token": 1.831109642982483, "incorrect_loss_per_token": 0.18515832722187042, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18515832722187042, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.18515832722187042, "logits_per_char": -0.09257916361093521, "num_chars": 2}, {"sum_logits": -1.831109642982483, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.831109642982483, "logits_per_char": -0.9155548214912415, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 216, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20055949687957764, "incorrect_loss_raw": 1.7311182022094727, "correct_loss_per_char": 0.10027974843978882, "incorrect_loss_per_char": 0.8655591011047363, "correct_loss_per_token": 0.20055949687957764, "incorrect_loss_per_token": 1.7311182022094727, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20055949687957764, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.20055949687957764, "logits_per_char": -0.10027974843978882, "num_chars": 2}, {"sum_logits": -1.7311182022094727, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.7311182022094727, "logits_per_char": -0.8655591011047363, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 217, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20160678029060364, "incorrect_loss_raw": 1.7705762386322021, "correct_loss_per_char": 0.10080339014530182, "incorrect_loss_per_char": 0.8852881193161011, "correct_loss_per_token": 0.20160678029060364, "incorrect_loss_per_token": 1.7705762386322021, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20160678029060364, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": true, "logits_per_token": -0.20160678029060364, "logits_per_char": -0.10080339014530182, "num_chars": 2}, {"sum_logits": -1.7705762386322021, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -1.7705762386322021, "logits_per_char": -0.8852881193161011, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 218, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23798242211341858, "incorrect_loss_raw": 1.6234709024429321, "correct_loss_per_char": 0.11899121105670929, "incorrect_loss_per_char": 0.8117354512214661, "correct_loss_per_token": 0.23798242211341858, "incorrect_loss_per_token": 1.6234709024429321, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23798242211341858, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.23798242211341858, "logits_per_char": -0.11899121105670929, "num_chars": 2}, {"sum_logits": -1.6234709024429321, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.6234709024429321, "logits_per_char": -0.8117354512214661, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 219, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24177394807338715, "incorrect_loss_raw": 1.5759525299072266, "correct_loss_per_char": 0.12088697403669357, "incorrect_loss_per_char": 0.7879762649536133, "correct_loss_per_token": 0.24177394807338715, "incorrect_loss_per_token": 1.5759525299072266, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24177394807338715, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.24177394807338715, "logits_per_char": -0.12088697403669357, "num_chars": 2}, {"sum_logits": -1.5759525299072266, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.5759525299072266, "logits_per_char": -0.7879762649536133, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 220, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17181502282619476, "incorrect_loss_raw": 1.8915332555770874, "correct_loss_per_char": 0.08590751141309738, "incorrect_loss_per_char": 0.9457666277885437, "correct_loss_per_token": 0.17181502282619476, "incorrect_loss_per_token": 1.8915332555770874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17181502282619476, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.17181502282619476, "logits_per_char": -0.08590751141309738, "num_chars": 2}, {"sum_logits": -1.8915332555770874, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -1.8915332555770874, "logits_per_char": -0.9457666277885437, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 221, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14555588364601135, "incorrect_loss_raw": 2.0633842945098877, "correct_loss_per_char": 0.07277794182300568, "incorrect_loss_per_char": 1.0316921472549438, "correct_loss_per_token": 0.14555588364601135, "incorrect_loss_per_token": 2.0633842945098877, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14555588364601135, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.14555588364601135, "logits_per_char": -0.07277794182300568, "num_chars": 2}, {"sum_logits": -2.0633842945098877, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -2.0633842945098877, "logits_per_char": -1.0316921472549438, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 222, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3017411231994629, "incorrect_loss_raw": 1.4375262260437012, "correct_loss_per_char": 0.15087056159973145, "incorrect_loss_per_char": 0.7187631130218506, "correct_loss_per_token": 0.3017411231994629, "incorrect_loss_per_token": 1.4375262260437012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3017411231994629, "num_tokens": 1, "num_tokens_all": 1255, "is_greedy": true, "logits_per_token": -0.3017411231994629, "logits_per_char": -0.15087056159973145, "num_chars": 2}, {"sum_logits": -1.4375262260437012, "num_tokens": 1, "num_tokens_all": 1255, "is_greedy": false, "logits_per_token": -1.4375262260437012, "logits_per_char": -0.7187631130218506, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 223, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6251214742660522, "incorrect_loss_raw": 0.23231519758701324, "correct_loss_per_char": 0.8125607371330261, "incorrect_loss_per_char": 0.11615759879350662, "correct_loss_per_token": 1.6251214742660522, "incorrect_loss_per_token": 0.23231519758701324, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23231519758701324, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.23231519758701324, "logits_per_char": -0.11615759879350662, "num_chars": 2}, {"sum_logits": -1.6251214742660522, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.6251214742660522, "logits_per_char": -0.8125607371330261, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 224, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22393107414245605, "incorrect_loss_raw": 1.6451153755187988, "correct_loss_per_char": 0.11196553707122803, "incorrect_loss_per_char": 0.8225576877593994, "correct_loss_per_token": 0.22393107414245605, "incorrect_loss_per_token": 1.6451153755187988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22393107414245605, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.22393107414245605, "logits_per_char": -0.11196553707122803, "num_chars": 2}, {"sum_logits": -1.6451153755187988, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.6451153755187988, "logits_per_char": -0.8225576877593994, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 225, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21191667020320892, "incorrect_loss_raw": 1.6886159181594849, "correct_loss_per_char": 0.10595833510160446, "incorrect_loss_per_char": 0.8443079590797424, "correct_loss_per_token": 0.21191667020320892, "incorrect_loss_per_token": 1.6886159181594849, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21191667020320892, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": true, "logits_per_token": -0.21191667020320892, "logits_per_char": -0.10595833510160446, "num_chars": 2}, {"sum_logits": -1.6886159181594849, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": false, "logits_per_token": -1.6886159181594849, "logits_per_char": -0.8443079590797424, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 226, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21559128165245056, "incorrect_loss_raw": 1.6889992952346802, "correct_loss_per_char": 0.10779564082622528, "incorrect_loss_per_char": 0.8444996476173401, "correct_loss_per_token": 0.21559128165245056, "incorrect_loss_per_token": 1.6889992952346802, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21559128165245056, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.21559128165245056, "logits_per_char": -0.10779564082622528, "num_chars": 2}, {"sum_logits": -1.6889992952346802, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.6889992952346802, "logits_per_char": -0.8444996476173401, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 227, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.210485577583313, "incorrect_loss_raw": 1.7014678716659546, "correct_loss_per_char": 0.1052427887916565, "incorrect_loss_per_char": 0.8507339358329773, "correct_loss_per_token": 0.210485577583313, "incorrect_loss_per_token": 1.7014678716659546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.210485577583313, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.210485577583313, "logits_per_char": -0.1052427887916565, "num_chars": 2}, {"sum_logits": -1.7014678716659546, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.7014678716659546, "logits_per_char": -0.8507339358329773, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 228, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2523322105407715, "incorrect_loss_raw": 1.545668125152588, "correct_loss_per_char": 0.12616610527038574, "incorrect_loss_per_char": 0.772834062576294, "correct_loss_per_token": 0.2523322105407715, "incorrect_loss_per_token": 1.545668125152588, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2523322105407715, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.2523322105407715, "logits_per_char": -0.12616610527038574, "num_chars": 2}, {"sum_logits": -1.545668125152588, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.545668125152588, "logits_per_char": -0.772834062576294, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 229, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6355564594268799, "incorrect_loss_raw": 0.2272651195526123, "correct_loss_per_char": 0.8177782297134399, "incorrect_loss_per_char": 0.11363255977630615, "correct_loss_per_token": 1.6355564594268799, "incorrect_loss_per_token": 0.2272651195526123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2272651195526123, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.2272651195526123, "logits_per_char": -0.11363255977630615, "num_chars": 2}, {"sum_logits": -1.6355564594268799, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.6355564594268799, "logits_per_char": -0.8177782297134399, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 230, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1933353692293167, "incorrect_loss_raw": 1.7615528106689453, "correct_loss_per_char": 0.09666768461465836, "incorrect_loss_per_char": 0.8807764053344727, "correct_loss_per_token": 0.1933353692293167, "incorrect_loss_per_token": 1.7615528106689453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1933353692293167, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.1933353692293167, "logits_per_char": -0.09666768461465836, "num_chars": 2}, {"sum_logits": -1.7615528106689453, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -1.7615528106689453, "logits_per_char": -0.8807764053344727, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 231, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6617305278778076, "incorrect_loss_raw": 0.22346879541873932, "correct_loss_per_char": 0.8308652639389038, "incorrect_loss_per_char": 0.11173439770936966, "correct_loss_per_token": 1.6617305278778076, "incorrect_loss_per_token": 0.22346879541873932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22346879541873932, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": true, "logits_per_token": -0.22346879541873932, "logits_per_char": -0.11173439770936966, "num_chars": 2}, {"sum_logits": -1.6617305278778076, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.6617305278778076, "logits_per_char": -0.8308652639389038, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 232, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.31725284457206726, "incorrect_loss_raw": 1.3786087036132812, "correct_loss_per_char": 0.15862642228603363, "incorrect_loss_per_char": 0.6893043518066406, "correct_loss_per_token": 0.31725284457206726, "incorrect_loss_per_token": 1.3786087036132812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.31725284457206726, "num_tokens": 1, "num_tokens_all": 1250, "is_greedy": true, "logits_per_token": -0.31725284457206726, "logits_per_char": -0.15862642228603363, "num_chars": 2}, {"sum_logits": -1.3786087036132812, "num_tokens": 1, "num_tokens_all": 1250, "is_greedy": false, "logits_per_token": -1.3786087036132812, "logits_per_char": -0.6893043518066406, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 233, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18834978342056274, "incorrect_loss_raw": 1.8251233100891113, "correct_loss_per_char": 0.09417489171028137, "incorrect_loss_per_char": 0.9125616550445557, "correct_loss_per_token": 0.18834978342056274, "incorrect_loss_per_token": 1.8251233100891113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18834978342056274, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -0.18834978342056274, "logits_per_char": -0.09417489171028137, "num_chars": 2}, {"sum_logits": -1.8251233100891113, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.8251233100891113, "logits_per_char": -0.9125616550445557, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 234, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2604224383831024, "incorrect_loss_raw": 1.516141414642334, "correct_loss_per_char": 0.1302112191915512, "incorrect_loss_per_char": 0.758070707321167, "correct_loss_per_token": 0.2604224383831024, "incorrect_loss_per_token": 1.516141414642334, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2604224383831024, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.2604224383831024, "logits_per_char": -0.1302112191915512, "num_chars": 2}, {"sum_logits": -1.516141414642334, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.516141414642334, "logits_per_char": -0.758070707321167, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 235, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4914206266403198, "incorrect_loss_raw": 0.2683047354221344, "correct_loss_per_char": 0.7457103133201599, "incorrect_loss_per_char": 0.1341523677110672, "correct_loss_per_token": 1.4914206266403198, "incorrect_loss_per_token": 0.2683047354221344, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2683047354221344, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": true, "logits_per_token": -0.2683047354221344, "logits_per_char": -0.1341523677110672, "num_chars": 2}, {"sum_logits": -1.4914206266403198, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": false, "logits_per_token": -1.4914206266403198, "logits_per_char": -0.7457103133201599, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 236, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2812427878379822, "incorrect_loss_raw": 1.4335689544677734, "correct_loss_per_char": 0.1406213939189911, "incorrect_loss_per_char": 0.7167844772338867, "correct_loss_per_token": 0.2812427878379822, "incorrect_loss_per_token": 1.4335689544677734, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2812427878379822, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.2812427878379822, "logits_per_char": -0.1406213939189911, "num_chars": 2}, {"sum_logits": -1.4335689544677734, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.4335689544677734, "logits_per_char": -0.7167844772338867, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 237, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4931740760803223, "incorrect_loss_raw": 0.26733124256134033, "correct_loss_per_char": 0.7465870380401611, "incorrect_loss_per_char": 0.13366562128067017, "correct_loss_per_token": 1.4931740760803223, "incorrect_loss_per_token": 0.26733124256134033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26733124256134033, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.26733124256134033, "logits_per_char": -0.13366562128067017, "num_chars": 2}, {"sum_logits": -1.4931740760803223, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.4931740760803223, "logits_per_char": -0.7465870380401611, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 238, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5549179315567017, "incorrect_loss_raw": 0.24996110796928406, "correct_loss_per_char": 0.7774589657783508, "incorrect_loss_per_char": 0.12498055398464203, "correct_loss_per_token": 1.5549179315567017, "incorrect_loss_per_token": 0.24996110796928406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24996110796928406, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.24996110796928406, "logits_per_char": -0.12498055398464203, "num_chars": 2}, {"sum_logits": -1.5549179315567017, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.5549179315567017, "logits_per_char": -0.7774589657783508, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 239, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17199008166790009, "incorrect_loss_raw": 1.8850579261779785, "correct_loss_per_char": 0.08599504083395004, "incorrect_loss_per_char": 0.9425289630889893, "correct_loss_per_token": 0.17199008166790009, "incorrect_loss_per_token": 1.8850579261779785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17199008166790009, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -0.17199008166790009, "logits_per_char": -0.08599504083395004, "num_chars": 2}, {"sum_logits": -1.8850579261779785, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -1.8850579261779785, "logits_per_char": -0.9425289630889893, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 240, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2373872697353363, "incorrect_loss_raw": 1.6000138521194458, "correct_loss_per_char": 0.11869363486766815, "incorrect_loss_per_char": 0.8000069260597229, "correct_loss_per_token": 0.2373872697353363, "incorrect_loss_per_token": 1.6000138521194458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2373872697353363, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.2373872697353363, "logits_per_char": -0.11869363486766815, "num_chars": 2}, {"sum_logits": -1.6000138521194458, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.6000138521194458, "logits_per_char": -0.8000069260597229, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 241, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22163312137126923, "incorrect_loss_raw": 1.6496360301971436, "correct_loss_per_char": 0.11081656068563461, "incorrect_loss_per_char": 0.8248180150985718, "correct_loss_per_token": 0.22163312137126923, "incorrect_loss_per_token": 1.6496360301971436, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22163312137126923, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.22163312137126923, "logits_per_char": -0.11081656068563461, "num_chars": 2}, {"sum_logits": -1.6496360301971436, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.6496360301971436, "logits_per_char": -0.8248180150985718, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 242, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.214656263589859, "incorrect_loss_raw": 1.6819018125534058, "correct_loss_per_char": 0.1073281317949295, "incorrect_loss_per_char": 0.8409509062767029, "correct_loss_per_token": 0.214656263589859, "incorrect_loss_per_token": 1.6819018125534058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.214656263589859, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.214656263589859, "logits_per_char": -0.1073281317949295, "num_chars": 2}, {"sum_logits": -1.6819018125534058, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.6819018125534058, "logits_per_char": -0.8409509062767029, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 243, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25071990489959717, "incorrect_loss_raw": 1.5441430807113647, "correct_loss_per_char": 0.12535995244979858, "incorrect_loss_per_char": 0.7720715403556824, "correct_loss_per_token": 0.25071990489959717, "incorrect_loss_per_token": 1.5441430807113647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25071990489959717, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": true, "logits_per_token": -0.25071990489959717, "logits_per_char": -0.12535995244979858, "num_chars": 2}, {"sum_logits": -1.5441430807113647, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": false, "logits_per_token": -1.5441430807113647, "logits_per_char": -0.7720715403556824, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 244, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6736769676208496, "incorrect_loss_raw": 0.21967265009880066, "correct_loss_per_char": 0.8368384838104248, "incorrect_loss_per_char": 0.10983632504940033, "correct_loss_per_token": 1.6736769676208496, "incorrect_loss_per_token": 0.21967265009880066, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21967265009880066, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.21967265009880066, "logits_per_char": -0.10983632504940033, "num_chars": 2}, {"sum_logits": -1.6736769676208496, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.6736769676208496, "logits_per_char": -0.8368384838104248, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 245, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6932905912399292, "incorrect_loss_raw": 0.2117207795381546, "correct_loss_per_char": 0.8466452956199646, "incorrect_loss_per_char": 0.1058603897690773, "correct_loss_per_token": 1.6932905912399292, "incorrect_loss_per_token": 0.2117207795381546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2117207795381546, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.2117207795381546, "logits_per_char": -0.1058603897690773, "num_chars": 2}, {"sum_logits": -1.6932905912399292, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.6932905912399292, "logits_per_char": -0.8466452956199646, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 246, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24588485062122345, "incorrect_loss_raw": 1.5639359951019287, "correct_loss_per_char": 0.12294242531061172, "incorrect_loss_per_char": 0.7819679975509644, "correct_loss_per_token": 0.24588485062122345, "incorrect_loss_per_token": 1.5639359951019287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24588485062122345, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -0.24588485062122345, "logits_per_char": -0.12294242531061172, "num_chars": 2}, {"sum_logits": -1.5639359951019287, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.5639359951019287, "logits_per_char": -0.7819679975509644, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 247, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7106631994247437, "incorrect_loss_raw": 0.20900067687034607, "correct_loss_per_char": 0.8553315997123718, "incorrect_loss_per_char": 0.10450033843517303, "correct_loss_per_token": 1.7106631994247437, "incorrect_loss_per_token": 0.20900067687034607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20900067687034607, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.20900067687034607, "logits_per_char": -0.10450033843517303, "num_chars": 2}, {"sum_logits": -1.7106631994247437, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.7106631994247437, "logits_per_char": -0.8553315997123718, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 248, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.708348035812378, "incorrect_loss_raw": 0.2077854573726654, "correct_loss_per_char": 0.854174017906189, "incorrect_loss_per_char": 0.1038927286863327, "correct_loss_per_token": 1.708348035812378, "incorrect_loss_per_token": 0.2077854573726654, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2077854573726654, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -0.2077854573726654, "logits_per_char": -0.1038927286863327, "num_chars": 2}, {"sum_logits": -1.708348035812378, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.708348035812378, "logits_per_char": -0.854174017906189, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 249, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2334832400083542, "incorrect_loss_raw": 1.6113380193710327, "correct_loss_per_char": 0.1167416200041771, "incorrect_loss_per_char": 0.8056690096855164, "correct_loss_per_token": 0.2334832400083542, "incorrect_loss_per_token": 1.6113380193710327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2334832400083542, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -0.2334832400083542, "logits_per_char": -0.1167416200041771, "num_chars": 2}, {"sum_logits": -1.6113380193710327, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.6113380193710327, "logits_per_char": -0.8056690096855164, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 250, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29189884662628174, "incorrect_loss_raw": 1.4154086112976074, "correct_loss_per_char": 0.14594942331314087, "incorrect_loss_per_char": 0.7077043056488037, "correct_loss_per_token": 0.29189884662628174, "incorrect_loss_per_token": 1.4154086112976074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29189884662628174, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": true, "logits_per_token": -0.29189884662628174, "logits_per_char": -0.14594942331314087, "num_chars": 2}, {"sum_logits": -1.4154086112976074, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": false, "logits_per_token": -1.4154086112976074, "logits_per_char": -0.7077043056488037, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 251, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3922679424285889, "incorrect_loss_raw": 0.2975935637950897, "correct_loss_per_char": 0.6961339712142944, "incorrect_loss_per_char": 0.14879678189754486, "correct_loss_per_token": 1.3922679424285889, "incorrect_loss_per_token": 0.2975935637950897, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2975935637950897, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.2975935637950897, "logits_per_char": -0.14879678189754486, "num_chars": 2}, {"sum_logits": -1.3922679424285889, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.3922679424285889, "logits_per_char": -0.6961339712142944, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 252, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22131387889385223, "incorrect_loss_raw": 1.6617683172225952, "correct_loss_per_char": 0.11065693944692612, "incorrect_loss_per_char": 0.8308841586112976, "correct_loss_per_token": 0.22131387889385223, "incorrect_loss_per_token": 1.6617683172225952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22131387889385223, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.22131387889385223, "logits_per_char": -0.11065693944692612, "num_chars": 2}, {"sum_logits": -1.6617683172225952, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.6617683172225952, "logits_per_char": -0.8308841586112976, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 253, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6426820755004883, "incorrect_loss_raw": 0.23022083938121796, "correct_loss_per_char": 0.8213410377502441, "incorrect_loss_per_char": 0.11511041969060898, "correct_loss_per_token": 1.6426820755004883, "incorrect_loss_per_token": 0.23022083938121796, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23022083938121796, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": true, "logits_per_token": -0.23022083938121796, "logits_per_char": -0.11511041969060898, "num_chars": 2}, {"sum_logits": -1.6426820755004883, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": false, "logits_per_token": -1.6426820755004883, "logits_per_char": -0.8213410377502441, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 254, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22569328546524048, "incorrect_loss_raw": 1.6475751399993896, "correct_loss_per_char": 0.11284664273262024, "incorrect_loss_per_char": 0.8237875699996948, "correct_loss_per_token": 0.22569328546524048, "incorrect_loss_per_token": 1.6475751399993896, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22569328546524048, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": true, "logits_per_token": -0.22569328546524048, "logits_per_char": -0.11284664273262024, "num_chars": 2}, {"sum_logits": -1.6475751399993896, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": false, "logits_per_token": -1.6475751399993896, "logits_per_char": -0.8237875699996948, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 255, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20381037890911102, "incorrect_loss_raw": 1.7230807542800903, "correct_loss_per_char": 0.10190518945455551, "incorrect_loss_per_char": 0.8615403771400452, "correct_loss_per_token": 0.20381037890911102, "incorrect_loss_per_token": 1.7230807542800903, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20381037890911102, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": true, "logits_per_token": -0.20381037890911102, "logits_per_char": -0.10190518945455551, "num_chars": 2}, {"sum_logits": -1.7230807542800903, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -1.7230807542800903, "logits_per_char": -0.8615403771400452, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 256, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23796963691711426, "incorrect_loss_raw": 1.606449842453003, "correct_loss_per_char": 0.11898481845855713, "incorrect_loss_per_char": 0.8032249212265015, "correct_loss_per_token": 0.23796963691711426, "incorrect_loss_per_token": 1.606449842453003, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23796963691711426, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.23796963691711426, "logits_per_char": -0.11898481845855713, "num_chars": 2}, {"sum_logits": -1.606449842453003, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.606449842453003, "logits_per_char": -0.8032249212265015, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 257, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26039671897888184, "incorrect_loss_raw": 1.518122911453247, "correct_loss_per_char": 0.13019835948944092, "incorrect_loss_per_char": 0.7590614557266235, "correct_loss_per_token": 0.26039671897888184, "incorrect_loss_per_token": 1.518122911453247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26039671897888184, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.26039671897888184, "logits_per_char": -0.13019835948944092, "num_chars": 2}, {"sum_logits": -1.518122911453247, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.518122911453247, "logits_per_char": -0.7590614557266235, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 258, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28205639123916626, "incorrect_loss_raw": 1.4551782608032227, "correct_loss_per_char": 0.14102819561958313, "incorrect_loss_per_char": 0.7275891304016113, "correct_loss_per_token": 0.28205639123916626, "incorrect_loss_per_token": 1.4551782608032227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28205639123916626, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": true, "logits_per_token": -0.28205639123916626, "logits_per_char": -0.14102819561958313, "num_chars": 2}, {"sum_logits": -1.4551782608032227, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -1.4551782608032227, "logits_per_char": -0.7275891304016113, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 259, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1718740463256836, "incorrect_loss_raw": 1.9004708528518677, "correct_loss_per_char": 0.0859370231628418, "incorrect_loss_per_char": 0.9502354264259338, "correct_loss_per_token": 0.1718740463256836, "incorrect_loss_per_token": 1.9004708528518677, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1718740463256836, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.1718740463256836, "logits_per_char": -0.0859370231628418, "num_chars": 2}, {"sum_logits": -1.9004708528518677, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.9004708528518677, "logits_per_char": -0.9502354264259338, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 260, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2551668584346771, "incorrect_loss_raw": 1.5263272523880005, "correct_loss_per_char": 0.12758342921733856, "incorrect_loss_per_char": 0.7631636261940002, "correct_loss_per_token": 0.2551668584346771, "incorrect_loss_per_token": 1.5263272523880005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2551668584346771, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -0.2551668584346771, "logits_per_char": -0.12758342921733856, "num_chars": 2}, {"sum_logits": -1.5263272523880005, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.5263272523880005, "logits_per_char": -0.7631636261940002, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 261, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.410736322402954, "incorrect_loss_raw": 0.29319143295288086, "correct_loss_per_char": 0.705368161201477, "incorrect_loss_per_char": 0.14659571647644043, "correct_loss_per_token": 1.410736322402954, "incorrect_loss_per_token": 0.29319143295288086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29319143295288086, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -0.29319143295288086, "logits_per_char": -0.14659571647644043, "num_chars": 2}, {"sum_logits": -1.410736322402954, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.410736322402954, "logits_per_char": -0.705368161201477, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 262, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27719295024871826, "incorrect_loss_raw": 1.4517093896865845, "correct_loss_per_char": 0.13859647512435913, "incorrect_loss_per_char": 0.7258546948432922, "correct_loss_per_token": 0.27719295024871826, "incorrect_loss_per_token": 1.4517093896865845, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27719295024871826, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.27719295024871826, "logits_per_char": -0.13859647512435913, "num_chars": 2}, {"sum_logits": -1.4517093896865845, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -1.4517093896865845, "logits_per_char": -0.7258546948432922, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 263, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18641282618045807, "incorrect_loss_raw": 1.8032435178756714, "correct_loss_per_char": 0.09320641309022903, "incorrect_loss_per_char": 0.9016217589378357, "correct_loss_per_token": 0.18641282618045807, "incorrect_loss_per_token": 1.8032435178756714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18641282618045807, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.18641282618045807, "logits_per_char": -0.09320641309022903, "num_chars": 2}, {"sum_logits": -1.8032435178756714, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.8032435178756714, "logits_per_char": -0.9016217589378357, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 264, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2767798900604248, "incorrect_loss_raw": 1.4653834104537964, "correct_loss_per_char": 0.1383899450302124, "incorrect_loss_per_char": 0.7326917052268982, "correct_loss_per_token": 0.2767798900604248, "incorrect_loss_per_token": 1.4653834104537964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2767798900604248, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -0.2767798900604248, "logits_per_char": -0.1383899450302124, "num_chars": 2}, {"sum_logits": -1.4653834104537964, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.4653834104537964, "logits_per_char": -0.7326917052268982, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 265, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21977819502353668, "incorrect_loss_raw": 1.6773476600646973, "correct_loss_per_char": 0.10988909751176834, "incorrect_loss_per_char": 0.8386738300323486, "correct_loss_per_token": 0.21977819502353668, "incorrect_loss_per_token": 1.6773476600646973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21977819502353668, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.21977819502353668, "logits_per_char": -0.10988909751176834, "num_chars": 2}, {"sum_logits": -1.6773476600646973, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.6773476600646973, "logits_per_char": -0.8386738300323486, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 266, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20977389812469482, "incorrect_loss_raw": 1.7263487577438354, "correct_loss_per_char": 0.10488694906234741, "incorrect_loss_per_char": 0.8631743788719177, "correct_loss_per_token": 0.20977389812469482, "incorrect_loss_per_token": 1.7263487577438354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20977389812469482, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.20977389812469482, "logits_per_char": -0.10488694906234741, "num_chars": 2}, {"sum_logits": -1.7263487577438354, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.7263487577438354, "logits_per_char": -0.8631743788719177, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 267, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17219191789627075, "incorrect_loss_raw": 1.8900461196899414, "correct_loss_per_char": 0.08609595894813538, "incorrect_loss_per_char": 0.9450230598449707, "correct_loss_per_token": 0.17219191789627075, "incorrect_loss_per_token": 1.8900461196899414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17219191789627075, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.17219191789627075, "logits_per_char": -0.08609595894813538, "num_chars": 2}, {"sum_logits": -1.8900461196899414, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.8900461196899414, "logits_per_char": -0.9450230598449707, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 268, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2269827276468277, "incorrect_loss_raw": 1.6215871572494507, "correct_loss_per_char": 0.11349136382341385, "incorrect_loss_per_char": 0.8107935786247253, "correct_loss_per_token": 0.2269827276468277, "incorrect_loss_per_token": 1.6215871572494507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2269827276468277, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.2269827276468277, "logits_per_char": -0.11349136382341385, "num_chars": 2}, {"sum_logits": -1.6215871572494507, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.6215871572494507, "logits_per_char": -0.8107935786247253, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 269, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2566049098968506, "incorrect_loss_raw": 1.5267287492752075, "correct_loss_per_char": 0.1283024549484253, "incorrect_loss_per_char": 0.7633643746376038, "correct_loss_per_token": 0.2566049098968506, "incorrect_loss_per_token": 1.5267287492752075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2566049098968506, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -0.2566049098968506, "logits_per_char": -0.1283024549484253, "num_chars": 2}, {"sum_logits": -1.5267287492752075, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.5267287492752075, "logits_per_char": -0.7633643746376038, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 270, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23733104765415192, "incorrect_loss_raw": 1.5970274209976196, "correct_loss_per_char": 0.11866552382707596, "incorrect_loss_per_char": 0.7985137104988098, "correct_loss_per_token": 0.23733104765415192, "incorrect_loss_per_token": 1.5970274209976196, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23733104765415192, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.23733104765415192, "logits_per_char": -0.11866552382707596, "num_chars": 2}, {"sum_logits": -1.5970274209976196, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.5970274209976196, "logits_per_char": -0.7985137104988098, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 271, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2367897927761078, "incorrect_loss_raw": 1.5934118032455444, "correct_loss_per_char": 0.1183948963880539, "incorrect_loss_per_char": 0.7967059016227722, "correct_loss_per_token": 0.2367897927761078, "incorrect_loss_per_token": 1.5934118032455444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2367897927761078, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -0.2367897927761078, "logits_per_char": -0.1183948963880539, "num_chars": 2}, {"sum_logits": -1.5934118032455444, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.5934118032455444, "logits_per_char": -0.7967059016227722, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 272, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2686156928539276, "incorrect_loss_raw": 1.484192132949829, "correct_loss_per_char": 0.1343078464269638, "incorrect_loss_per_char": 0.7420960664749146, "correct_loss_per_token": 0.2686156928539276, "incorrect_loss_per_token": 1.484192132949829, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2686156928539276, "num_tokens": 1, "num_tokens_all": 1050, "is_greedy": true, "logits_per_token": -0.2686156928539276, "logits_per_char": -0.1343078464269638, "num_chars": 2}, {"sum_logits": -1.484192132949829, "num_tokens": 1, "num_tokens_all": 1050, "is_greedy": false, "logits_per_token": -1.484192132949829, "logits_per_char": -0.7420960664749146, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 273, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20387054979801178, "incorrect_loss_raw": 1.7344096899032593, "correct_loss_per_char": 0.10193527489900589, "incorrect_loss_per_char": 0.8672048449516296, "correct_loss_per_token": 0.20387054979801178, "incorrect_loss_per_token": 1.7344096899032593, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20387054979801178, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.20387054979801178, "logits_per_char": -0.10193527489900589, "num_chars": 2}, {"sum_logits": -1.7344096899032593, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.7344096899032593, "logits_per_char": -0.8672048449516296, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 274, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3013682961463928, "incorrect_loss_raw": 1.3742260932922363, "correct_loss_per_char": 0.1506841480731964, "incorrect_loss_per_char": 0.6871130466461182, "correct_loss_per_token": 0.3013682961463928, "incorrect_loss_per_token": 1.3742260932922363, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3013682961463928, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.3013682961463928, "logits_per_char": -0.1506841480731964, "num_chars": 2}, {"sum_logits": -1.3742260932922363, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.3742260932922363, "logits_per_char": -0.6871130466461182, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 275, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3015696406364441, "incorrect_loss_raw": 1.3887913227081299, "correct_loss_per_char": 0.15078482031822205, "incorrect_loss_per_char": 0.6943956613540649, "correct_loss_per_token": 0.3015696406364441, "incorrect_loss_per_token": 1.3887913227081299, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3015696406364441, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": true, "logits_per_token": -0.3015696406364441, "logits_per_char": -0.15078482031822205, "num_chars": 2}, {"sum_logits": -1.3887913227081299, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": false, "logits_per_token": -1.3887913227081299, "logits_per_char": -0.6943956613540649, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 276, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5433636903762817, "incorrect_loss_raw": 0.24980558454990387, "correct_loss_per_char": 0.7716818451881409, "incorrect_loss_per_char": 0.12490279227495193, "correct_loss_per_token": 1.5433636903762817, "incorrect_loss_per_token": 0.24980558454990387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24980558454990387, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.24980558454990387, "logits_per_char": -0.12490279227495193, "num_chars": 2}, {"sum_logits": -1.5433636903762817, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.5433636903762817, "logits_per_char": -0.7716818451881409, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 277, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24924911558628082, "incorrect_loss_raw": 1.5460799932479858, "correct_loss_per_char": 0.12462455779314041, "incorrect_loss_per_char": 0.7730399966239929, "correct_loss_per_token": 0.24924911558628082, "incorrect_loss_per_token": 1.5460799932479858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24924911558628082, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.24924911558628082, "logits_per_char": -0.12462455779314041, "num_chars": 2}, {"sum_logits": -1.5460799932479858, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.5460799932479858, "logits_per_char": -0.7730399966239929, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 278, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6029109954833984, "incorrect_loss_raw": 0.23500049114227295, "correct_loss_per_char": 0.8014554977416992, "incorrect_loss_per_char": 0.11750024557113647, "correct_loss_per_token": 1.6029109954833984, "incorrect_loss_per_token": 0.23500049114227295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23500049114227295, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": true, "logits_per_token": -0.23500049114227295, "logits_per_char": -0.11750024557113647, "num_chars": 2}, {"sum_logits": -1.6029109954833984, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.6029109954833984, "logits_per_char": -0.8014554977416992, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 279, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3813416957855225, "incorrect_loss_raw": 0.30575406551361084, "correct_loss_per_char": 0.6906708478927612, "incorrect_loss_per_char": 0.15287703275680542, "correct_loss_per_token": 1.3813416957855225, "incorrect_loss_per_token": 0.30575406551361084, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30575406551361084, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": true, "logits_per_token": -0.30575406551361084, "logits_per_char": -0.15287703275680542, "num_chars": 2}, {"sum_logits": -1.3813416957855225, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": false, "logits_per_token": -1.3813416957855225, "logits_per_char": -0.6906708478927612, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 280, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22252127528190613, "incorrect_loss_raw": 1.6743826866149902, "correct_loss_per_char": 0.11126063764095306, "incorrect_loss_per_char": 0.8371913433074951, "correct_loss_per_token": 0.22252127528190613, "incorrect_loss_per_token": 1.6743826866149902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22252127528190613, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.22252127528190613, "logits_per_char": -0.11126063764095306, "num_chars": 2}, {"sum_logits": -1.6743826866149902, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.6743826866149902, "logits_per_char": -0.8371913433074951, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 281, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22330862283706665, "incorrect_loss_raw": 1.6396183967590332, "correct_loss_per_char": 0.11165431141853333, "incorrect_loss_per_char": 0.8198091983795166, "correct_loss_per_token": 0.22330862283706665, "incorrect_loss_per_token": 1.6396183967590332, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22330862283706665, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.22330862283706665, "logits_per_char": -0.11165431141853333, "num_chars": 2}, {"sum_logits": -1.6396183967590332, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.6396183967590332, "logits_per_char": -0.8198091983795166, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 282, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2607528567314148, "incorrect_loss_raw": 1.5140020847320557, "correct_loss_per_char": 0.1303764283657074, "incorrect_loss_per_char": 0.7570010423660278, "correct_loss_per_token": 0.2607528567314148, "incorrect_loss_per_token": 1.5140020847320557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2607528567314148, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -0.2607528567314148, "logits_per_char": -0.1303764283657074, "num_chars": 2}, {"sum_logits": -1.5140020847320557, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.5140020847320557, "logits_per_char": -0.7570010423660278, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 283, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20270125567913055, "incorrect_loss_raw": 1.7393969297409058, "correct_loss_per_char": 0.10135062783956528, "incorrect_loss_per_char": 0.8696984648704529, "correct_loss_per_token": 0.20270125567913055, "incorrect_loss_per_token": 1.7393969297409058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20270125567913055, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.20270125567913055, "logits_per_char": -0.10135062783956528, "num_chars": 2}, {"sum_logits": -1.7393969297409058, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.7393969297409058, "logits_per_char": -0.8696984648704529, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 284, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4812568426132202, "incorrect_loss_raw": 0.270666241645813, "correct_loss_per_char": 0.7406284213066101, "incorrect_loss_per_char": 0.1353331208229065, "correct_loss_per_token": 1.4812568426132202, "incorrect_loss_per_token": 0.270666241645813, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.270666241645813, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -0.270666241645813, "logits_per_char": -0.1353331208229065, "num_chars": 2}, {"sum_logits": -1.4812568426132202, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.4812568426132202, "logits_per_char": -0.7406284213066101, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 285, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2622075080871582, "incorrect_loss_raw": 1.509232759475708, "correct_loss_per_char": 0.1311037540435791, "incorrect_loss_per_char": 0.754616379737854, "correct_loss_per_token": 0.2622075080871582, "incorrect_loss_per_token": 1.509232759475708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2622075080871582, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -0.2622075080871582, "logits_per_char": -0.1311037540435791, "num_chars": 2}, {"sum_logits": -1.509232759475708, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.509232759475708, "logits_per_char": -0.754616379737854, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 286, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2252296358346939, "incorrect_loss_raw": 1.634694218635559, "correct_loss_per_char": 0.11261481791734695, "incorrect_loss_per_char": 0.8173471093177795, "correct_loss_per_token": 0.2252296358346939, "incorrect_loss_per_token": 1.634694218635559, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2252296358346939, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.2252296358346939, "logits_per_char": -0.11261481791734695, "num_chars": 2}, {"sum_logits": -1.634694218635559, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.634694218635559, "logits_per_char": -0.8173471093177795, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 287, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2468939572572708, "incorrect_loss_raw": 1.571973204612732, "correct_loss_per_char": 0.1234469786286354, "incorrect_loss_per_char": 0.785986602306366, "correct_loss_per_token": 0.2468939572572708, "incorrect_loss_per_token": 1.571973204612732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2468939572572708, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.2468939572572708, "logits_per_char": -0.1234469786286354, "num_chars": 2}, {"sum_logits": -1.571973204612732, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.571973204612732, "logits_per_char": -0.785986602306366, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 288, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.739241600036621, "incorrect_loss_raw": 0.20562195777893066, "correct_loss_per_char": 0.8696208000183105, "incorrect_loss_per_char": 0.10281097888946533, "correct_loss_per_token": 1.739241600036621, "incorrect_loss_per_token": 0.20562195777893066, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20562195777893066, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -0.20562195777893066, "logits_per_char": -0.10281097888946533, "num_chars": 2}, {"sum_logits": -1.739241600036621, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.739241600036621, "logits_per_char": -0.8696208000183105, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 289, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2368808537721634, "incorrect_loss_raw": 1.5883289575576782, "correct_loss_per_char": 0.1184404268860817, "incorrect_loss_per_char": 0.7941644787788391, "correct_loss_per_token": 0.2368808537721634, "incorrect_loss_per_token": 1.5883289575576782, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2368808537721634, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.2368808537721634, "logits_per_char": -0.1184404268860817, "num_chars": 2}, {"sum_logits": -1.5883289575576782, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.5883289575576782, "logits_per_char": -0.7941644787788391, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 290, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19920414686203003, "incorrect_loss_raw": 1.7534856796264648, "correct_loss_per_char": 0.09960207343101501, "incorrect_loss_per_char": 0.8767428398132324, "correct_loss_per_token": 0.19920414686203003, "incorrect_loss_per_token": 1.7534856796264648, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19920414686203003, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.19920414686203003, "logits_per_char": -0.09960207343101501, "num_chars": 2}, {"sum_logits": -1.7534856796264648, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.7534856796264648, "logits_per_char": -0.8767428398132324, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 291, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2474748194217682, "incorrect_loss_raw": 1.563369870185852, "correct_loss_per_char": 0.1237374097108841, "incorrect_loss_per_char": 0.781684935092926, "correct_loss_per_token": 0.2474748194217682, "incorrect_loss_per_token": 1.563369870185852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2474748194217682, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": true, "logits_per_token": -0.2474748194217682, "logits_per_char": -0.1237374097108841, "num_chars": 2}, {"sum_logits": -1.563369870185852, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": false, "logits_per_token": -1.563369870185852, "logits_per_char": -0.781684935092926, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 292, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.811497449874878, "incorrect_loss_raw": 0.18643835186958313, "correct_loss_per_char": 0.905748724937439, "incorrect_loss_per_char": 0.09321917593479156, "correct_loss_per_token": 1.811497449874878, "incorrect_loss_per_token": 0.18643835186958313, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18643835186958313, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.18643835186958313, "logits_per_char": -0.09321917593479156, "num_chars": 2}, {"sum_logits": -1.811497449874878, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.811497449874878, "logits_per_char": -0.905748724937439, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 293, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19298051297664642, "incorrect_loss_raw": 1.778617024421692, "correct_loss_per_char": 0.09649025648832321, "incorrect_loss_per_char": 0.889308512210846, "correct_loss_per_token": 0.19298051297664642, "incorrect_loss_per_token": 1.778617024421692, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19298051297664642, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": true, "logits_per_token": -0.19298051297664642, "logits_per_char": -0.09649025648832321, "num_chars": 2}, {"sum_logits": -1.778617024421692, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": false, "logits_per_token": -1.778617024421692, "logits_per_char": -0.889308512210846, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 294, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2210816740989685, "incorrect_loss_raw": 1.6515920162200928, "correct_loss_per_char": 0.11054083704948425, "incorrect_loss_per_char": 0.8257960081100464, "correct_loss_per_token": 0.2210816740989685, "incorrect_loss_per_token": 1.6515920162200928, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2210816740989685, "num_tokens": 1, "num_tokens_all": 1145, "is_greedy": true, "logits_per_token": -0.2210816740989685, "logits_per_char": -0.11054083704948425, "num_chars": 2}, {"sum_logits": -1.6515920162200928, "num_tokens": 1, "num_tokens_all": 1145, "is_greedy": false, "logits_per_token": -1.6515920162200928, "logits_per_char": -0.8257960081100464, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 295, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24467362463474274, "incorrect_loss_raw": 1.5860739946365356, "correct_loss_per_char": 0.12233681231737137, "incorrect_loss_per_char": 0.7930369973182678, "correct_loss_per_token": 0.24467362463474274, "incorrect_loss_per_token": 1.5860739946365356, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24467362463474274, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.24467362463474274, "logits_per_char": -0.12233681231737137, "num_chars": 2}, {"sum_logits": -1.5860739946365356, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.5860739946365356, "logits_per_char": -0.7930369973182678, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 296, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22954268753528595, "incorrect_loss_raw": 1.6332361698150635, "correct_loss_per_char": 0.11477134376764297, "incorrect_loss_per_char": 0.8166180849075317, "correct_loss_per_token": 0.22954268753528595, "incorrect_loss_per_token": 1.6332361698150635, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22954268753528595, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.22954268753528595, "logits_per_char": -0.11477134376764297, "num_chars": 2}, {"sum_logits": -1.6332361698150635, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.6332361698150635, "logits_per_char": -0.8166180849075317, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 297, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32202622294425964, "incorrect_loss_raw": 1.334136962890625, "correct_loss_per_char": 0.16101311147212982, "incorrect_loss_per_char": 0.6670684814453125, "correct_loss_per_token": 0.32202622294425964, "incorrect_loss_per_token": 1.334136962890625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32202622294425964, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": true, "logits_per_token": -0.32202622294425964, "logits_per_char": -0.16101311147212982, "num_chars": 2}, {"sum_logits": -1.334136962890625, "num_tokens": 1, "num_tokens_all": 1100, "is_greedy": false, "logits_per_token": -1.334136962890625, "logits_per_char": -0.6670684814453125, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 298, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1767946183681488, "incorrect_loss_raw": 1.8870149850845337, "correct_loss_per_char": 0.0883973091840744, "incorrect_loss_per_char": 0.9435074925422668, "correct_loss_per_token": 0.1767946183681488, "incorrect_loss_per_token": 1.8870149850845337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1767946183681488, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.1767946183681488, "logits_per_char": -0.0883973091840744, "num_chars": 2}, {"sum_logits": -1.8870149850845337, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.8870149850845337, "logits_per_char": -0.9435074925422668, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 299, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2211015522480011, "incorrect_loss_raw": 1.675616979598999, "correct_loss_per_char": 0.11055077612400055, "incorrect_loss_per_char": 0.8378084897994995, "correct_loss_per_token": 0.2211015522480011, "incorrect_loss_per_token": 1.675616979598999, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2211015522480011, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.2211015522480011, "logits_per_char": -0.11055077612400055, "num_chars": 2}, {"sum_logits": -1.675616979598999, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.675616979598999, "logits_per_char": -0.8378084897994995, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 300, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3343987464904785, "incorrect_loss_raw": 0.3173083961009979, "correct_loss_per_char": 0.6671993732452393, "incorrect_loss_per_char": 0.15865419805049896, "correct_loss_per_token": 1.3343987464904785, "incorrect_loss_per_token": 0.3173083961009979, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3173083961009979, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.3173083961009979, "logits_per_char": -0.15865419805049896, "num_chars": 2}, {"sum_logits": -1.3343987464904785, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.3343987464904785, "logits_per_char": -0.6671993732452393, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 301, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20349346101284027, "incorrect_loss_raw": 1.7410576343536377, "correct_loss_per_char": 0.10174673050642014, "incorrect_loss_per_char": 0.8705288171768188, "correct_loss_per_token": 0.20349346101284027, "incorrect_loss_per_token": 1.7410576343536377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20349346101284027, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.20349346101284027, "logits_per_char": -0.10174673050642014, "num_chars": 2}, {"sum_logits": -1.7410576343536377, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.7410576343536377, "logits_per_char": -0.8705288171768188, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 302, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.197641059756279, "incorrect_loss_raw": 1.7679139375686646, "correct_loss_per_char": 0.0988205298781395, "incorrect_loss_per_char": 0.8839569687843323, "correct_loss_per_token": 0.197641059756279, "incorrect_loss_per_token": 1.7679139375686646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.197641059756279, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.197641059756279, "logits_per_char": -0.0988205298781395, "num_chars": 2}, {"sum_logits": -1.7679139375686646, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.7679139375686646, "logits_per_char": -0.8839569687843323, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 303, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2475968301296234, "incorrect_loss_raw": 1.5695291757583618, "correct_loss_per_char": 0.1237984150648117, "incorrect_loss_per_char": 0.7847645878791809, "correct_loss_per_token": 0.2475968301296234, "incorrect_loss_per_token": 1.5695291757583618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2475968301296234, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.2475968301296234, "logits_per_char": -0.1237984150648117, "num_chars": 2}, {"sum_logits": -1.5695291757583618, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.5695291757583618, "logits_per_char": -0.7847645878791809, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 304, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34647342562675476, "incorrect_loss_raw": 1.277007818222046, "correct_loss_per_char": 0.17323671281337738, "incorrect_loss_per_char": 0.638503909111023, "correct_loss_per_token": 0.34647342562675476, "incorrect_loss_per_token": 1.277007818222046, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34647342562675476, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -0.34647342562675476, "logits_per_char": -0.17323671281337738, "num_chars": 2}, {"sum_logits": -1.277007818222046, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.277007818222046, "logits_per_char": -0.638503909111023, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 305, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24106191098690033, "incorrect_loss_raw": 1.5942645072937012, "correct_loss_per_char": 0.12053095549345016, "incorrect_loss_per_char": 0.7971322536468506, "correct_loss_per_token": 0.24106191098690033, "incorrect_loss_per_token": 1.5942645072937012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24106191098690033, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": true, "logits_per_token": -0.24106191098690033, "logits_per_char": -0.12053095549345016, "num_chars": 2}, {"sum_logits": -1.5942645072937012, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": false, "logits_per_token": -1.5942645072937012, "logits_per_char": -0.7971322536468506, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 306, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21010486781597137, "incorrect_loss_raw": 1.705894947052002, "correct_loss_per_char": 0.10505243390798569, "incorrect_loss_per_char": 0.852947473526001, "correct_loss_per_token": 0.21010486781597137, "incorrect_loss_per_token": 1.705894947052002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21010486781597137, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": true, "logits_per_token": -0.21010486781597137, "logits_per_char": -0.10505243390798569, "num_chars": 2}, {"sum_logits": -1.705894947052002, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": false, "logits_per_token": -1.705894947052002, "logits_per_char": -0.852947473526001, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 307, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5828112363815308, "incorrect_loss_raw": 0.24111248552799225, "correct_loss_per_char": 0.7914056181907654, "incorrect_loss_per_char": 0.12055624276399612, "correct_loss_per_token": 1.5828112363815308, "incorrect_loss_per_token": 0.24111248552799225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24111248552799225, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.24111248552799225, "logits_per_char": -0.12055624276399612, "num_chars": 2}, {"sum_logits": -1.5828112363815308, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.5828112363815308, "logits_per_char": -0.7914056181907654, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 308, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4029446840286255, "incorrect_loss_raw": 0.3010769784450531, "correct_loss_per_char": 0.7014723420143127, "incorrect_loss_per_char": 0.15053848922252655, "correct_loss_per_token": 1.4029446840286255, "incorrect_loss_per_token": 0.3010769784450531, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3010769784450531, "num_tokens": 1, "num_tokens_all": 1018, "is_greedy": true, "logits_per_token": -0.3010769784450531, "logits_per_char": -0.15053848922252655, "num_chars": 2}, {"sum_logits": -1.4029446840286255, "num_tokens": 1, "num_tokens_all": 1018, "is_greedy": false, "logits_per_token": -1.4029446840286255, "logits_per_char": -0.7014723420143127, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 309, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2830844223499298, "incorrect_loss_raw": 1.467519998550415, "correct_loss_per_char": 0.1415422111749649, "incorrect_loss_per_char": 0.7337599992752075, "correct_loss_per_token": 0.2830844223499298, "incorrect_loss_per_token": 1.467519998550415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2830844223499298, "num_tokens": 1, "num_tokens_all": 1175, "is_greedy": true, "logits_per_token": -0.2830844223499298, "logits_per_char": -0.1415422111749649, "num_chars": 2}, {"sum_logits": -1.467519998550415, "num_tokens": 1, "num_tokens_all": 1175, "is_greedy": false, "logits_per_token": -1.467519998550415, "logits_per_char": -0.7337599992752075, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 310, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20858371257781982, "incorrect_loss_raw": 1.7171729803085327, "correct_loss_per_char": 0.10429185628890991, "incorrect_loss_per_char": 0.8585864901542664, "correct_loss_per_token": 0.20858371257781982, "incorrect_loss_per_token": 1.7171729803085327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20858371257781982, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.20858371257781982, "logits_per_char": -0.10429185628890991, "num_chars": 2}, {"sum_logits": -1.7171729803085327, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.7171729803085327, "logits_per_char": -0.8585864901542664, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 311, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8164726495742798, "incorrect_loss_raw": 0.18762721121311188, "correct_loss_per_char": 0.9082363247871399, "incorrect_loss_per_char": 0.09381360560655594, "correct_loss_per_token": 1.8164726495742798, "incorrect_loss_per_token": 0.18762721121311188, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18762721121311188, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.18762721121311188, "logits_per_char": -0.09381360560655594, "num_chars": 2}, {"sum_logits": -1.8164726495742798, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.8164726495742798, "logits_per_char": -0.9082363247871399, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 312, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2679574489593506, "incorrect_loss_raw": 1.4909911155700684, "correct_loss_per_char": 0.1339787244796753, "incorrect_loss_per_char": 0.7454955577850342, "correct_loss_per_token": 0.2679574489593506, "incorrect_loss_per_token": 1.4909911155700684, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2679574489593506, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": true, "logits_per_token": -0.2679574489593506, "logits_per_char": -0.1339787244796753, "num_chars": 2}, {"sum_logits": -1.4909911155700684, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": false, "logits_per_token": -1.4909911155700684, "logits_per_char": -0.7454955577850342, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 313, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2477595955133438, "incorrect_loss_raw": 1.5502172708511353, "correct_loss_per_char": 0.1238797977566719, "incorrect_loss_per_char": 0.7751086354255676, "correct_loss_per_token": 0.2477595955133438, "incorrect_loss_per_token": 1.5502172708511353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2477595955133438, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.2477595955133438, "logits_per_char": -0.1238797977566719, "num_chars": 2}, {"sum_logits": -1.5502172708511353, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.5502172708511353, "logits_per_char": -0.7751086354255676, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 314, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23571667075157166, "incorrect_loss_raw": 1.6112971305847168, "correct_loss_per_char": 0.11785833537578583, "incorrect_loss_per_char": 0.8056485652923584, "correct_loss_per_token": 0.23571667075157166, "incorrect_loss_per_token": 1.6112971305847168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23571667075157166, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": true, "logits_per_token": -0.23571667075157166, "logits_per_char": -0.11785833537578583, "num_chars": 2}, {"sum_logits": -1.6112971305847168, "num_tokens": 1, "num_tokens_all": 1114, "is_greedy": false, "logits_per_token": -1.6112971305847168, "logits_per_char": -0.8056485652923584, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 315, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2878078818321228, "incorrect_loss_raw": 1.4276516437530518, "correct_loss_per_char": 0.1439039409160614, "incorrect_loss_per_char": 0.7138258218765259, "correct_loss_per_token": 0.2878078818321228, "incorrect_loss_per_token": 1.4276516437530518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2878078818321228, "num_tokens": 1, "num_tokens_all": 1135, "is_greedy": true, "logits_per_token": -0.2878078818321228, "logits_per_char": -0.1439039409160614, "num_chars": 2}, {"sum_logits": -1.4276516437530518, "num_tokens": 1, "num_tokens_all": 1135, "is_greedy": false, "logits_per_token": -1.4276516437530518, "logits_per_char": -0.7138258218765259, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 316, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19636766612529755, "incorrect_loss_raw": 1.7787145376205444, "correct_loss_per_char": 0.09818383306264877, "incorrect_loss_per_char": 0.8893572688102722, "correct_loss_per_token": 0.19636766612529755, "incorrect_loss_per_token": 1.7787145376205444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19636766612529755, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": true, "logits_per_token": -0.19636766612529755, "logits_per_char": -0.09818383306264877, "num_chars": 2}, {"sum_logits": -1.7787145376205444, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": false, "logits_per_token": -1.7787145376205444, "logits_per_char": -0.8893572688102722, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 317, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.604762315750122, "incorrect_loss_raw": 0.23393657803535461, "correct_loss_per_char": 0.802381157875061, "incorrect_loss_per_char": 0.11696828901767731, "correct_loss_per_token": 1.604762315750122, "incorrect_loss_per_token": 0.23393657803535461, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23393657803535461, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.23393657803535461, "logits_per_char": -0.11696828901767731, "num_chars": 2}, {"sum_logits": -1.604762315750122, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.604762315750122, "logits_per_char": -0.802381157875061, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 318, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2608851194381714, "incorrect_loss_raw": 1.513069748878479, "correct_loss_per_char": 0.1304425597190857, "incorrect_loss_per_char": 0.7565348744392395, "correct_loss_per_token": 0.2608851194381714, "incorrect_loss_per_token": 1.513069748878479, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2608851194381714, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.2608851194381714, "logits_per_char": -0.1304425597190857, "num_chars": 2}, {"sum_logits": -1.513069748878479, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.513069748878479, "logits_per_char": -0.7565348744392395, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 319, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18929797410964966, "incorrect_loss_raw": 1.7864069938659668, "correct_loss_per_char": 0.09464898705482483, "incorrect_loss_per_char": 0.8932034969329834, "correct_loss_per_token": 0.18929797410964966, "incorrect_loss_per_token": 1.7864069938659668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18929797410964966, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.18929797410964966, "logits_per_char": -0.09464898705482483, "num_chars": 2}, {"sum_logits": -1.7864069938659668, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.7864069938659668, "logits_per_char": -0.8932034969329834, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 320, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5987662076950073, "incorrect_loss_raw": 0.23632684350013733, "correct_loss_per_char": 0.7993831038475037, "incorrect_loss_per_char": 0.11816342175006866, "correct_loss_per_token": 1.5987662076950073, "incorrect_loss_per_token": 0.23632684350013733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23632684350013733, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": true, "logits_per_token": -0.23632684350013733, "logits_per_char": -0.11816342175006866, "num_chars": 2}, {"sum_logits": -1.5987662076950073, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": false, "logits_per_token": -1.5987662076950073, "logits_per_char": -0.7993831038475037, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 321, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7237935066223145, "incorrect_loss_raw": 0.20370984077453613, "correct_loss_per_char": 0.8618967533111572, "incorrect_loss_per_char": 0.10185492038726807, "correct_loss_per_token": 1.7237935066223145, "incorrect_loss_per_token": 0.20370984077453613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20370984077453613, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.20370984077453613, "logits_per_char": -0.10185492038726807, "num_chars": 2}, {"sum_logits": -1.7237935066223145, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.7237935066223145, "logits_per_char": -0.8618967533111572, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 322, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3002537488937378, "incorrect_loss_raw": 1.4388302564620972, "correct_loss_per_char": 0.1501268744468689, "incorrect_loss_per_char": 0.7194151282310486, "correct_loss_per_token": 0.3002537488937378, "incorrect_loss_per_token": 1.4388302564620972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3002537488937378, "num_tokens": 1, "num_tokens_all": 1478, "is_greedy": true, "logits_per_token": -0.3002537488937378, "logits_per_char": -0.1501268744468689, "num_chars": 2}, {"sum_logits": -1.4388302564620972, "num_tokens": 1, "num_tokens_all": 1478, "is_greedy": false, "logits_per_token": -1.4388302564620972, "logits_per_char": -0.7194151282310486, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 323, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19066502153873444, "incorrect_loss_raw": 1.7947876453399658, "correct_loss_per_char": 0.09533251076936722, "incorrect_loss_per_char": 0.8973938226699829, "correct_loss_per_token": 0.19066502153873444, "incorrect_loss_per_token": 1.7947876453399658, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19066502153873444, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.19066502153873444, "logits_per_char": -0.09533251076936722, "num_chars": 2}, {"sum_logits": -1.7947876453399658, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.7947876453399658, "logits_per_char": -0.8973938226699829, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 324, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25128260254859924, "incorrect_loss_raw": 1.5399130582809448, "correct_loss_per_char": 0.12564130127429962, "incorrect_loss_per_char": 0.7699565291404724, "correct_loss_per_token": 0.25128260254859924, "incorrect_loss_per_token": 1.5399130582809448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25128260254859924, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.25128260254859924, "logits_per_char": -0.12564130127429962, "num_chars": 2}, {"sum_logits": -1.5399130582809448, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.5399130582809448, "logits_per_char": -0.7699565291404724, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 325, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5598958730697632, "incorrect_loss_raw": 0.24748310446739197, "correct_loss_per_char": 0.7799479365348816, "incorrect_loss_per_char": 0.12374155223369598, "correct_loss_per_token": 1.5598958730697632, "incorrect_loss_per_token": 0.24748310446739197, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24748310446739197, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.24748310446739197, "logits_per_char": -0.12374155223369598, "num_chars": 2}, {"sum_logits": -1.5598958730697632, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.5598958730697632, "logits_per_char": -0.7799479365348816, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 326, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18102097511291504, "incorrect_loss_raw": 1.8621947765350342, "correct_loss_per_char": 0.09051048755645752, "incorrect_loss_per_char": 0.9310973882675171, "correct_loss_per_token": 0.18102097511291504, "incorrect_loss_per_token": 1.8621947765350342, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18102097511291504, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.18102097511291504, "logits_per_char": -0.09051048755645752, "num_chars": 2}, {"sum_logits": -1.8621947765350342, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.8621947765350342, "logits_per_char": -0.9310973882675171, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 327, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22630208730697632, "incorrect_loss_raw": 1.6295437812805176, "correct_loss_per_char": 0.11315104365348816, "incorrect_loss_per_char": 0.8147718906402588, "correct_loss_per_token": 0.22630208730697632, "incorrect_loss_per_token": 1.6295437812805176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22630208730697632, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.22630208730697632, "logits_per_char": -0.11315104365348816, "num_chars": 2}, {"sum_logits": -1.6295437812805176, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.6295437812805176, "logits_per_char": -0.8147718906402588, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 328, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.236349418759346, "incorrect_loss_raw": 1.5973849296569824, "correct_loss_per_char": 0.118174709379673, "incorrect_loss_per_char": 0.7986924648284912, "correct_loss_per_token": 0.236349418759346, "incorrect_loss_per_token": 1.5973849296569824, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.236349418759346, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.236349418759346, "logits_per_char": -0.118174709379673, "num_chars": 2}, {"sum_logits": -1.5973849296569824, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.5973849296569824, "logits_per_char": -0.7986924648284912, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 329, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2019004374742508, "incorrect_loss_raw": 1.7532320022583008, "correct_loss_per_char": 0.1009502187371254, "incorrect_loss_per_char": 0.8766160011291504, "correct_loss_per_token": 0.2019004374742508, "incorrect_loss_per_token": 1.7532320022583008, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2019004374742508, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.2019004374742508, "logits_per_char": -0.1009502187371254, "num_chars": 2}, {"sum_logits": -1.7532320022583008, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.7532320022583008, "logits_per_char": -0.8766160011291504, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 330, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21331395208835602, "incorrect_loss_raw": 1.6928666830062866, "correct_loss_per_char": 0.10665697604417801, "incorrect_loss_per_char": 0.8464333415031433, "correct_loss_per_token": 0.21331395208835602, "incorrect_loss_per_token": 1.6928666830062866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21331395208835602, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.21331395208835602, "logits_per_char": -0.10665697604417801, "num_chars": 2}, {"sum_logits": -1.6928666830062866, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.6928666830062866, "logits_per_char": -0.8464333415031433, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 331, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1921761929988861, "incorrect_loss_raw": 1.7678922414779663, "correct_loss_per_char": 0.09608809649944305, "incorrect_loss_per_char": 0.8839461207389832, "correct_loss_per_token": 0.1921761929988861, "incorrect_loss_per_token": 1.7678922414779663, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1921761929988861, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.1921761929988861, "logits_per_char": -0.09608809649944305, "num_chars": 2}, {"sum_logits": -1.7678922414779663, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.7678922414779663, "logits_per_char": -0.8839461207389832, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 332, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.818943738937378, "incorrect_loss_raw": 0.18305625021457672, "correct_loss_per_char": 0.909471869468689, "incorrect_loss_per_char": 0.09152812510728836, "correct_loss_per_token": 1.818943738937378, "incorrect_loss_per_token": 0.18305625021457672, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18305625021457672, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": true, "logits_per_token": -0.18305625021457672, "logits_per_char": -0.09152812510728836, "num_chars": 2}, {"sum_logits": -1.818943738937378, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": false, "logits_per_token": -1.818943738937378, "logits_per_char": -0.909471869468689, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 333, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3411916196346283, "incorrect_loss_raw": 1.3112578392028809, "correct_loss_per_char": 0.17059580981731415, "incorrect_loss_per_char": 0.6556289196014404, "correct_loss_per_token": 0.3411916196346283, "incorrect_loss_per_token": 1.3112578392028809, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3411916196346283, "num_tokens": 1, "num_tokens_all": 1240, "is_greedy": true, "logits_per_token": -0.3411916196346283, "logits_per_char": -0.17059580981731415, "num_chars": 2}, {"sum_logits": -1.3112578392028809, "num_tokens": 1, "num_tokens_all": 1240, "is_greedy": false, "logits_per_token": -1.3112578392028809, "logits_per_char": -0.6556289196014404, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 334, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2229052037000656, "incorrect_loss_raw": 1.6545846462249756, "correct_loss_per_char": 0.1114526018500328, "incorrect_loss_per_char": 0.8272923231124878, "correct_loss_per_token": 0.2229052037000656, "incorrect_loss_per_token": 1.6545846462249756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2229052037000656, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.2229052037000656, "logits_per_char": -0.1114526018500328, "num_chars": 2}, {"sum_logits": -1.6545846462249756, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.6545846462249756, "logits_per_char": -0.8272923231124878, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 335, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2547933757305145, "incorrect_loss_raw": 1.529947280883789, "correct_loss_per_char": 0.12739668786525726, "incorrect_loss_per_char": 0.7649736404418945, "correct_loss_per_token": 0.2547933757305145, "incorrect_loss_per_token": 1.529947280883789, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2547933757305145, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.2547933757305145, "logits_per_char": -0.12739668786525726, "num_chars": 2}, {"sum_logits": -1.529947280883789, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.529947280883789, "logits_per_char": -0.7649736404418945, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 336, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6380844116210938, "incorrect_loss_raw": 0.2267274260520935, "correct_loss_per_char": 0.8190422058105469, "incorrect_loss_per_char": 0.11336371302604675, "correct_loss_per_token": 1.6380844116210938, "incorrect_loss_per_token": 0.2267274260520935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2267274260520935, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.2267274260520935, "logits_per_char": -0.11336371302604675, "num_chars": 2}, {"sum_logits": -1.6380844116210938, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.6380844116210938, "logits_per_char": -0.8190422058105469, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 337, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20368990302085876, "incorrect_loss_raw": 1.7270185947418213, "correct_loss_per_char": 0.10184495151042938, "incorrect_loss_per_char": 0.8635092973709106, "correct_loss_per_token": 0.20368990302085876, "incorrect_loss_per_token": 1.7270185947418213, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20368990302085876, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.20368990302085876, "logits_per_char": -0.10184495151042938, "num_chars": 2}, {"sum_logits": -1.7270185947418213, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.7270185947418213, "logits_per_char": -0.8635092973709106, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 338, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26063060760498047, "incorrect_loss_raw": 1.5202114582061768, "correct_loss_per_char": 0.13031530380249023, "incorrect_loss_per_char": 0.7601057291030884, "correct_loss_per_token": 0.26063060760498047, "incorrect_loss_per_token": 1.5202114582061768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26063060760498047, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": true, "logits_per_token": -0.26063060760498047, "logits_per_char": -0.13031530380249023, "num_chars": 2}, {"sum_logits": -1.5202114582061768, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -1.5202114582061768, "logits_per_char": -0.7601057291030884, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 339, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22122110426425934, "incorrect_loss_raw": 1.6482635736465454, "correct_loss_per_char": 0.11061055213212967, "incorrect_loss_per_char": 0.8241317868232727, "correct_loss_per_token": 0.22122110426425934, "incorrect_loss_per_token": 1.6482635736465454, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22122110426425934, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.22122110426425934, "logits_per_char": -0.11061055213212967, "num_chars": 2}, {"sum_logits": -1.6482635736465454, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.6482635736465454, "logits_per_char": -0.8241317868232727, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 340, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2361198514699936, "incorrect_loss_raw": 1.5983705520629883, "correct_loss_per_char": 0.1180599257349968, "incorrect_loss_per_char": 0.7991852760314941, "correct_loss_per_token": 0.2361198514699936, "incorrect_loss_per_token": 1.5983705520629883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2361198514699936, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.2361198514699936, "logits_per_char": -0.1180599257349968, "num_chars": 2}, {"sum_logits": -1.5983705520629883, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.5983705520629883, "logits_per_char": -0.7991852760314941, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 341, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20873580873012543, "incorrect_loss_raw": 1.711271047592163, "correct_loss_per_char": 0.10436790436506271, "incorrect_loss_per_char": 0.8556355237960815, "correct_loss_per_token": 0.20873580873012543, "incorrect_loss_per_token": 1.711271047592163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20873580873012543, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.20873580873012543, "logits_per_char": -0.10436790436506271, "num_chars": 2}, {"sum_logits": -1.711271047592163, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.711271047592163, "logits_per_char": -0.8556355237960815, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 342, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.716163158416748, "incorrect_loss_raw": 0.2054453045129776, "correct_loss_per_char": 0.858081579208374, "incorrect_loss_per_char": 0.1027226522564888, "correct_loss_per_token": 1.716163158416748, "incorrect_loss_per_token": 0.2054453045129776, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2054453045129776, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.2054453045129776, "logits_per_char": -0.1027226522564888, "num_chars": 2}, {"sum_logits": -1.716163158416748, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.716163158416748, "logits_per_char": -0.858081579208374, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 343, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28669339418411255, "incorrect_loss_raw": 1.4512338638305664, "correct_loss_per_char": 0.14334669709205627, "incorrect_loss_per_char": 0.7256169319152832, "correct_loss_per_token": 0.28669339418411255, "incorrect_loss_per_token": 1.4512338638305664, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28669339418411255, "num_tokens": 1, "num_tokens_all": 1188, "is_greedy": true, "logits_per_token": -0.28669339418411255, "logits_per_char": -0.14334669709205627, "num_chars": 2}, {"sum_logits": -1.4512338638305664, "num_tokens": 1, "num_tokens_all": 1188, "is_greedy": false, "logits_per_token": -1.4512338638305664, "logits_per_char": -0.7256169319152832, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 344, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2365128993988037, "incorrect_loss_raw": 1.6066792011260986, "correct_loss_per_char": 0.11825644969940186, "incorrect_loss_per_char": 0.8033396005630493, "correct_loss_per_token": 0.2365128993988037, "incorrect_loss_per_token": 1.6066792011260986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2365128993988037, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": true, "logits_per_token": -0.2365128993988037, "logits_per_char": -0.11825644969940186, "num_chars": 2}, {"sum_logits": -1.6066792011260986, "num_tokens": 1, "num_tokens_all": 1090, "is_greedy": false, "logits_per_token": -1.6066792011260986, "logits_per_char": -0.8033396005630493, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 345, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4133728742599487, "incorrect_loss_raw": 0.29413118958473206, "correct_loss_per_char": 0.7066864371299744, "incorrect_loss_per_char": 0.14706559479236603, "correct_loss_per_token": 1.4133728742599487, "incorrect_loss_per_token": 0.29413118958473206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29413118958473206, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": true, "logits_per_token": -0.29413118958473206, "logits_per_char": -0.14706559479236603, "num_chars": 2}, {"sum_logits": -1.4133728742599487, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.4133728742599487, "logits_per_char": -0.7066864371299744, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 346, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2306169718503952, "incorrect_loss_raw": 1.6242598295211792, "correct_loss_per_char": 0.1153084859251976, "incorrect_loss_per_char": 0.8121299147605896, "correct_loss_per_token": 0.2306169718503952, "incorrect_loss_per_token": 1.6242598295211792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2306169718503952, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -0.2306169718503952, "logits_per_char": -0.1153084859251976, "num_chars": 2}, {"sum_logits": -1.6242598295211792, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.6242598295211792, "logits_per_char": -0.8121299147605896, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 347, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2666197419166565, "incorrect_loss_raw": 1.4881260395050049, "correct_loss_per_char": 0.13330987095832825, "incorrect_loss_per_char": 0.7440630197525024, "correct_loss_per_token": 0.2666197419166565, "incorrect_loss_per_token": 1.4881260395050049, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2666197419166565, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.2666197419166565, "logits_per_char": -0.13330987095832825, "num_chars": 2}, {"sum_logits": -1.4881260395050049, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.4881260395050049, "logits_per_char": -0.7440630197525024, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 348, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5952744483947754, "incorrect_loss_raw": 0.23758892714977264, "correct_loss_per_char": 0.7976372241973877, "incorrect_loss_per_char": 0.11879446357488632, "correct_loss_per_token": 1.5952744483947754, "incorrect_loss_per_token": 0.23758892714977264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23758892714977264, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.23758892714977264, "logits_per_char": -0.11879446357488632, "num_chars": 2}, {"sum_logits": -1.5952744483947754, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.5952744483947754, "logits_per_char": -0.7976372241973877, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 349, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4055622816085815, "incorrect_loss_raw": 0.2924828827381134, "correct_loss_per_char": 0.7027811408042908, "incorrect_loss_per_char": 0.1462414413690567, "correct_loss_per_token": 1.4055622816085815, "incorrect_loss_per_token": 0.2924828827381134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2924828827381134, "num_tokens": 1, "num_tokens_all": 1188, "is_greedy": true, "logits_per_token": -0.2924828827381134, "logits_per_char": -0.1462414413690567, "num_chars": 2}, {"sum_logits": -1.4055622816085815, "num_tokens": 1, "num_tokens_all": 1188, "is_greedy": false, "logits_per_token": -1.4055622816085815, "logits_per_char": -0.7027811408042908, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 350, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.674616813659668, "incorrect_loss_raw": 0.21915823221206665, "correct_loss_per_char": 0.837308406829834, "incorrect_loss_per_char": 0.10957911610603333, "correct_loss_per_token": 1.674616813659668, "incorrect_loss_per_token": 0.21915823221206665, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21915823221206665, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.21915823221206665, "logits_per_char": -0.10957911610603333, "num_chars": 2}, {"sum_logits": -1.674616813659668, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.674616813659668, "logits_per_char": -0.837308406829834, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 351, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5328950881958008, "incorrect_loss_raw": 0.253355473279953, "correct_loss_per_char": 0.7664475440979004, "incorrect_loss_per_char": 0.1266777366399765, "correct_loss_per_token": 1.5328950881958008, "incorrect_loss_per_token": 0.253355473279953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.253355473279953, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.253355473279953, "logits_per_char": -0.1266777366399765, "num_chars": 2}, {"sum_logits": -1.5328950881958008, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.5328950881958008, "logits_per_char": -0.7664475440979004, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 352, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2658843398094177, "incorrect_loss_raw": 1.4998087882995605, "correct_loss_per_char": 0.13294216990470886, "incorrect_loss_per_char": 0.7499043941497803, "correct_loss_per_token": 0.2658843398094177, "incorrect_loss_per_token": 1.4998087882995605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2658843398094177, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": true, "logits_per_token": -0.2658843398094177, "logits_per_char": -0.13294216990470886, "num_chars": 2}, {"sum_logits": -1.4998087882995605, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": false, "logits_per_token": -1.4998087882995605, "logits_per_char": -0.7499043941497803, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 353, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6175856590270996, "incorrect_loss_raw": 0.23356442153453827, "correct_loss_per_char": 0.8087928295135498, "incorrect_loss_per_char": 0.11678221076726913, "correct_loss_per_token": 1.6175856590270996, "incorrect_loss_per_token": 0.23356442153453827, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23356442153453827, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.23356442153453827, "logits_per_char": -0.11678221076726913, "num_chars": 2}, {"sum_logits": -1.6175856590270996, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.6175856590270996, "logits_per_char": -0.8087928295135498, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 354, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22433865070343018, "incorrect_loss_raw": 1.6412436962127686, "correct_loss_per_char": 0.11216932535171509, "incorrect_loss_per_char": 0.8206218481063843, "correct_loss_per_token": 0.22433865070343018, "incorrect_loss_per_token": 1.6412436962127686, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22433865070343018, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.22433865070343018, "logits_per_char": -0.11216932535171509, "num_chars": 2}, {"sum_logits": -1.6412436962127686, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -1.6412436962127686, "logits_per_char": -0.8206218481063843, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 355, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4382206201553345, "incorrect_loss_raw": 0.282496839761734, "correct_loss_per_char": 0.7191103100776672, "incorrect_loss_per_char": 0.141248419880867, "correct_loss_per_token": 1.4382206201553345, "incorrect_loss_per_token": 0.282496839761734, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.282496839761734, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.282496839761734, "logits_per_char": -0.141248419880867, "num_chars": 2}, {"sum_logits": -1.4382206201553345, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.4382206201553345, "logits_per_char": -0.7191103100776672, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 356, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27551600337028503, "incorrect_loss_raw": 1.4682729244232178, "correct_loss_per_char": 0.13775800168514252, "incorrect_loss_per_char": 0.7341364622116089, "correct_loss_per_token": 0.27551600337028503, "incorrect_loss_per_token": 1.4682729244232178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27551600337028503, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.27551600337028503, "logits_per_char": -0.13775800168514252, "num_chars": 2}, {"sum_logits": -1.4682729244232178, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.4682729244232178, "logits_per_char": -0.7341364622116089, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 357, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.247689887881279, "incorrect_loss_raw": 1.5516667366027832, "correct_loss_per_char": 0.1238449439406395, "incorrect_loss_per_char": 0.7758333683013916, "correct_loss_per_token": 0.247689887881279, "incorrect_loss_per_token": 1.5516667366027832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.247689887881279, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.247689887881279, "logits_per_char": -0.1238449439406395, "num_chars": 2}, {"sum_logits": -1.5516667366027832, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.5516667366027832, "logits_per_char": -0.7758333683013916, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 358, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.673054814338684, "incorrect_loss_raw": 0.2208113968372345, "correct_loss_per_char": 0.836527407169342, "incorrect_loss_per_char": 0.11040569841861725, "correct_loss_per_token": 1.673054814338684, "incorrect_loss_per_token": 0.2208113968372345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2208113968372345, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.2208113968372345, "logits_per_char": -0.11040569841861725, "num_chars": 2}, {"sum_logits": -1.673054814338684, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.673054814338684, "logits_per_char": -0.836527407169342, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 359, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2604292333126068, "incorrect_loss_raw": 1.517594575881958, "correct_loss_per_char": 0.1302146166563034, "incorrect_loss_per_char": 0.758797287940979, "correct_loss_per_token": 0.2604292333126068, "incorrect_loss_per_token": 1.517594575881958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2604292333126068, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": true, "logits_per_token": -0.2604292333126068, "logits_per_char": -0.1302146166563034, "num_chars": 2}, {"sum_logits": -1.517594575881958, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": false, "logits_per_token": -1.517594575881958, "logits_per_char": -0.758797287940979, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 360, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21621999144554138, "incorrect_loss_raw": 1.6749333143234253, "correct_loss_per_char": 0.10810999572277069, "incorrect_loss_per_char": 0.8374666571617126, "correct_loss_per_token": 0.21621999144554138, "incorrect_loss_per_token": 1.6749333143234253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21621999144554138, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.21621999144554138, "logits_per_char": -0.10810999572277069, "num_chars": 2}, {"sum_logits": -1.6749333143234253, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.6749333143234253, "logits_per_char": -0.8374666571617126, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 361, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8270512819290161, "incorrect_loss_raw": 0.18512603640556335, "correct_loss_per_char": 0.9135256409645081, "incorrect_loss_per_char": 0.09256301820278168, "correct_loss_per_token": 1.8270512819290161, "incorrect_loss_per_token": 0.18512603640556335, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18512603640556335, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": true, "logits_per_token": -0.18512603640556335, "logits_per_char": -0.09256301820278168, "num_chars": 2}, {"sum_logits": -1.8270512819290161, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -1.8270512819290161, "logits_per_char": -0.9135256409645081, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 362, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2616673409938812, "incorrect_loss_raw": 1.5031929016113281, "correct_loss_per_char": 0.1308336704969406, "incorrect_loss_per_char": 0.7515964508056641, "correct_loss_per_token": 0.2616673409938812, "incorrect_loss_per_token": 1.5031929016113281, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2616673409938812, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.2616673409938812, "logits_per_char": -0.1308336704969406, "num_chars": 2}, {"sum_logits": -1.5031929016113281, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.5031929016113281, "logits_per_char": -0.7515964508056641, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 363, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25620031356811523, "incorrect_loss_raw": 1.5743181705474854, "correct_loss_per_char": 0.12810015678405762, "incorrect_loss_per_char": 0.7871590852737427, "correct_loss_per_token": 0.25620031356811523, "incorrect_loss_per_token": 1.5743181705474854, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25620031356811523, "num_tokens": 1, "num_tokens_all": 1244, "is_greedy": true, "logits_per_token": -0.25620031356811523, "logits_per_char": -0.12810015678405762, "num_chars": 2}, {"sum_logits": -1.5743181705474854, "num_tokens": 1, "num_tokens_all": 1244, "is_greedy": false, "logits_per_token": -1.5743181705474854, "logits_per_char": -0.7871590852737427, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 364, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5460323095321655, "incorrect_loss_raw": 0.25362682342529297, "correct_loss_per_char": 0.7730161547660828, "incorrect_loss_per_char": 0.12681341171264648, "correct_loss_per_token": 1.5460323095321655, "incorrect_loss_per_token": 0.25362682342529297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25362682342529297, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -0.25362682342529297, "logits_per_char": -0.12681341171264648, "num_chars": 2}, {"sum_logits": -1.5460323095321655, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.5460323095321655, "logits_per_char": -0.7730161547660828, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 365, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4531877040863037, "incorrect_loss_raw": 0.27768081426620483, "correct_loss_per_char": 0.7265938520431519, "incorrect_loss_per_char": 0.13884040713310242, "correct_loss_per_token": 1.4531877040863037, "incorrect_loss_per_token": 0.27768081426620483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27768081426620483, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": true, "logits_per_token": -0.27768081426620483, "logits_per_char": -0.13884040713310242, "num_chars": 2}, {"sum_logits": -1.4531877040863037, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.4531877040863037, "logits_per_char": -0.7265938520431519, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 366, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3235640525817871, "incorrect_loss_raw": 1.3404412269592285, "correct_loss_per_char": 0.16178202629089355, "incorrect_loss_per_char": 0.6702206134796143, "correct_loss_per_token": 0.3235640525817871, "incorrect_loss_per_token": 1.3404412269592285, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3235640525817871, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -0.3235640525817871, "logits_per_char": -0.16178202629089355, "num_chars": 2}, {"sum_logits": -1.3404412269592285, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.3404412269592285, "logits_per_char": -0.6702206134796143, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 367, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26196324825286865, "incorrect_loss_raw": 1.5014933347702026, "correct_loss_per_char": 0.13098162412643433, "incorrect_loss_per_char": 0.7507466673851013, "correct_loss_per_token": 0.26196324825286865, "incorrect_loss_per_token": 1.5014933347702026, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26196324825286865, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.26196324825286865, "logits_per_char": -0.13098162412643433, "num_chars": 2}, {"sum_logits": -1.5014933347702026, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.5014933347702026, "logits_per_char": -0.7507466673851013, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 368, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23542223870754242, "incorrect_loss_raw": 1.6351292133331299, "correct_loss_per_char": 0.11771111935377121, "incorrect_loss_per_char": 0.8175646066665649, "correct_loss_per_token": 0.23542223870754242, "incorrect_loss_per_token": 1.6351292133331299, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23542223870754242, "num_tokens": 1, "num_tokens_all": 1339, "is_greedy": true, "logits_per_token": -0.23542223870754242, "logits_per_char": -0.11771111935377121, "num_chars": 2}, {"sum_logits": -1.6351292133331299, "num_tokens": 1, "num_tokens_all": 1339, "is_greedy": false, "logits_per_token": -1.6351292133331299, "logits_per_char": -0.8175646066665649, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 369, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27540624141693115, "incorrect_loss_raw": 1.4523630142211914, "correct_loss_per_char": 0.13770312070846558, "incorrect_loss_per_char": 0.7261815071105957, "correct_loss_per_token": 0.27540624141693115, "incorrect_loss_per_token": 1.4523630142211914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27540624141693115, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.27540624141693115, "logits_per_char": -0.13770312070846558, "num_chars": 2}, {"sum_logits": -1.4523630142211914, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.4523630142211914, "logits_per_char": -0.7261815071105957, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 370, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32003504037857056, "incorrect_loss_raw": 1.335524559020996, "correct_loss_per_char": 0.16001752018928528, "incorrect_loss_per_char": 0.667762279510498, "correct_loss_per_token": 0.32003504037857056, "incorrect_loss_per_token": 1.335524559020996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32003504037857056, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": true, "logits_per_token": -0.32003504037857056, "logits_per_char": -0.16001752018928528, "num_chars": 2}, {"sum_logits": -1.335524559020996, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": false, "logits_per_token": -1.335524559020996, "logits_per_char": -0.667762279510498, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 371, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17631632089614868, "incorrect_loss_raw": 1.8557047843933105, "correct_loss_per_char": 0.08815816044807434, "incorrect_loss_per_char": 0.9278523921966553, "correct_loss_per_token": 0.17631632089614868, "incorrect_loss_per_token": 1.8557047843933105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17631632089614868, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.17631632089614868, "logits_per_char": -0.08815816044807434, "num_chars": 2}, {"sum_logits": -1.8557047843933105, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.8557047843933105, "logits_per_char": -0.9278523921966553, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 372, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3134521245956421, "incorrect_loss_raw": 1.3663815259933472, "correct_loss_per_char": 0.15672606229782104, "incorrect_loss_per_char": 0.6831907629966736, "correct_loss_per_token": 0.3134521245956421, "incorrect_loss_per_token": 1.3663815259933472, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3134521245956421, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": true, "logits_per_token": -0.3134521245956421, "logits_per_char": -0.15672606229782104, "num_chars": 2}, {"sum_logits": -1.3663815259933472, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -1.3663815259933472, "logits_per_char": -0.6831907629966736, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 373, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21282264590263367, "incorrect_loss_raw": 1.696714997291565, "correct_loss_per_char": 0.10641132295131683, "incorrect_loss_per_char": 0.8483574986457825, "correct_loss_per_token": 0.21282264590263367, "incorrect_loss_per_token": 1.696714997291565, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21282264590263367, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.21282264590263367, "logits_per_char": -0.10641132295131683, "num_chars": 2}, {"sum_logits": -1.696714997291565, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.696714997291565, "logits_per_char": -0.8483574986457825, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 374, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4385950565338135, "incorrect_loss_raw": 0.2871769070625305, "correct_loss_per_char": 0.7192975282669067, "incorrect_loss_per_char": 0.14358845353126526, "correct_loss_per_token": 1.4385950565338135, "incorrect_loss_per_token": 0.2871769070625305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2871769070625305, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": true, "logits_per_token": -0.2871769070625305, "logits_per_char": -0.14358845353126526, "num_chars": 2}, {"sum_logits": -1.4385950565338135, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": false, "logits_per_token": -1.4385950565338135, "logits_per_char": -0.7192975282669067, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 375, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5671718120574951, "incorrect_loss_raw": 0.24749678373336792, "correct_loss_per_char": 0.7835859060287476, "incorrect_loss_per_char": 0.12374839186668396, "correct_loss_per_token": 1.5671718120574951, "incorrect_loss_per_token": 0.24749678373336792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24749678373336792, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.24749678373336792, "logits_per_char": -0.12374839186668396, "num_chars": 2}, {"sum_logits": -1.5671718120574951, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.5671718120574951, "logits_per_char": -0.7835859060287476, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 376, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4482160806655884, "incorrect_loss_raw": 0.2748660147190094, "correct_loss_per_char": 0.7241080403327942, "incorrect_loss_per_char": 0.1374330073595047, "correct_loss_per_token": 1.4482160806655884, "incorrect_loss_per_token": 0.2748660147190094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2748660147190094, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.2748660147190094, "logits_per_char": -0.1374330073595047, "num_chars": 2}, {"sum_logits": -1.4482160806655884, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.4482160806655884, "logits_per_char": -0.7241080403327942, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 377, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.476680874824524, "incorrect_loss_raw": 0.27596768736839294, "correct_loss_per_char": 0.738340437412262, "incorrect_loss_per_char": 0.13798384368419647, "correct_loss_per_token": 1.476680874824524, "incorrect_loss_per_token": 0.27596768736839294, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27596768736839294, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": true, "logits_per_token": -0.27596768736839294, "logits_per_char": -0.13798384368419647, "num_chars": 2}, {"sum_logits": -1.476680874824524, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.476680874824524, "logits_per_char": -0.738340437412262, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 378, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.674437403678894, "incorrect_loss_raw": 0.21546684205532074, "correct_loss_per_char": 0.837218701839447, "incorrect_loss_per_char": 0.10773342102766037, "correct_loss_per_token": 1.674437403678894, "incorrect_loss_per_token": 0.21546684205532074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21546684205532074, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.21546684205532074, "logits_per_char": -0.10773342102766037, "num_chars": 2}, {"sum_logits": -1.674437403678894, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.674437403678894, "logits_per_char": -0.837218701839447, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 379, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2186082899570465, "incorrect_loss_raw": 1.6751569509506226, "correct_loss_per_char": 0.10930414497852325, "incorrect_loss_per_char": 0.8375784754753113, "correct_loss_per_token": 0.2186082899570465, "incorrect_loss_per_token": 1.6751569509506226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2186082899570465, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": true, "logits_per_token": -0.2186082899570465, "logits_per_char": -0.10930414497852325, "num_chars": 2}, {"sum_logits": -1.6751569509506226, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": false, "logits_per_token": -1.6751569509506226, "logits_per_char": -0.8375784754753113, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 380, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6913409233093262, "incorrect_loss_raw": 0.21629047393798828, "correct_loss_per_char": 0.8456704616546631, "incorrect_loss_per_char": 0.10814523696899414, "correct_loss_per_token": 1.6913409233093262, "incorrect_loss_per_token": 0.21629047393798828, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21629047393798828, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.21629047393798828, "logits_per_char": -0.10814523696899414, "num_chars": 2}, {"sum_logits": -1.6913409233093262, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.6913409233093262, "logits_per_char": -0.8456704616546631, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 381, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4442943334579468, "incorrect_loss_raw": 0.2786799967288971, "correct_loss_per_char": 0.7221471667289734, "incorrect_loss_per_char": 0.13933999836444855, "correct_loss_per_token": 1.4442943334579468, "incorrect_loss_per_token": 0.2786799967288971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2786799967288971, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.2786799967288971, "logits_per_char": -0.13933999836444855, "num_chars": 2}, {"sum_logits": -1.4442943334579468, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -1.4442943334579468, "logits_per_char": -0.7221471667289734, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 382, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24930113554000854, "incorrect_loss_raw": 1.5589661598205566, "correct_loss_per_char": 0.12465056777000427, "incorrect_loss_per_char": 0.7794830799102783, "correct_loss_per_token": 0.24930113554000854, "incorrect_loss_per_token": 1.5589661598205566, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24930113554000854, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.24930113554000854, "logits_per_char": -0.12465056777000427, "num_chars": 2}, {"sum_logits": -1.5589661598205566, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.5589661598205566, "logits_per_char": -0.7794830799102783, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 383, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2509525716304779, "incorrect_loss_raw": 1.5655858516693115, "correct_loss_per_char": 0.12547628581523895, "incorrect_loss_per_char": 0.7827929258346558, "correct_loss_per_token": 0.2509525716304779, "incorrect_loss_per_token": 1.5655858516693115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2509525716304779, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.2509525716304779, "logits_per_char": -0.12547628581523895, "num_chars": 2}, {"sum_logits": -1.5655858516693115, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.5655858516693115, "logits_per_char": -0.7827929258346558, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 384, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1972854733467102, "incorrect_loss_raw": 1.7789645195007324, "correct_loss_per_char": 0.0986427366733551, "incorrect_loss_per_char": 0.8894822597503662, "correct_loss_per_token": 0.1972854733467102, "incorrect_loss_per_token": 1.7789645195007324, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1972854733467102, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.1972854733467102, "logits_per_char": -0.0986427366733551, "num_chars": 2}, {"sum_logits": -1.7789645195007324, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.7789645195007324, "logits_per_char": -0.8894822597503662, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 385, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2724739909172058, "incorrect_loss_raw": 1.4714698791503906, "correct_loss_per_char": 0.1362369954586029, "incorrect_loss_per_char": 0.7357349395751953, "correct_loss_per_token": 0.2724739909172058, "incorrect_loss_per_token": 1.4714698791503906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2724739909172058, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.2724739909172058, "logits_per_char": -0.1362369954586029, "num_chars": 2}, {"sum_logits": -1.4714698791503906, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.4714698791503906, "logits_per_char": -0.7357349395751953, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 386, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21493381261825562, "incorrect_loss_raw": 1.714625597000122, "correct_loss_per_char": 0.10746690630912781, "incorrect_loss_per_char": 0.857312798500061, "correct_loss_per_token": 0.21493381261825562, "incorrect_loss_per_token": 1.714625597000122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21493381261825562, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.21493381261825562, "logits_per_char": -0.10746690630912781, "num_chars": 2}, {"sum_logits": -1.714625597000122, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.714625597000122, "logits_per_char": -0.857312798500061, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 387, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.375187873840332, "incorrect_loss_raw": 0.3070814907550812, "correct_loss_per_char": 0.687593936920166, "incorrect_loss_per_char": 0.1535407453775406, "correct_loss_per_token": 1.375187873840332, "incorrect_loss_per_token": 0.3070814907550812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3070814907550812, "num_tokens": 1, "num_tokens_all": 1022, "is_greedy": true, "logits_per_token": -0.3070814907550812, "logits_per_char": -0.1535407453775406, "num_chars": 2}, {"sum_logits": -1.375187873840332, "num_tokens": 1, "num_tokens_all": 1022, "is_greedy": false, "logits_per_token": -1.375187873840332, "logits_per_char": -0.687593936920166, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 388, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6796207427978516, "incorrect_loss_raw": 0.22248348593711853, "correct_loss_per_char": 0.8398103713989258, "incorrect_loss_per_char": 0.11124174296855927, "correct_loss_per_token": 1.6796207427978516, "incorrect_loss_per_token": 0.22248348593711853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22248348593711853, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.22248348593711853, "logits_per_char": -0.11124174296855927, "num_chars": 2}, {"sum_logits": -1.6796207427978516, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.6796207427978516, "logits_per_char": -0.8398103713989258, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 389, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.600244402885437, "incorrect_loss_raw": 0.23595772683620453, "correct_loss_per_char": 0.8001222014427185, "incorrect_loss_per_char": 0.11797886341810226, "correct_loss_per_token": 1.600244402885437, "incorrect_loss_per_token": 0.23595772683620453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23595772683620453, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.23595772683620453, "logits_per_char": -0.11797886341810226, "num_chars": 2}, {"sum_logits": -1.600244402885437, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.600244402885437, "logits_per_char": -0.8001222014427185, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 390, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19975480437278748, "incorrect_loss_raw": 1.7640644311904907, "correct_loss_per_char": 0.09987740218639374, "incorrect_loss_per_char": 0.8820322155952454, "correct_loss_per_token": 0.19975480437278748, "incorrect_loss_per_token": 1.7640644311904907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19975480437278748, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.19975480437278748, "logits_per_char": -0.09987740218639374, "num_chars": 2}, {"sum_logits": -1.7640644311904907, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.7640644311904907, "logits_per_char": -0.8820322155952454, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 391, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2318440079689026, "incorrect_loss_raw": 1.6215755939483643, "correct_loss_per_char": 0.1159220039844513, "incorrect_loss_per_char": 0.8107877969741821, "correct_loss_per_token": 0.2318440079689026, "incorrect_loss_per_token": 1.6215755939483643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2318440079689026, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.2318440079689026, "logits_per_char": -0.1159220039844513, "num_chars": 2}, {"sum_logits": -1.6215755939483643, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -1.6215755939483643, "logits_per_char": -0.8107877969741821, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 392, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3040424883365631, "incorrect_loss_raw": 1.3784481287002563, "correct_loss_per_char": 0.15202124416828156, "incorrect_loss_per_char": 0.6892240643501282, "correct_loss_per_token": 0.3040424883365631, "incorrect_loss_per_token": 1.3784481287002563, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3040424883365631, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": true, "logits_per_token": -0.3040424883365631, "logits_per_char": -0.15202124416828156, "num_chars": 2}, {"sum_logits": -1.3784481287002563, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.3784481287002563, "logits_per_char": -0.6892240643501282, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 393, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21903201937675476, "incorrect_loss_raw": 1.6666339635849, "correct_loss_per_char": 0.10951600968837738, "incorrect_loss_per_char": 0.83331698179245, "correct_loss_per_token": 0.21903201937675476, "incorrect_loss_per_token": 1.6666339635849, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21903201937675476, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.21903201937675476, "logits_per_char": -0.10951600968837738, "num_chars": 2}, {"sum_logits": -1.6666339635849, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.6666339635849, "logits_per_char": -0.83331698179245, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 394, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2607671022415161, "incorrect_loss_raw": 1.50581693649292, "correct_loss_per_char": 0.13038355112075806, "incorrect_loss_per_char": 0.75290846824646, "correct_loss_per_token": 0.2607671022415161, "incorrect_loss_per_token": 1.50581693649292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2607671022415161, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.2607671022415161, "logits_per_char": -0.13038355112075806, "num_chars": 2}, {"sum_logits": -1.50581693649292, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.50581693649292, "logits_per_char": -0.75290846824646, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 395, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19017527997493744, "incorrect_loss_raw": 1.7951312065124512, "correct_loss_per_char": 0.09508763998746872, "incorrect_loss_per_char": 0.8975656032562256, "correct_loss_per_token": 0.19017527997493744, "incorrect_loss_per_token": 1.7951312065124512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19017527997493744, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.19017527997493744, "logits_per_char": -0.09508763998746872, "num_chars": 2}, {"sum_logits": -1.7951312065124512, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.7951312065124512, "logits_per_char": -0.8975656032562256, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 396, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.904968023300171, "incorrect_loss_raw": 0.16963759064674377, "correct_loss_per_char": 0.9524840116500854, "incorrect_loss_per_char": 0.08481879532337189, "correct_loss_per_token": 1.904968023300171, "incorrect_loss_per_token": 0.16963759064674377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16963759064674377, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.16963759064674377, "logits_per_char": -0.08481879532337189, "num_chars": 2}, {"sum_logits": -1.904968023300171, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.904968023300171, "logits_per_char": -0.9524840116500854, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 397, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25362810492515564, "incorrect_loss_raw": 1.5202370882034302, "correct_loss_per_char": 0.12681405246257782, "incorrect_loss_per_char": 0.7601185441017151, "correct_loss_per_token": 0.25362810492515564, "incorrect_loss_per_token": 1.5202370882034302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25362810492515564, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": true, "logits_per_token": -0.25362810492515564, "logits_per_char": -0.12681405246257782, "num_chars": 2}, {"sum_logits": -1.5202370882034302, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": false, "logits_per_token": -1.5202370882034302, "logits_per_char": -0.7601185441017151, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 398, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19822649657726288, "incorrect_loss_raw": 1.7685153484344482, "correct_loss_per_char": 0.09911324828863144, "incorrect_loss_per_char": 0.8842576742172241, "correct_loss_per_token": 0.19822649657726288, "incorrect_loss_per_token": 1.7685153484344482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19822649657726288, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.19822649657726288, "logits_per_char": -0.09911324828863144, "num_chars": 2}, {"sum_logits": -1.7685153484344482, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.7685153484344482, "logits_per_char": -0.8842576742172241, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 399, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28010979294776917, "incorrect_loss_raw": 1.446391224861145, "correct_loss_per_char": 0.14005489647388458, "incorrect_loss_per_char": 0.7231956124305725, "correct_loss_per_token": 0.28010979294776917, "incorrect_loss_per_token": 1.446391224861145, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28010979294776917, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.28010979294776917, "logits_per_char": -0.14005489647388458, "num_chars": 2}, {"sum_logits": -1.446391224861145, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.446391224861145, "logits_per_char": -0.7231956124305725, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 400, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6288127899169922, "incorrect_loss_raw": 0.22864748537540436, "correct_loss_per_char": 0.8144063949584961, "incorrect_loss_per_char": 0.11432374268770218, "correct_loss_per_token": 1.6288127899169922, "incorrect_loss_per_token": 0.22864748537540436, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22864748537540436, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.22864748537540436, "logits_per_char": -0.11432374268770218, "num_chars": 2}, {"sum_logits": -1.6288127899169922, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.6288127899169922, "logits_per_char": -0.8144063949584961, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 401, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6127601861953735, "incorrect_loss_raw": 0.2315443605184555, "correct_loss_per_char": 0.8063800930976868, "incorrect_loss_per_char": 0.11577218025922775, "correct_loss_per_token": 1.6127601861953735, "incorrect_loss_per_token": 0.2315443605184555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2315443605184555, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.2315443605184555, "logits_per_char": -0.11577218025922775, "num_chars": 2}, {"sum_logits": -1.6127601861953735, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.6127601861953735, "logits_per_char": -0.8063800930976868, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 402, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29661133885383606, "incorrect_loss_raw": 1.405260443687439, "correct_loss_per_char": 0.14830566942691803, "incorrect_loss_per_char": 0.7026302218437195, "correct_loss_per_token": 0.29661133885383606, "incorrect_loss_per_token": 1.405260443687439, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29661133885383606, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": true, "logits_per_token": -0.29661133885383606, "logits_per_char": -0.14830566942691803, "num_chars": 2}, {"sum_logits": -1.405260443687439, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.405260443687439, "logits_per_char": -0.7026302218437195, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 403, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2742111384868622, "incorrect_loss_raw": 1.4999654293060303, "correct_loss_per_char": 0.1371055692434311, "incorrect_loss_per_char": 0.7499827146530151, "correct_loss_per_token": 0.2742111384868622, "incorrect_loss_per_token": 1.4999654293060303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2742111384868622, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -0.2742111384868622, "logits_per_char": -0.1371055692434311, "num_chars": 2}, {"sum_logits": -1.4999654293060303, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.4999654293060303, "logits_per_char": -0.7499827146530151, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 404, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22040648758411407, "incorrect_loss_raw": 1.6676865816116333, "correct_loss_per_char": 0.11020324379205704, "incorrect_loss_per_char": 0.8338432908058167, "correct_loss_per_token": 0.22040648758411407, "incorrect_loss_per_token": 1.6676865816116333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22040648758411407, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.22040648758411407, "logits_per_char": -0.11020324379205704, "num_chars": 2}, {"sum_logits": -1.6676865816116333, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.6676865816116333, "logits_per_char": -0.8338432908058167, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 405, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.867865800857544, "incorrect_loss_raw": 0.18022151291370392, "correct_loss_per_char": 0.933932900428772, "incorrect_loss_per_char": 0.09011075645685196, "correct_loss_per_token": 1.867865800857544, "incorrect_loss_per_token": 0.18022151291370392, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18022151291370392, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -0.18022151291370392, "logits_per_char": -0.09011075645685196, "num_chars": 2}, {"sum_logits": -1.867865800857544, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.867865800857544, "logits_per_char": -0.933932900428772, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 406, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5965886116027832, "incorrect_loss_raw": 0.23663662374019623, "correct_loss_per_char": 0.7982943058013916, "incorrect_loss_per_char": 0.11831831187009811, "correct_loss_per_token": 1.5965886116027832, "incorrect_loss_per_token": 0.23663662374019623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23663662374019623, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.23663662374019623, "logits_per_char": -0.11831831187009811, "num_chars": 2}, {"sum_logits": -1.5965886116027832, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.5965886116027832, "logits_per_char": -0.7982943058013916, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 407, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4976043701171875, "incorrect_loss_raw": 0.26377567648887634, "correct_loss_per_char": 0.7488021850585938, "incorrect_loss_per_char": 0.13188783824443817, "correct_loss_per_token": 1.4976043701171875, "incorrect_loss_per_token": 0.26377567648887634, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26377567648887634, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.26377567648887634, "logits_per_char": -0.13188783824443817, "num_chars": 2}, {"sum_logits": -1.4976043701171875, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.4976043701171875, "logits_per_char": -0.7488021850585938, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 408, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2912251949310303, "incorrect_loss_raw": 1.4152019023895264, "correct_loss_per_char": 0.14561259746551514, "incorrect_loss_per_char": 0.7076009511947632, "correct_loss_per_token": 0.2912251949310303, "incorrect_loss_per_token": 1.4152019023895264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2912251949310303, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.2912251949310303, "logits_per_char": -0.14561259746551514, "num_chars": 2}, {"sum_logits": -1.4152019023895264, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.4152019023895264, "logits_per_char": -0.7076009511947632, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 409, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21004612743854523, "incorrect_loss_raw": 1.6933680772781372, "correct_loss_per_char": 0.10502306371927261, "incorrect_loss_per_char": 0.8466840386390686, "correct_loss_per_token": 0.21004612743854523, "incorrect_loss_per_token": 1.6933680772781372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21004612743854523, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -0.21004612743854523, "logits_per_char": -0.10502306371927261, "num_chars": 2}, {"sum_logits": -1.6933680772781372, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.6933680772781372, "logits_per_char": -0.8466840386390686, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 410, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8075543642044067, "incorrect_loss_raw": 0.19222065806388855, "correct_loss_per_char": 0.9037771821022034, "incorrect_loss_per_char": 0.09611032903194427, "correct_loss_per_token": 1.8075543642044067, "incorrect_loss_per_token": 0.19222065806388855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19222065806388855, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.19222065806388855, "logits_per_char": -0.09611032903194427, "num_chars": 2}, {"sum_logits": -1.8075543642044067, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.8075543642044067, "logits_per_char": -0.9037771821022034, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 411, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2405644655227661, "incorrect_loss_raw": 1.5859535932540894, "correct_loss_per_char": 0.12028223276138306, "incorrect_loss_per_char": 0.7929767966270447, "correct_loss_per_token": 0.2405644655227661, "incorrect_loss_per_token": 1.5859535932540894, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2405644655227661, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.2405644655227661, "logits_per_char": -0.12028223276138306, "num_chars": 2}, {"sum_logits": -1.5859535932540894, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.5859535932540894, "logits_per_char": -0.7929767966270447, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 412, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20543977618217468, "incorrect_loss_raw": 1.7411584854125977, "correct_loss_per_char": 0.10271988809108734, "incorrect_loss_per_char": 0.8705792427062988, "correct_loss_per_token": 0.20543977618217468, "incorrect_loss_per_token": 1.7411584854125977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20543977618217468, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.20543977618217468, "logits_per_char": -0.10271988809108734, "num_chars": 2}, {"sum_logits": -1.7411584854125977, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.7411584854125977, "logits_per_char": -0.8705792427062988, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 413, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.376232624053955, "incorrect_loss_raw": 0.3133329451084137, "correct_loss_per_char": 0.6881163120269775, "incorrect_loss_per_char": 0.15666647255420685, "correct_loss_per_token": 1.376232624053955, "incorrect_loss_per_token": 0.3133329451084137, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3133329451084137, "num_tokens": 1, "num_tokens_all": 1233, "is_greedy": true, "logits_per_token": -0.3133329451084137, "logits_per_char": -0.15666647255420685, "num_chars": 2}, {"sum_logits": -1.376232624053955, "num_tokens": 1, "num_tokens_all": 1233, "is_greedy": false, "logits_per_token": -1.376232624053955, "logits_per_char": -0.6881163120269775, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 414, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6911022663116455, "incorrect_loss_raw": 0.21268369257450104, "correct_loss_per_char": 0.8455511331558228, "incorrect_loss_per_char": 0.10634184628725052, "correct_loss_per_token": 1.6911022663116455, "incorrect_loss_per_token": 0.21268369257450104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21268369257450104, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.21268369257450104, "logits_per_char": -0.10634184628725052, "num_chars": 2}, {"sum_logits": -1.6911022663116455, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.6911022663116455, "logits_per_char": -0.8455511331558228, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 415, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22001078724861145, "incorrect_loss_raw": 1.6458730697631836, "correct_loss_per_char": 0.11000539362430573, "incorrect_loss_per_char": 0.8229365348815918, "correct_loss_per_token": 0.22001078724861145, "incorrect_loss_per_token": 1.6458730697631836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22001078724861145, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.22001078724861145, "logits_per_char": -0.11000539362430573, "num_chars": 2}, {"sum_logits": -1.6458730697631836, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.6458730697631836, "logits_per_char": -0.8229365348815918, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 416, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22621385753154755, "incorrect_loss_raw": 1.6453722715377808, "correct_loss_per_char": 0.11310692876577377, "incorrect_loss_per_char": 0.8226861357688904, "correct_loss_per_token": 0.22621385753154755, "incorrect_loss_per_token": 1.6453722715377808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22621385753154755, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.22621385753154755, "logits_per_char": -0.11310692876577377, "num_chars": 2}, {"sum_logits": -1.6453722715377808, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.6453722715377808, "logits_per_char": -0.8226861357688904, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 417, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25959905982017517, "incorrect_loss_raw": 1.5308557748794556, "correct_loss_per_char": 0.12979952991008759, "incorrect_loss_per_char": 0.7654278874397278, "correct_loss_per_token": 0.25959905982017517, "incorrect_loss_per_token": 1.5308557748794556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25959905982017517, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -0.25959905982017517, "logits_per_char": -0.12979952991008759, "num_chars": 2}, {"sum_logits": -1.5308557748794556, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.5308557748794556, "logits_per_char": -0.7654278874397278, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 418, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3382090330123901, "incorrect_loss_raw": 0.3264878988265991, "correct_loss_per_char": 0.6691045165061951, "incorrect_loss_per_char": 0.16324394941329956, "correct_loss_per_token": 1.3382090330123901, "incorrect_loss_per_token": 0.3264878988265991, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3264878988265991, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": true, "logits_per_token": -0.3264878988265991, "logits_per_char": -0.16324394941329956, "num_chars": 2}, {"sum_logits": -1.3382090330123901, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": false, "logits_per_token": -1.3382090330123901, "logits_per_char": -0.6691045165061951, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 419, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27373644709587097, "incorrect_loss_raw": 1.4715454578399658, "correct_loss_per_char": 0.13686822354793549, "incorrect_loss_per_char": 0.7357727289199829, "correct_loss_per_token": 0.27373644709587097, "incorrect_loss_per_token": 1.4715454578399658, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27373644709587097, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": true, "logits_per_token": -0.27373644709587097, "logits_per_char": -0.13686822354793549, "num_chars": 2}, {"sum_logits": -1.4715454578399658, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.4715454578399658, "logits_per_char": -0.7357727289199829, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 420, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18556171655654907, "incorrect_loss_raw": 1.8186376094818115, "correct_loss_per_char": 0.09278085827827454, "incorrect_loss_per_char": 0.9093188047409058, "correct_loss_per_token": 0.18556171655654907, "incorrect_loss_per_token": 1.8186376094818115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18556171655654907, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.18556171655654907, "logits_per_char": -0.09278085827827454, "num_chars": 2}, {"sum_logits": -1.8186376094818115, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.8186376094818115, "logits_per_char": -0.9093188047409058, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 421, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8309056758880615, "incorrect_loss_raw": 0.18320058286190033, "correct_loss_per_char": 0.9154528379440308, "incorrect_loss_per_char": 0.09160029143095016, "correct_loss_per_token": 1.8309056758880615, "incorrect_loss_per_token": 0.18320058286190033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18320058286190033, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": true, "logits_per_token": -0.18320058286190033, "logits_per_char": -0.09160029143095016, "num_chars": 2}, {"sum_logits": -1.8309056758880615, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": false, "logits_per_token": -1.8309056758880615, "logits_per_char": -0.9154528379440308, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 422, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5596411228179932, "incorrect_loss_raw": 0.24754805862903595, "correct_loss_per_char": 0.7798205614089966, "incorrect_loss_per_char": 0.12377402931451797, "correct_loss_per_token": 1.5596411228179932, "incorrect_loss_per_token": 0.24754805862903595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24754805862903595, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.24754805862903595, "logits_per_char": -0.12377402931451797, "num_chars": 2}, {"sum_logits": -1.5596411228179932, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.5596411228179932, "logits_per_char": -0.7798205614089966, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 423, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7725298404693604, "incorrect_loss_raw": 0.20000019669532776, "correct_loss_per_char": 0.8862649202346802, "incorrect_loss_per_char": 0.10000009834766388, "correct_loss_per_token": 1.7725298404693604, "incorrect_loss_per_token": 0.20000019669532776, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20000019669532776, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.20000019669532776, "logits_per_char": -0.10000009834766388, "num_chars": 2}, {"sum_logits": -1.7725298404693604, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.7725298404693604, "logits_per_char": -0.8862649202346802, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 424, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.685609221458435, "incorrect_loss_raw": 0.21807827055454254, "correct_loss_per_char": 0.8428046107292175, "incorrect_loss_per_char": 0.10903913527727127, "correct_loss_per_token": 1.685609221458435, "incorrect_loss_per_token": 0.21807827055454254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21807827055454254, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.21807827055454254, "logits_per_char": -0.10903913527727127, "num_chars": 2}, {"sum_logits": -1.685609221458435, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.685609221458435, "logits_per_char": -0.8428046107292175, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 425, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.493728518486023, "incorrect_loss_raw": 0.2722235918045044, "correct_loss_per_char": 0.7468642592430115, "incorrect_loss_per_char": 0.1361117959022522, "correct_loss_per_token": 1.493728518486023, "incorrect_loss_per_token": 0.2722235918045044, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2722235918045044, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -0.2722235918045044, "logits_per_char": -0.1361117959022522, "num_chars": 2}, {"sum_logits": -1.493728518486023, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.493728518486023, "logits_per_char": -0.7468642592430115, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 426, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5599557161331177, "incorrect_loss_raw": 0.24687114357948303, "correct_loss_per_char": 0.7799778580665588, "incorrect_loss_per_char": 0.12343557178974152, "correct_loss_per_token": 1.5599557161331177, "incorrect_loss_per_token": 0.24687114357948303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24687114357948303, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -0.24687114357948303, "logits_per_char": -0.12343557178974152, "num_chars": 2}, {"sum_logits": -1.5599557161331177, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.5599557161331177, "logits_per_char": -0.7799778580665588, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 427, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21658331155776978, "incorrect_loss_raw": 1.6850605010986328, "correct_loss_per_char": 0.10829165577888489, "incorrect_loss_per_char": 0.8425302505493164, "correct_loss_per_token": 0.21658331155776978, "incorrect_loss_per_token": 1.6850605010986328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21658331155776978, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.21658331155776978, "logits_per_char": -0.10829165577888489, "num_chars": 2}, {"sum_logits": -1.6850605010986328, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.6850605010986328, "logits_per_char": -0.8425302505493164, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 428, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23758281767368317, "incorrect_loss_raw": 1.6192103624343872, "correct_loss_per_char": 0.11879140883684158, "incorrect_loss_per_char": 0.8096051812171936, "correct_loss_per_token": 0.23758281767368317, "incorrect_loss_per_token": 1.6192103624343872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23758281767368317, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": true, "logits_per_token": -0.23758281767368317, "logits_per_char": -0.11879140883684158, "num_chars": 2}, {"sum_logits": -1.6192103624343872, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": false, "logits_per_token": -1.6192103624343872, "logits_per_char": -0.8096051812171936, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 429, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19780853390693665, "incorrect_loss_raw": 1.7824556827545166, "correct_loss_per_char": 0.09890426695346832, "incorrect_loss_per_char": 0.8912278413772583, "correct_loss_per_token": 0.19780853390693665, "incorrect_loss_per_token": 1.7824556827545166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19780853390693665, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -0.19780853390693665, "logits_per_char": -0.09890426695346832, "num_chars": 2}, {"sum_logits": -1.7824556827545166, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -1.7824556827545166, "logits_per_char": -0.8912278413772583, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 430, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21638794243335724, "incorrect_loss_raw": 1.6768606901168823, "correct_loss_per_char": 0.10819397121667862, "incorrect_loss_per_char": 0.8384303450584412, "correct_loss_per_token": 0.21638794243335724, "incorrect_loss_per_token": 1.6768606901168823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21638794243335724, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.21638794243335724, "logits_per_char": -0.10819397121667862, "num_chars": 2}, {"sum_logits": -1.6768606901168823, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.6768606901168823, "logits_per_char": -0.8384303450584412, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 431, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23530946671962738, "incorrect_loss_raw": 1.6083707809448242, "correct_loss_per_char": 0.11765473335981369, "incorrect_loss_per_char": 0.8041853904724121, "correct_loss_per_token": 0.23530946671962738, "incorrect_loss_per_token": 1.6083707809448242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23530946671962738, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.23530946671962738, "logits_per_char": -0.11765473335981369, "num_chars": 2}, {"sum_logits": -1.6083707809448242, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.6083707809448242, "logits_per_char": -0.8041853904724121, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 432, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24628666043281555, "incorrect_loss_raw": 1.5659513473510742, "correct_loss_per_char": 0.12314333021640778, "incorrect_loss_per_char": 0.7829756736755371, "correct_loss_per_token": 0.24628666043281555, "incorrect_loss_per_token": 1.5659513473510742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24628666043281555, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.24628666043281555, "logits_per_char": -0.12314333021640778, "num_chars": 2}, {"sum_logits": -1.5659513473510742, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -1.5659513473510742, "logits_per_char": -0.7829756736755371, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 433, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2420278787612915, "incorrect_loss_raw": 1.572023630142212, "correct_loss_per_char": 0.12101393938064575, "incorrect_loss_per_char": 0.786011815071106, "correct_loss_per_token": 0.2420278787612915, "incorrect_loss_per_token": 1.572023630142212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2420278787612915, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.2420278787612915, "logits_per_char": -0.12101393938064575, "num_chars": 2}, {"sum_logits": -1.572023630142212, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.572023630142212, "logits_per_char": -0.786011815071106, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 434, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28965309262275696, "incorrect_loss_raw": 1.4195959568023682, "correct_loss_per_char": 0.14482654631137848, "incorrect_loss_per_char": 0.7097979784011841, "correct_loss_per_token": 0.28965309262275696, "incorrect_loss_per_token": 1.4195959568023682, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28965309262275696, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.28965309262275696, "logits_per_char": -0.14482654631137848, "num_chars": 2}, {"sum_logits": -1.4195959568023682, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.4195959568023682, "logits_per_char": -0.7097979784011841, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 435, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3049514591693878, "incorrect_loss_raw": 1.3703503608703613, "correct_loss_per_char": 0.1524757295846939, "incorrect_loss_per_char": 0.6851751804351807, "correct_loss_per_token": 0.3049514591693878, "incorrect_loss_per_token": 1.3703503608703613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3049514591693878, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.3049514591693878, "logits_per_char": -0.1524757295846939, "num_chars": 2}, {"sum_logits": -1.3703503608703613, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.3703503608703613, "logits_per_char": -0.6851751804351807, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 436, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2275247573852539, "incorrect_loss_raw": 1.6385343074798584, "correct_loss_per_char": 0.11376237869262695, "incorrect_loss_per_char": 0.8192671537399292, "correct_loss_per_token": 0.2275247573852539, "incorrect_loss_per_token": 1.6385343074798584, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2275247573852539, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.2275247573852539, "logits_per_char": -0.11376237869262695, "num_chars": 2}, {"sum_logits": -1.6385343074798584, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.6385343074798584, "logits_per_char": -0.8192671537399292, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 437, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23927517235279083, "incorrect_loss_raw": 1.6053223609924316, "correct_loss_per_char": 0.11963758617639542, "incorrect_loss_per_char": 0.8026611804962158, "correct_loss_per_token": 0.23927517235279083, "incorrect_loss_per_token": 1.6053223609924316, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23927517235279083, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.23927517235279083, "logits_per_char": -0.11963758617639542, "num_chars": 2}, {"sum_logits": -1.6053223609924316, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.6053223609924316, "logits_per_char": -0.8026611804962158, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 438, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26757383346557617, "incorrect_loss_raw": 1.5015056133270264, "correct_loss_per_char": 0.13378691673278809, "incorrect_loss_per_char": 0.7507528066635132, "correct_loss_per_token": 0.26757383346557617, "incorrect_loss_per_token": 1.5015056133270264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26757383346557617, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -0.26757383346557617, "logits_per_char": -0.13378691673278809, "num_chars": 2}, {"sum_logits": -1.5015056133270264, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.5015056133270264, "logits_per_char": -0.7507528066635132, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 439, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2719515264034271, "incorrect_loss_raw": 1.4869325160980225, "correct_loss_per_char": 0.13597576320171356, "incorrect_loss_per_char": 0.7434662580490112, "correct_loss_per_token": 0.2719515264034271, "incorrect_loss_per_token": 1.4869325160980225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2719515264034271, "num_tokens": 1, "num_tokens_all": 1209, "is_greedy": true, "logits_per_token": -0.2719515264034271, "logits_per_char": -0.13597576320171356, "num_chars": 2}, {"sum_logits": -1.4869325160980225, "num_tokens": 1, "num_tokens_all": 1209, "is_greedy": false, "logits_per_token": -1.4869325160980225, "logits_per_char": -0.7434662580490112, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 440, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21417976915836334, "incorrect_loss_raw": 1.7022221088409424, "correct_loss_per_char": 0.10708988457918167, "incorrect_loss_per_char": 0.8511110544204712, "correct_loss_per_token": 0.21417976915836334, "incorrect_loss_per_token": 1.7022221088409424, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21417976915836334, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -0.21417976915836334, "logits_per_char": -0.10708988457918167, "num_chars": 2}, {"sum_logits": -1.7022221088409424, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.7022221088409424, "logits_per_char": -0.8511110544204712, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 441, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20397497713565826, "incorrect_loss_raw": 1.7353715896606445, "correct_loss_per_char": 0.10198748856782913, "incorrect_loss_per_char": 0.8676857948303223, "correct_loss_per_token": 0.20397497713565826, "incorrect_loss_per_token": 1.7353715896606445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20397497713565826, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.20397497713565826, "logits_per_char": -0.10198748856782913, "num_chars": 2}, {"sum_logits": -1.7353715896606445, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.7353715896606445, "logits_per_char": -0.8676857948303223, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 442, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4817194938659668, "incorrect_loss_raw": 0.2710825800895691, "correct_loss_per_char": 0.7408597469329834, "incorrect_loss_per_char": 0.13554129004478455, "correct_loss_per_token": 1.4817194938659668, "incorrect_loss_per_token": 0.2710825800895691, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2710825800895691, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": true, "logits_per_token": -0.2710825800895691, "logits_per_char": -0.13554129004478455, "num_chars": 2}, {"sum_logits": -1.4817194938659668, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.4817194938659668, "logits_per_char": -0.7408597469329834, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 443, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22051970660686493, "incorrect_loss_raw": 1.670148253440857, "correct_loss_per_char": 0.11025985330343246, "incorrect_loss_per_char": 0.8350741267204285, "correct_loss_per_token": 0.22051970660686493, "incorrect_loss_per_token": 1.670148253440857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22051970660686493, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.22051970660686493, "logits_per_char": -0.11025985330343246, "num_chars": 2}, {"sum_logits": -1.670148253440857, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.670148253440857, "logits_per_char": -0.8350741267204285, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 444, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21254047751426697, "incorrect_loss_raw": 1.6980656385421753, "correct_loss_per_char": 0.10627023875713348, "incorrect_loss_per_char": 0.8490328192710876, "correct_loss_per_token": 0.21254047751426697, "incorrect_loss_per_token": 1.6980656385421753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21254047751426697, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.21254047751426697, "logits_per_char": -0.10627023875713348, "num_chars": 2}, {"sum_logits": -1.6980656385421753, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.6980656385421753, "logits_per_char": -0.8490328192710876, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 445, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21945424377918243, "incorrect_loss_raw": 1.6728014945983887, "correct_loss_per_char": 0.10972712188959122, "incorrect_loss_per_char": 0.8364007472991943, "correct_loss_per_token": 0.21945424377918243, "incorrect_loss_per_token": 1.6728014945983887, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21945424377918243, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": true, "logits_per_token": -0.21945424377918243, "logits_per_char": -0.10972712188959122, "num_chars": 2}, {"sum_logits": -1.6728014945983887, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -1.6728014945983887, "logits_per_char": -0.8364007472991943, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 446, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.381624698638916, "incorrect_loss_raw": 0.30364784598350525, "correct_loss_per_char": 0.690812349319458, "incorrect_loss_per_char": 0.15182392299175262, "correct_loss_per_token": 1.381624698638916, "incorrect_loss_per_token": 0.30364784598350525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30364784598350525, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": true, "logits_per_token": -0.30364784598350525, "logits_per_char": -0.15182392299175262, "num_chars": 2}, {"sum_logits": -1.381624698638916, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": false, "logits_per_token": -1.381624698638916, "logits_per_char": -0.690812349319458, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 447, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1810043603181839, "incorrect_loss_raw": 1.84528386592865, "correct_loss_per_char": 0.09050218015909195, "incorrect_loss_per_char": 0.922641932964325, "correct_loss_per_token": 0.1810043603181839, "incorrect_loss_per_token": 1.84528386592865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1810043603181839, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -0.1810043603181839, "logits_per_char": -0.09050218015909195, "num_chars": 2}, {"sum_logits": -1.84528386592865, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.84528386592865, "logits_per_char": -0.922641932964325, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 448, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7197043895721436, "incorrect_loss_raw": 0.2054247260093689, "correct_loss_per_char": 0.8598521947860718, "incorrect_loss_per_char": 0.10271236300468445, "correct_loss_per_token": 1.7197043895721436, "incorrect_loss_per_token": 0.2054247260093689, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2054247260093689, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.2054247260093689, "logits_per_char": -0.10271236300468445, "num_chars": 2}, {"sum_logits": -1.7197043895721436, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.7197043895721436, "logits_per_char": -0.8598521947860718, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 449, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.596835732460022, "incorrect_loss_raw": 0.23886922001838684, "correct_loss_per_char": 0.798417866230011, "incorrect_loss_per_char": 0.11943461000919342, "correct_loss_per_token": 1.596835732460022, "incorrect_loss_per_token": 0.23886922001838684, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23886922001838684, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.23886922001838684, "logits_per_char": -0.11943461000919342, "num_chars": 2}, {"sum_logits": -1.596835732460022, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.596835732460022, "logits_per_char": -0.798417866230011, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 450, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2877217233181, "incorrect_loss_raw": 1.4366718530654907, "correct_loss_per_char": 0.14386086165905, "incorrect_loss_per_char": 0.7183359265327454, "correct_loss_per_token": 0.2877217233181, "incorrect_loss_per_token": 1.4366718530654907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2877217233181, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": true, "logits_per_token": -0.2877217233181, "logits_per_char": -0.14386086165905, "num_chars": 2}, {"sum_logits": -1.4366718530654907, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -1.4366718530654907, "logits_per_char": -0.7183359265327454, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 451, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2463836669921875, "incorrect_loss_raw": 1.551694393157959, "correct_loss_per_char": 0.12319183349609375, "incorrect_loss_per_char": 0.7758471965789795, "correct_loss_per_token": 0.2463836669921875, "incorrect_loss_per_token": 1.551694393157959, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2463836669921875, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": true, "logits_per_token": -0.2463836669921875, "logits_per_char": -0.12319183349609375, "num_chars": 2}, {"sum_logits": -1.551694393157959, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": false, "logits_per_token": -1.551694393157959, "logits_per_char": -0.7758471965789795, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 452, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18781228363513947, "incorrect_loss_raw": 1.8223745822906494, "correct_loss_per_char": 0.09390614181756973, "incorrect_loss_per_char": 0.9111872911453247, "correct_loss_per_token": 0.18781228363513947, "incorrect_loss_per_token": 1.8223745822906494, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18781228363513947, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.18781228363513947, "logits_per_char": -0.09390614181756973, "num_chars": 2}, {"sum_logits": -1.8223745822906494, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -1.8223745822906494, "logits_per_char": -0.9111872911453247, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 453, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25098976492881775, "incorrect_loss_raw": 1.5370299816131592, "correct_loss_per_char": 0.12549488246440887, "incorrect_loss_per_char": 0.7685149908065796, "correct_loss_per_token": 0.25098976492881775, "incorrect_loss_per_token": 1.5370299816131592, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25098976492881775, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.25098976492881775, "logits_per_char": -0.12549488246440887, "num_chars": 2}, {"sum_logits": -1.5370299816131592, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.5370299816131592, "logits_per_char": -0.7685149908065796, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 454, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2835961878299713, "incorrect_loss_raw": 1.4537428617477417, "correct_loss_per_char": 0.14179809391498566, "incorrect_loss_per_char": 0.7268714308738708, "correct_loss_per_token": 0.2835961878299713, "incorrect_loss_per_token": 1.4537428617477417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2835961878299713, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": true, "logits_per_token": -0.2835961878299713, "logits_per_char": -0.14179809391498566, "num_chars": 2}, {"sum_logits": -1.4537428617477417, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": false, "logits_per_token": -1.4537428617477417, "logits_per_char": -0.7268714308738708, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 455, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4119834899902344, "incorrect_loss_raw": 0.29212093353271484, "correct_loss_per_char": 0.7059917449951172, "incorrect_loss_per_char": 0.14606046676635742, "correct_loss_per_token": 1.4119834899902344, "incorrect_loss_per_token": 0.29212093353271484, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29212093353271484, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": true, "logits_per_token": -0.29212093353271484, "logits_per_char": -0.14606046676635742, "num_chars": 2}, {"sum_logits": -1.4119834899902344, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -1.4119834899902344, "logits_per_char": -0.7059917449951172, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 456, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25882554054260254, "incorrect_loss_raw": 1.5353004932403564, "correct_loss_per_char": 0.12941277027130127, "incorrect_loss_per_char": 0.7676502466201782, "correct_loss_per_token": 0.25882554054260254, "incorrect_loss_per_token": 1.5353004932403564, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25882554054260254, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": true, "logits_per_token": -0.25882554054260254, "logits_per_char": -0.12941277027130127, "num_chars": 2}, {"sum_logits": -1.5353004932403564, "num_tokens": 1, "num_tokens_all": 1113, "is_greedy": false, "logits_per_token": -1.5353004932403564, "logits_per_char": -0.7676502466201782, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 457, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2032908201217651, "incorrect_loss_raw": 0.3689306080341339, "correct_loss_per_char": 0.6016454100608826, "incorrect_loss_per_char": 0.18446530401706696, "correct_loss_per_token": 1.2032908201217651, "incorrect_loss_per_token": 0.3689306080341339, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3689306080341339, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.3689306080341339, "logits_per_char": -0.18446530401706696, "num_chars": 2}, {"sum_logits": -1.2032908201217651, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.2032908201217651, "logits_per_char": -0.6016454100608826, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 458, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2870507836341858, "incorrect_loss_raw": 1.4293229579925537, "correct_loss_per_char": 0.1435253918170929, "incorrect_loss_per_char": 0.7146614789962769, "correct_loss_per_token": 0.2870507836341858, "incorrect_loss_per_token": 1.4293229579925537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2870507836341858, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": true, "logits_per_token": -0.2870507836341858, "logits_per_char": -0.1435253918170929, "num_chars": 2}, {"sum_logits": -1.4293229579925537, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -1.4293229579925537, "logits_per_char": -0.7146614789962769, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 459, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21843497455120087, "incorrect_loss_raw": 1.6790140867233276, "correct_loss_per_char": 0.10921748727560043, "incorrect_loss_per_char": 0.8395070433616638, "correct_loss_per_token": 0.21843497455120087, "incorrect_loss_per_token": 1.6790140867233276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21843497455120087, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -0.21843497455120087, "logits_per_char": -0.10921748727560043, "num_chars": 2}, {"sum_logits": -1.6790140867233276, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.6790140867233276, "logits_per_char": -0.8395070433616638, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 460, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3129124641418457, "incorrect_loss_raw": 1.3484798669815063, "correct_loss_per_char": 0.15645623207092285, "incorrect_loss_per_char": 0.6742399334907532, "correct_loss_per_token": 0.3129124641418457, "incorrect_loss_per_token": 1.3484798669815063, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3129124641418457, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": true, "logits_per_token": -0.3129124641418457, "logits_per_char": -0.15645623207092285, "num_chars": 2}, {"sum_logits": -1.3484798669815063, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.3484798669815063, "logits_per_char": -0.6742399334907532, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 461, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2273751050233841, "incorrect_loss_raw": 1.6517388820648193, "correct_loss_per_char": 0.11368755251169205, "incorrect_loss_per_char": 0.8258694410324097, "correct_loss_per_token": 0.2273751050233841, "incorrect_loss_per_token": 1.6517388820648193, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2273751050233841, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": true, "logits_per_token": -0.2273751050233841, "logits_per_char": -0.11368755251169205, "num_chars": 2}, {"sum_logits": -1.6517388820648193, "num_tokens": 1, "num_tokens_all": 1119, "is_greedy": false, "logits_per_token": -1.6517388820648193, "logits_per_char": -0.8258694410324097, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 462, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26158061623573303, "incorrect_loss_raw": 1.5021227598190308, "correct_loss_per_char": 0.13079030811786652, "incorrect_loss_per_char": 0.7510613799095154, "correct_loss_per_token": 0.26158061623573303, "incorrect_loss_per_token": 1.5021227598190308, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26158061623573303, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": true, "logits_per_token": -0.26158061623573303, "logits_per_char": -0.13079030811786652, "num_chars": 2}, {"sum_logits": -1.5021227598190308, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.5021227598190308, "logits_per_char": -0.7510613799095154, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 463, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2620144188404083, "incorrect_loss_raw": 1.5284525156021118, "correct_loss_per_char": 0.13100720942020416, "incorrect_loss_per_char": 0.7642262578010559, "correct_loss_per_token": 0.2620144188404083, "incorrect_loss_per_token": 1.5284525156021118, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2620144188404083, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.2620144188404083, "logits_per_char": -0.13100720942020416, "num_chars": 2}, {"sum_logits": -1.5284525156021118, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.5284525156021118, "logits_per_char": -0.7642262578010559, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 464, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25108417868614197, "incorrect_loss_raw": 1.5431228876113892, "correct_loss_per_char": 0.12554208934307098, "incorrect_loss_per_char": 0.7715614438056946, "correct_loss_per_token": 0.25108417868614197, "incorrect_loss_per_token": 1.5431228876113892, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25108417868614197, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.25108417868614197, "logits_per_char": -0.12554208934307098, "num_chars": 2}, {"sum_logits": -1.5431228876113892, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.5431228876113892, "logits_per_char": -0.7715614438056946, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 465, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19811014831066132, "incorrect_loss_raw": 1.7524070739746094, "correct_loss_per_char": 0.09905507415533066, "incorrect_loss_per_char": 0.8762035369873047, "correct_loss_per_token": 0.19811014831066132, "incorrect_loss_per_token": 1.7524070739746094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19811014831066132, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.19811014831066132, "logits_per_char": -0.09905507415533066, "num_chars": 2}, {"sum_logits": -1.7524070739746094, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.7524070739746094, "logits_per_char": -0.8762035369873047, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 466, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.248787060379982, "incorrect_loss_raw": 1.5583264827728271, "correct_loss_per_char": 0.124393530189991, "incorrect_loss_per_char": 0.7791632413864136, "correct_loss_per_token": 0.248787060379982, "incorrect_loss_per_token": 1.5583264827728271, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.248787060379982, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": true, "logits_per_token": -0.248787060379982, "logits_per_char": -0.124393530189991, "num_chars": 2}, {"sum_logits": -1.5583264827728271, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -1.5583264827728271, "logits_per_char": -0.7791632413864136, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 467, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20513774454593658, "incorrect_loss_raw": 1.7278225421905518, "correct_loss_per_char": 0.10256887227296829, "incorrect_loss_per_char": 0.8639112710952759, "correct_loss_per_token": 0.20513774454593658, "incorrect_loss_per_token": 1.7278225421905518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20513774454593658, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.20513774454593658, "logits_per_char": -0.10256887227296829, "num_chars": 2}, {"sum_logits": -1.7278225421905518, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.7278225421905518, "logits_per_char": -0.8639112710952759, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 468, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23836158215999603, "incorrect_loss_raw": 1.5980292558670044, "correct_loss_per_char": 0.11918079107999802, "incorrect_loss_per_char": 0.7990146279335022, "correct_loss_per_token": 0.23836158215999603, "incorrect_loss_per_token": 1.5980292558670044, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23836158215999603, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -0.23836158215999603, "logits_per_char": -0.11918079107999802, "num_chars": 2}, {"sum_logits": -1.5980292558670044, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.5980292558670044, "logits_per_char": -0.7990146279335022, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 469, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16133415699005127, "incorrect_loss_raw": 1.9638029336929321, "correct_loss_per_char": 0.08066707849502563, "incorrect_loss_per_char": 0.9819014668464661, "correct_loss_per_token": 0.16133415699005127, "incorrect_loss_per_token": 1.9638029336929321, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16133415699005127, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.16133415699005127, "logits_per_char": -0.08066707849502563, "num_chars": 2}, {"sum_logits": -1.9638029336929321, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.9638029336929321, "logits_per_char": -0.9819014668464661, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 470, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5401971340179443, "incorrect_loss_raw": 0.25406214594841003, "correct_loss_per_char": 0.7700985670089722, "incorrect_loss_per_char": 0.12703107297420502, "correct_loss_per_token": 1.5401971340179443, "incorrect_loss_per_token": 0.25406214594841003, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25406214594841003, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.25406214594841003, "logits_per_char": -0.12703107297420502, "num_chars": 2}, {"sum_logits": -1.5401971340179443, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.5401971340179443, "logits_per_char": -0.7700985670089722, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 471, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4948704242706299, "incorrect_loss_raw": 0.26952919363975525, "correct_loss_per_char": 0.7474352121353149, "incorrect_loss_per_char": 0.13476459681987762, "correct_loss_per_token": 1.4948704242706299, "incorrect_loss_per_token": 0.26952919363975525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26952919363975525, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -0.26952919363975525, "logits_per_char": -0.13476459681987762, "num_chars": 2}, {"sum_logits": -1.4948704242706299, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.4948704242706299, "logits_per_char": -0.7474352121353149, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 472, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25970369577407837, "incorrect_loss_raw": 1.519728422164917, "correct_loss_per_char": 0.12985184788703918, "incorrect_loss_per_char": 0.7598642110824585, "correct_loss_per_token": 0.25970369577407837, "incorrect_loss_per_token": 1.519728422164917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25970369577407837, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.25970369577407837, "logits_per_char": -0.12985184788703918, "num_chars": 2}, {"sum_logits": -1.519728422164917, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.519728422164917, "logits_per_char": -0.7598642110824585, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 473, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24234797060489655, "incorrect_loss_raw": 1.5884478092193604, "correct_loss_per_char": 0.12117398530244827, "incorrect_loss_per_char": 0.7942239046096802, "correct_loss_per_token": 0.24234797060489655, "incorrect_loss_per_token": 1.5884478092193604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24234797060489655, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.24234797060489655, "logits_per_char": -0.12117398530244827, "num_chars": 2}, {"sum_logits": -1.5884478092193604, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.5884478092193604, "logits_per_char": -0.7942239046096802, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 474, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1887398511171341, "incorrect_loss_raw": 1.8163713216781616, "correct_loss_per_char": 0.09436992555856705, "incorrect_loss_per_char": 0.9081856608390808, "correct_loss_per_token": 0.1887398511171341, "incorrect_loss_per_token": 1.8163713216781616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1887398511171341, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.1887398511171341, "logits_per_char": -0.09436992555856705, "num_chars": 2}, {"sum_logits": -1.8163713216781616, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.8163713216781616, "logits_per_char": -0.9081856608390808, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 475, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21288932859897614, "incorrect_loss_raw": 1.6811813116073608, "correct_loss_per_char": 0.10644466429948807, "incorrect_loss_per_char": 0.8405906558036804, "correct_loss_per_token": 0.21288932859897614, "incorrect_loss_per_token": 1.6811813116073608, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21288932859897614, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.21288932859897614, "logits_per_char": -0.10644466429948807, "num_chars": 2}, {"sum_logits": -1.6811813116073608, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.6811813116073608, "logits_per_char": -0.8405906558036804, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 476, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5796256065368652, "incorrect_loss_raw": 0.2414340078830719, "correct_loss_per_char": 0.7898128032684326, "incorrect_loss_per_char": 0.12071700394153595, "correct_loss_per_token": 1.5796256065368652, "incorrect_loss_per_token": 0.2414340078830719, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2414340078830719, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.2414340078830719, "logits_per_char": -0.12071700394153595, "num_chars": 2}, {"sum_logits": -1.5796256065368652, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.5796256065368652, "logits_per_char": -0.7898128032684326, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 477, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2499634325504303, "incorrect_loss_raw": 1.5769773721694946, "correct_loss_per_char": 0.12498171627521515, "incorrect_loss_per_char": 0.7884886860847473, "correct_loss_per_token": 0.2499634325504303, "incorrect_loss_per_token": 1.5769773721694946, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2499634325504303, "num_tokens": 1, "num_tokens_all": 1200, "is_greedy": true, "logits_per_token": -0.2499634325504303, "logits_per_char": -0.12498171627521515, "num_chars": 2}, {"sum_logits": -1.5769773721694946, "num_tokens": 1, "num_tokens_all": 1200, "is_greedy": false, "logits_per_token": -1.5769773721694946, "logits_per_char": -0.7884886860847473, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 478, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24958139657974243, "incorrect_loss_raw": 1.5659232139587402, "correct_loss_per_char": 0.12479069828987122, "incorrect_loss_per_char": 0.7829616069793701, "correct_loss_per_token": 0.24958139657974243, "incorrect_loss_per_token": 1.5659232139587402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24958139657974243, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.24958139657974243, "logits_per_char": -0.12479069828987122, "num_chars": 2}, {"sum_logits": -1.5659232139587402, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.5659232139587402, "logits_per_char": -0.7829616069793701, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 479, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.546565055847168, "incorrect_loss_raw": 0.25580865144729614, "correct_loss_per_char": 0.773282527923584, "incorrect_loss_per_char": 0.12790432572364807, "correct_loss_per_token": 1.546565055847168, "incorrect_loss_per_token": 0.25580865144729614, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25580865144729614, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.25580865144729614, "logits_per_char": -0.12790432572364807, "num_chars": 2}, {"sum_logits": -1.546565055847168, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.546565055847168, "logits_per_char": -0.773282527923584, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 480, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5090678930282593, "incorrect_loss_raw": 0.2591830790042877, "correct_loss_per_char": 0.7545339465141296, "incorrect_loss_per_char": 0.12959153950214386, "correct_loss_per_token": 1.5090678930282593, "incorrect_loss_per_token": 0.2591830790042877, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2591830790042877, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.2591830790042877, "logits_per_char": -0.12959153950214386, "num_chars": 2}, {"sum_logits": -1.5090678930282593, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.5090678930282593, "logits_per_char": -0.7545339465141296, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 481, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3892852067947388, "incorrect_loss_raw": 0.30715787410736084, "correct_loss_per_char": 0.6946426033973694, "incorrect_loss_per_char": 0.15357893705368042, "correct_loss_per_token": 1.3892852067947388, "incorrect_loss_per_token": 0.30715787410736084, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30715787410736084, "num_tokens": 1, "num_tokens_all": 1233, "is_greedy": true, "logits_per_token": -0.30715787410736084, "logits_per_char": -0.15357893705368042, "num_chars": 2}, {"sum_logits": -1.3892852067947388, "num_tokens": 1, "num_tokens_all": 1233, "is_greedy": false, "logits_per_token": -1.3892852067947388, "logits_per_char": -0.6946426033973694, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 482, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23093420267105103, "incorrect_loss_raw": 1.6306102275848389, "correct_loss_per_char": 0.11546710133552551, "incorrect_loss_per_char": 0.8153051137924194, "correct_loss_per_token": 0.23093420267105103, "incorrect_loss_per_token": 1.6306102275848389, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23093420267105103, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.23093420267105103, "logits_per_char": -0.11546710133552551, "num_chars": 2}, {"sum_logits": -1.6306102275848389, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.6306102275848389, "logits_per_char": -0.8153051137924194, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 483, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4023511409759521, "incorrect_loss_raw": 0.29830485582351685, "correct_loss_per_char": 0.7011755704879761, "incorrect_loss_per_char": 0.14915242791175842, "correct_loss_per_token": 1.4023511409759521, "incorrect_loss_per_token": 0.29830485582351685, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29830485582351685, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -0.29830485582351685, "logits_per_char": -0.14915242791175842, "num_chars": 2}, {"sum_logits": -1.4023511409759521, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.4023511409759521, "logits_per_char": -0.7011755704879761, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 484, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1412314772605896, "incorrect_loss_raw": 2.0672872066497803, "correct_loss_per_char": 0.0706157386302948, "incorrect_loss_per_char": 1.0336436033248901, "correct_loss_per_token": 0.1412314772605896, "incorrect_loss_per_token": 2.0672872066497803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1412314772605896, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.1412314772605896, "logits_per_char": -0.0706157386302948, "num_chars": 2}, {"sum_logits": -2.0672872066497803, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -2.0672872066497803, "logits_per_char": -1.0336436033248901, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 485, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2129351794719696, "incorrect_loss_raw": 1.7152055501937866, "correct_loss_per_char": 0.1064675897359848, "incorrect_loss_per_char": 0.8576027750968933, "correct_loss_per_token": 0.2129351794719696, "incorrect_loss_per_token": 1.7152055501937866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2129351794719696, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -0.2129351794719696, "logits_per_char": -0.1064675897359848, "num_chars": 2}, {"sum_logits": -1.7152055501937866, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.7152055501937866, "logits_per_char": -0.8576027750968933, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 486, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30055344104766846, "incorrect_loss_raw": 1.3901010751724243, "correct_loss_per_char": 0.15027672052383423, "incorrect_loss_per_char": 0.6950505375862122, "correct_loss_per_token": 0.30055344104766846, "incorrect_loss_per_token": 1.3901010751724243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30055344104766846, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": true, "logits_per_token": -0.30055344104766846, "logits_per_char": -0.15027672052383423, "num_chars": 2}, {"sum_logits": -1.3901010751724243, "num_tokens": 1, "num_tokens_all": 1077, "is_greedy": false, "logits_per_token": -1.3901010751724243, "logits_per_char": -0.6950505375862122, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 487, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26444271206855774, "incorrect_loss_raw": 1.4912059307098389, "correct_loss_per_char": 0.13222135603427887, "incorrect_loss_per_char": 0.7456029653549194, "correct_loss_per_token": 0.26444271206855774, "incorrect_loss_per_token": 1.4912059307098389, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26444271206855774, "num_tokens": 1, "num_tokens_all": 1016, "is_greedy": true, "logits_per_token": -0.26444271206855774, "logits_per_char": -0.13222135603427887, "num_chars": 2}, {"sum_logits": -1.4912059307098389, "num_tokens": 1, "num_tokens_all": 1016, "is_greedy": false, "logits_per_token": -1.4912059307098389, "logits_per_char": -0.7456029653549194, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 488, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30736538767814636, "incorrect_loss_raw": 1.356033444404602, "correct_loss_per_char": 0.15368269383907318, "incorrect_loss_per_char": 0.678016722202301, "correct_loss_per_token": 0.30736538767814636, "incorrect_loss_per_token": 1.356033444404602, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30736538767814636, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.30736538767814636, "logits_per_char": -0.15368269383907318, "num_chars": 2}, {"sum_logits": -1.356033444404602, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.356033444404602, "logits_per_char": -0.678016722202301, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 489, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7880290746688843, "incorrect_loss_raw": 0.19192542135715485, "correct_loss_per_char": 0.8940145373344421, "incorrect_loss_per_char": 0.09596271067857742, "correct_loss_per_token": 1.7880290746688843, "incorrect_loss_per_token": 0.19192542135715485, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19192542135715485, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.19192542135715485, "logits_per_char": -0.09596271067857742, "num_chars": 2}, {"sum_logits": -1.7880290746688843, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.7880290746688843, "logits_per_char": -0.8940145373344421, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 490, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.190139502286911, "incorrect_loss_raw": 1.795912742614746, "correct_loss_per_char": 0.0950697511434555, "incorrect_loss_per_char": 0.897956371307373, "correct_loss_per_token": 0.190139502286911, "incorrect_loss_per_token": 1.795912742614746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.190139502286911, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.190139502286911, "logits_per_char": -0.0950697511434555, "num_chars": 2}, {"sum_logits": -1.795912742614746, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.795912742614746, "logits_per_char": -0.897956371307373, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 491, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8624293804168701, "incorrect_loss_raw": 0.181833416223526, "correct_loss_per_char": 0.9312146902084351, "incorrect_loss_per_char": 0.090916708111763, "correct_loss_per_token": 1.8624293804168701, "incorrect_loss_per_token": 0.181833416223526, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.181833416223526, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": true, "logits_per_token": -0.181833416223526, "logits_per_char": -0.090916708111763, "num_chars": 2}, {"sum_logits": -1.8624293804168701, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": false, "logits_per_token": -1.8624293804168701, "logits_per_char": -0.9312146902084351, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 492, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5063424110412598, "incorrect_loss_raw": 0.260881632566452, "correct_loss_per_char": 0.7531712055206299, "incorrect_loss_per_char": 0.130440816283226, "correct_loss_per_token": 1.5063424110412598, "incorrect_loss_per_token": 0.260881632566452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.260881632566452, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.260881632566452, "logits_per_char": -0.130440816283226, "num_chars": 2}, {"sum_logits": -1.5063424110412598, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.5063424110412598, "logits_per_char": -0.7531712055206299, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 493, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2587727904319763, "incorrect_loss_raw": 1.5047588348388672, "correct_loss_per_char": 0.12938639521598816, "incorrect_loss_per_char": 0.7523794174194336, "correct_loss_per_token": 0.2587727904319763, "incorrect_loss_per_token": 1.5047588348388672, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2587727904319763, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -0.2587727904319763, "logits_per_char": -0.12938639521598816, "num_chars": 2}, {"sum_logits": -1.5047588348388672, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -1.5047588348388672, "logits_per_char": -0.7523794174194336, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 494, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6483542919158936, "incorrect_loss_raw": 0.22432084381580353, "correct_loss_per_char": 0.8241771459579468, "incorrect_loss_per_char": 0.11216042190790176, "correct_loss_per_token": 1.6483542919158936, "incorrect_loss_per_token": 0.22432084381580353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22432084381580353, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.22432084381580353, "logits_per_char": -0.11216042190790176, "num_chars": 2}, {"sum_logits": -1.6483542919158936, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.6483542919158936, "logits_per_char": -0.8241771459579468, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 495, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6595163345336914, "incorrect_loss_raw": 0.22464731335639954, "correct_loss_per_char": 0.8297581672668457, "incorrect_loss_per_char": 0.11232365667819977, "correct_loss_per_token": 1.6595163345336914, "incorrect_loss_per_token": 0.22464731335639954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22464731335639954, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.22464731335639954, "logits_per_char": -0.11232365667819977, "num_chars": 2}, {"sum_logits": -1.6595163345336914, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.6595163345336914, "logits_per_char": -0.8297581672668457, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 496, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29367777705192566, "incorrect_loss_raw": 1.426051139831543, "correct_loss_per_char": 0.14683888852596283, "incorrect_loss_per_char": 0.7130255699157715, "correct_loss_per_token": 0.29367777705192566, "incorrect_loss_per_token": 1.426051139831543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29367777705192566, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": true, "logits_per_token": -0.29367777705192566, "logits_per_char": -0.14683888852596283, "num_chars": 2}, {"sum_logits": -1.426051139831543, "num_tokens": 1, "num_tokens_all": 1129, "is_greedy": false, "logits_per_token": -1.426051139831543, "logits_per_char": -0.7130255699157715, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 497, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5170766115188599, "incorrect_loss_raw": 0.26413795351982117, "correct_loss_per_char": 0.7585383057594299, "incorrect_loss_per_char": 0.13206897675991058, "correct_loss_per_token": 1.5170766115188599, "incorrect_loss_per_token": 0.26413795351982117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26413795351982117, "num_tokens": 1, "num_tokens_all": 1149, "is_greedy": true, "logits_per_token": -0.26413795351982117, "logits_per_char": -0.13206897675991058, "num_chars": 2}, {"sum_logits": -1.5170766115188599, "num_tokens": 1, "num_tokens_all": 1149, "is_greedy": false, "logits_per_token": -1.5170766115188599, "logits_per_char": -0.7585383057594299, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 498, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9654262065887451, "incorrect_loss_raw": 0.1613588035106659, "correct_loss_per_char": 0.9827131032943726, "incorrect_loss_per_char": 0.08067940175533295, "correct_loss_per_token": 1.9654262065887451, "incorrect_loss_per_token": 0.1613588035106659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1613588035106659, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.1613588035106659, "logits_per_char": -0.08067940175533295, "num_chars": 2}, {"sum_logits": -1.9654262065887451, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.9654262065887451, "logits_per_char": -0.9827131032943726, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 499, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18345433473587036, "incorrect_loss_raw": 1.8616065979003906, "correct_loss_per_char": 0.09172716736793518, "incorrect_loss_per_char": 0.9308032989501953, "correct_loss_per_token": 0.18345433473587036, "incorrect_loss_per_token": 1.8616065979003906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18345433473587036, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.18345433473587036, "logits_per_char": -0.09172716736793518, "num_chars": 2}, {"sum_logits": -1.8616065979003906, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.8616065979003906, "logits_per_char": -0.9308032989501953, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 500, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32637420296669006, "incorrect_loss_raw": 1.3049474954605103, "correct_loss_per_char": 0.16318710148334503, "incorrect_loss_per_char": 0.6524737477302551, "correct_loss_per_token": 0.32637420296669006, "incorrect_loss_per_token": 1.3049474954605103, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32637420296669006, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": true, "logits_per_token": -0.32637420296669006, "logits_per_char": -0.16318710148334503, "num_chars": 2}, {"sum_logits": -1.3049474954605103, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -1.3049474954605103, "logits_per_char": -0.6524737477302551, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 501, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24096383154392242, "incorrect_loss_raw": 1.5849436521530151, "correct_loss_per_char": 0.12048191577196121, "incorrect_loss_per_char": 0.7924718260765076, "correct_loss_per_token": 0.24096383154392242, "incorrect_loss_per_token": 1.5849436521530151, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24096383154392242, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -0.24096383154392242, "logits_per_char": -0.12048191577196121, "num_chars": 2}, {"sum_logits": -1.5849436521530151, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.5849436521530151, "logits_per_char": -0.7924718260765076, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 502, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6100659370422363, "incorrect_loss_raw": 0.2339758276939392, "correct_loss_per_char": 0.8050329685211182, "incorrect_loss_per_char": 0.1169879138469696, "correct_loss_per_token": 1.6100659370422363, "incorrect_loss_per_token": 0.2339758276939392, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2339758276939392, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.2339758276939392, "logits_per_char": -0.1169879138469696, "num_chars": 2}, {"sum_logits": -1.6100659370422363, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.6100659370422363, "logits_per_char": -0.8050329685211182, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 503, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25485312938690186, "incorrect_loss_raw": 1.5257017612457275, "correct_loss_per_char": 0.12742656469345093, "incorrect_loss_per_char": 0.7628508806228638, "correct_loss_per_token": 0.25485312938690186, "incorrect_loss_per_token": 1.5257017612457275, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25485312938690186, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.25485312938690186, "logits_per_char": -0.12742656469345093, "num_chars": 2}, {"sum_logits": -1.5257017612457275, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.5257017612457275, "logits_per_char": -0.7628508806228638, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 504, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22841015458106995, "incorrect_loss_raw": 1.6310967206954956, "correct_loss_per_char": 0.11420507729053497, "incorrect_loss_per_char": 0.8155483603477478, "correct_loss_per_token": 0.22841015458106995, "incorrect_loss_per_token": 1.6310967206954956, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22841015458106995, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -0.22841015458106995, "logits_per_char": -0.11420507729053497, "num_chars": 2}, {"sum_logits": -1.6310967206954956, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.6310967206954956, "logits_per_char": -0.8155483603477478, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 505, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1764393299818039, "incorrect_loss_raw": 1.858525276184082, "correct_loss_per_char": 0.08821966499090195, "incorrect_loss_per_char": 0.929262638092041, "correct_loss_per_token": 0.1764393299818039, "incorrect_loss_per_token": 1.858525276184082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1764393299818039, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.1764393299818039, "logits_per_char": -0.08821966499090195, "num_chars": 2}, {"sum_logits": -1.858525276184082, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.858525276184082, "logits_per_char": -0.929262638092041, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 506, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19558076560497284, "incorrect_loss_raw": 1.7742507457733154, "correct_loss_per_char": 0.09779038280248642, "incorrect_loss_per_char": 0.8871253728866577, "correct_loss_per_token": 0.19558076560497284, "incorrect_loss_per_token": 1.7742507457733154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19558076560497284, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.19558076560497284, "logits_per_char": -0.09779038280248642, "num_chars": 2}, {"sum_logits": -1.7742507457733154, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.7742507457733154, "logits_per_char": -0.8871253728866577, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 507, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19504085183143616, "incorrect_loss_raw": 1.7757295370101929, "correct_loss_per_char": 0.09752042591571808, "incorrect_loss_per_char": 0.8878647685050964, "correct_loss_per_token": 0.19504085183143616, "incorrect_loss_per_token": 1.7757295370101929, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19504085183143616, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.19504085183143616, "logits_per_char": -0.09752042591571808, "num_chars": 2}, {"sum_logits": -1.7757295370101929, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.7757295370101929, "logits_per_char": -0.8878647685050964, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 508, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4472638368606567, "incorrect_loss_raw": 0.2776634693145752, "correct_loss_per_char": 0.7236319184303284, "incorrect_loss_per_char": 0.1388317346572876, "correct_loss_per_token": 1.4472638368606567, "incorrect_loss_per_token": 0.2776634693145752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2776634693145752, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.2776634693145752, "logits_per_char": -0.1388317346572876, "num_chars": 2}, {"sum_logits": -1.4472638368606567, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -1.4472638368606567, "logits_per_char": -0.7236319184303284, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 509, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6778569221496582, "incorrect_loss_raw": 0.22083471715450287, "correct_loss_per_char": 0.8389284610748291, "incorrect_loss_per_char": 0.11041735857725143, "correct_loss_per_token": 1.6778569221496582, "incorrect_loss_per_token": 0.22083471715450287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22083471715450287, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.22083471715450287, "logits_per_char": -0.11041735857725143, "num_chars": 2}, {"sum_logits": -1.6778569221496582, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.6778569221496582, "logits_per_char": -0.8389284610748291, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 510, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24284489452838898, "incorrect_loss_raw": 1.5852762460708618, "correct_loss_per_char": 0.12142244726419449, "incorrect_loss_per_char": 0.7926381230354309, "correct_loss_per_token": 0.24284489452838898, "incorrect_loss_per_token": 1.5852762460708618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24284489452838898, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -0.24284489452838898, "logits_per_char": -0.12142244726419449, "num_chars": 2}, {"sum_logits": -1.5852762460708618, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.5852762460708618, "logits_per_char": -0.7926381230354309, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 511, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20448346436023712, "incorrect_loss_raw": 1.7277412414550781, "correct_loss_per_char": 0.10224173218011856, "incorrect_loss_per_char": 0.8638706207275391, "correct_loss_per_token": 0.20448346436023712, "incorrect_loss_per_token": 1.7277412414550781, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20448346436023712, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -0.20448346436023712, "logits_per_char": -0.10224173218011856, "num_chars": 2}, {"sum_logits": -1.7277412414550781, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.7277412414550781, "logits_per_char": -0.8638706207275391, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 512, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24174080789089203, "incorrect_loss_raw": 1.5869063138961792, "correct_loss_per_char": 0.12087040394544601, "incorrect_loss_per_char": 0.7934531569480896, "correct_loss_per_token": 0.24174080789089203, "incorrect_loss_per_token": 1.5869063138961792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24174080789089203, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.24174080789089203, "logits_per_char": -0.12087040394544601, "num_chars": 2}, {"sum_logits": -1.5869063138961792, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.5869063138961792, "logits_per_char": -0.7934531569480896, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 513, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17102232575416565, "incorrect_loss_raw": 1.9032090902328491, "correct_loss_per_char": 0.08551116287708282, "incorrect_loss_per_char": 0.9516045451164246, "correct_loss_per_token": 0.17102232575416565, "incorrect_loss_per_token": 1.9032090902328491, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17102232575416565, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.17102232575416565, "logits_per_char": -0.08551116287708282, "num_chars": 2}, {"sum_logits": -1.9032090902328491, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.9032090902328491, "logits_per_char": -0.9516045451164246, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 514, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5821311473846436, "incorrect_loss_raw": 0.24464869499206543, "correct_loss_per_char": 0.7910655736923218, "incorrect_loss_per_char": 0.12232434749603271, "correct_loss_per_token": 1.5821311473846436, "incorrect_loss_per_token": 0.24464869499206543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24464869499206543, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.24464869499206543, "logits_per_char": -0.12232434749603271, "num_chars": 2}, {"sum_logits": -1.5821311473846436, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.5821311473846436, "logits_per_char": -0.7910655736923218, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 515, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4720417261123657, "incorrect_loss_raw": 0.2711375951766968, "correct_loss_per_char": 0.7360208630561829, "incorrect_loss_per_char": 0.1355687975883484, "correct_loss_per_token": 1.4720417261123657, "incorrect_loss_per_token": 0.2711375951766968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2711375951766968, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -0.2711375951766968, "logits_per_char": -0.1355687975883484, "num_chars": 2}, {"sum_logits": -1.4720417261123657, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.4720417261123657, "logits_per_char": -0.7360208630561829, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 516, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2837579548358917, "incorrect_loss_raw": 1.433650016784668, "correct_loss_per_char": 0.14187897741794586, "incorrect_loss_per_char": 0.716825008392334, "correct_loss_per_token": 0.2837579548358917, "incorrect_loss_per_token": 1.433650016784668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2837579548358917, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": true, "logits_per_token": -0.2837579548358917, "logits_per_char": -0.14187897741794586, "num_chars": 2}, {"sum_logits": -1.433650016784668, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": false, "logits_per_token": -1.433650016784668, "logits_per_char": -0.716825008392334, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 517, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2762508690357208, "incorrect_loss_raw": 1.4769471883773804, "correct_loss_per_char": 0.1381254345178604, "incorrect_loss_per_char": 0.7384735941886902, "correct_loss_per_token": 0.2762508690357208, "incorrect_loss_per_token": 1.4769471883773804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2762508690357208, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": true, "logits_per_token": -0.2762508690357208, "logits_per_char": -0.1381254345178604, "num_chars": 2}, {"sum_logits": -1.4769471883773804, "num_tokens": 1, "num_tokens_all": 1072, "is_greedy": false, "logits_per_token": -1.4769471883773804, "logits_per_char": -0.7384735941886902, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 518, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.661116123199463, "incorrect_loss_raw": 0.22218704223632812, "correct_loss_per_char": 0.8305580615997314, "incorrect_loss_per_char": 0.11109352111816406, "correct_loss_per_token": 1.661116123199463, "incorrect_loss_per_token": 0.22218704223632812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22218704223632812, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -0.22218704223632812, "logits_per_char": -0.11109352111816406, "num_chars": 2}, {"sum_logits": -1.661116123199463, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.661116123199463, "logits_per_char": -0.8305580615997314, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 519, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2904133200645447, "incorrect_loss_raw": 1.4219820499420166, "correct_loss_per_char": 0.14520666003227234, "incorrect_loss_per_char": 0.7109910249710083, "correct_loss_per_token": 0.2904133200645447, "incorrect_loss_per_token": 1.4219820499420166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2904133200645447, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": true, "logits_per_token": -0.2904133200645447, "logits_per_char": -0.14520666003227234, "num_chars": 2}, {"sum_logits": -1.4219820499420166, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.4219820499420166, "logits_per_char": -0.7109910249710083, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 520, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6971867084503174, "incorrect_loss_raw": 0.21136891841888428, "correct_loss_per_char": 0.8485933542251587, "incorrect_loss_per_char": 0.10568445920944214, "correct_loss_per_token": 1.6971867084503174, "incorrect_loss_per_token": 0.21136891841888428, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21136891841888428, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.21136891841888428, "logits_per_char": -0.10568445920944214, "num_chars": 2}, {"sum_logits": -1.6971867084503174, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.6971867084503174, "logits_per_char": -0.8485933542251587, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 521, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21617572009563446, "incorrect_loss_raw": 1.6752678155899048, "correct_loss_per_char": 0.10808786004781723, "incorrect_loss_per_char": 0.8376339077949524, "correct_loss_per_token": 0.21617572009563446, "incorrect_loss_per_token": 1.6752678155899048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21617572009563446, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.21617572009563446, "logits_per_char": -0.10808786004781723, "num_chars": 2}, {"sum_logits": -1.6752678155899048, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.6752678155899048, "logits_per_char": -0.8376339077949524, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 522, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.621760368347168, "incorrect_loss_raw": 0.22928547859191895, "correct_loss_per_char": 0.810880184173584, "incorrect_loss_per_char": 0.11464273929595947, "correct_loss_per_token": 1.621760368347168, "incorrect_loss_per_token": 0.22928547859191895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22928547859191895, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.22928547859191895, "logits_per_char": -0.11464273929595947, "num_chars": 2}, {"sum_logits": -1.621760368347168, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.621760368347168, "logits_per_char": -0.810880184173584, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 523, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.643922209739685, "incorrect_loss_raw": 0.22569072246551514, "correct_loss_per_char": 0.8219611048698425, "incorrect_loss_per_char": 0.11284536123275757, "correct_loss_per_token": 1.643922209739685, "incorrect_loss_per_token": 0.22569072246551514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22569072246551514, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.22569072246551514, "logits_per_char": -0.11284536123275757, "num_chars": 2}, {"sum_logits": -1.643922209739685, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -1.643922209739685, "logits_per_char": -0.8219611048698425, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 524, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20472925901412964, "incorrect_loss_raw": 1.7297024726867676, "correct_loss_per_char": 0.10236462950706482, "incorrect_loss_per_char": 0.8648512363433838, "correct_loss_per_token": 0.20472925901412964, "incorrect_loss_per_token": 1.7297024726867676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20472925901412964, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": true, "logits_per_token": -0.20472925901412964, "logits_per_char": -0.10236462950706482, "num_chars": 2}, {"sum_logits": -1.7297024726867676, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -1.7297024726867676, "logits_per_char": -0.8648512363433838, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 525, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6690492630004883, "incorrect_loss_raw": 0.2196706384420395, "correct_loss_per_char": 0.8345246315002441, "incorrect_loss_per_char": 0.10983531922101974, "correct_loss_per_token": 1.6690492630004883, "incorrect_loss_per_token": 0.2196706384420395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2196706384420395, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.2196706384420395, "logits_per_char": -0.10983531922101974, "num_chars": 2}, {"sum_logits": -1.6690492630004883, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.6690492630004883, "logits_per_char": -0.8345246315002441, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 526, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1736796349287033, "incorrect_loss_raw": 1.9046456813812256, "correct_loss_per_char": 0.08683981746435165, "incorrect_loss_per_char": 0.9523228406906128, "correct_loss_per_token": 0.1736796349287033, "incorrect_loss_per_token": 1.9046456813812256, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1736796349287033, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.1736796349287033, "logits_per_char": -0.08683981746435165, "num_chars": 2}, {"sum_logits": -1.9046456813812256, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.9046456813812256, "logits_per_char": -0.9523228406906128, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 527, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2507473826408386, "incorrect_loss_raw": 1.567387342453003, "correct_loss_per_char": 0.1253736913204193, "incorrect_loss_per_char": 0.7836936712265015, "correct_loss_per_token": 0.2507473826408386, "incorrect_loss_per_token": 1.567387342453003, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2507473826408386, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -0.2507473826408386, "logits_per_char": -0.1253736913204193, "num_chars": 2}, {"sum_logits": -1.567387342453003, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.567387342453003, "logits_per_char": -0.7836936712265015, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 528, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3273868560791016, "incorrect_loss_raw": 0.32152312994003296, "correct_loss_per_char": 0.6636934280395508, "incorrect_loss_per_char": 0.16076156497001648, "correct_loss_per_token": 1.3273868560791016, "incorrect_loss_per_token": 0.32152312994003296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32152312994003296, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.32152312994003296, "logits_per_char": -0.16076156497001648, "num_chars": 2}, {"sum_logits": -1.3273868560791016, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.3273868560791016, "logits_per_char": -0.6636934280395508, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 529, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19622601568698883, "incorrect_loss_raw": 1.7835248708724976, "correct_loss_per_char": 0.09811300784349442, "incorrect_loss_per_char": 0.8917624354362488, "correct_loss_per_token": 0.19622601568698883, "incorrect_loss_per_token": 1.7835248708724976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19622601568698883, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.19622601568698883, "logits_per_char": -0.09811300784349442, "num_chars": 2}, {"sum_logits": -1.7835248708724976, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.7835248708724976, "logits_per_char": -0.8917624354362488, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 530, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25289252400398254, "incorrect_loss_raw": 1.5526806116104126, "correct_loss_per_char": 0.12644626200199127, "incorrect_loss_per_char": 0.7763403058052063, "correct_loss_per_token": 0.25289252400398254, "incorrect_loss_per_token": 1.5526806116104126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25289252400398254, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.25289252400398254, "logits_per_char": -0.12644626200199127, "num_chars": 2}, {"sum_logits": -1.5526806116104126, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.5526806116104126, "logits_per_char": -0.7763403058052063, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 531, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2391006052494049, "incorrect_loss_raw": 1.5870481729507446, "correct_loss_per_char": 0.11955030262470245, "incorrect_loss_per_char": 0.7935240864753723, "correct_loss_per_token": 0.2391006052494049, "incorrect_loss_per_token": 1.5870481729507446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2391006052494049, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.2391006052494049, "logits_per_char": -0.11955030262470245, "num_chars": 2}, {"sum_logits": -1.5870481729507446, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -1.5870481729507446, "logits_per_char": -0.7935240864753723, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 532, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24135562777519226, "incorrect_loss_raw": 1.5717403888702393, "correct_loss_per_char": 0.12067781388759613, "incorrect_loss_per_char": 0.7858701944351196, "correct_loss_per_token": 0.24135562777519226, "incorrect_loss_per_token": 1.5717403888702393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24135562777519226, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.24135562777519226, "logits_per_char": -0.12067781388759613, "num_chars": 2}, {"sum_logits": -1.5717403888702393, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.5717403888702393, "logits_per_char": -0.7858701944351196, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 533, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32457834482192993, "incorrect_loss_raw": 1.3195916414260864, "correct_loss_per_char": 0.16228917241096497, "incorrect_loss_per_char": 0.6597958207130432, "correct_loss_per_token": 0.32457834482192993, "incorrect_loss_per_token": 1.3195916414260864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32457834482192993, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -0.32457834482192993, "logits_per_char": -0.16228917241096497, "num_chars": 2}, {"sum_logits": -1.3195916414260864, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.3195916414260864, "logits_per_char": -0.6597958207130432, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 534, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7576686143875122, "incorrect_loss_raw": 0.19624589383602142, "correct_loss_per_char": 0.8788343071937561, "incorrect_loss_per_char": 0.09812294691801071, "correct_loss_per_token": 1.7576686143875122, "incorrect_loss_per_token": 0.19624589383602142, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19624589383602142, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.19624589383602142, "logits_per_char": -0.09812294691801071, "num_chars": 2}, {"sum_logits": -1.7576686143875122, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.7576686143875122, "logits_per_char": -0.8788343071937561, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 535, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5764081478118896, "incorrect_loss_raw": 0.240799680352211, "correct_loss_per_char": 0.7882040739059448, "incorrect_loss_per_char": 0.1203998401761055, "correct_loss_per_token": 1.5764081478118896, "incorrect_loss_per_token": 0.240799680352211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.240799680352211, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.240799680352211, "logits_per_char": -0.1203998401761055, "num_chars": 2}, {"sum_logits": -1.5764081478118896, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.5764081478118896, "logits_per_char": -0.7882040739059448, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 536, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20209231972694397, "incorrect_loss_raw": 1.7471115589141846, "correct_loss_per_char": 0.10104615986347198, "incorrect_loss_per_char": 0.8735557794570923, "correct_loss_per_token": 0.20209231972694397, "incorrect_loss_per_token": 1.7471115589141846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20209231972694397, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -0.20209231972694397, "logits_per_char": -0.10104615986347198, "num_chars": 2}, {"sum_logits": -1.7471115589141846, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.7471115589141846, "logits_per_char": -0.8735557794570923, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 537, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2429073601961136, "incorrect_loss_raw": 1.5635322332382202, "correct_loss_per_char": 0.1214536800980568, "incorrect_loss_per_char": 0.7817661166191101, "correct_loss_per_token": 0.2429073601961136, "incorrect_loss_per_token": 1.5635322332382202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2429073601961136, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -0.2429073601961136, "logits_per_char": -0.1214536800980568, "num_chars": 2}, {"sum_logits": -1.5635322332382202, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.5635322332382202, "logits_per_char": -0.7817661166191101, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 538, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21474312245845795, "incorrect_loss_raw": 1.687290906906128, "correct_loss_per_char": 0.10737156122922897, "incorrect_loss_per_char": 0.843645453453064, "correct_loss_per_token": 0.21474312245845795, "incorrect_loss_per_token": 1.687290906906128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21474312245845795, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -0.21474312245845795, "logits_per_char": -0.10737156122922897, "num_chars": 2}, {"sum_logits": -1.687290906906128, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.687290906906128, "logits_per_char": -0.843645453453064, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 539, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2236262857913971, "incorrect_loss_raw": 1.6555787324905396, "correct_loss_per_char": 0.11181314289569855, "incorrect_loss_per_char": 0.8277893662452698, "correct_loss_per_token": 0.2236262857913971, "incorrect_loss_per_token": 1.6555787324905396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2236262857913971, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -0.2236262857913971, "logits_per_char": -0.11181314289569855, "num_chars": 2}, {"sum_logits": -1.6555787324905396, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.6555787324905396, "logits_per_char": -0.8277893662452698, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 540, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23081497848033905, "incorrect_loss_raw": 1.6151310205459595, "correct_loss_per_char": 0.11540748924016953, "incorrect_loss_per_char": 0.8075655102729797, "correct_loss_per_token": 0.23081497848033905, "incorrect_loss_per_token": 1.6151310205459595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23081497848033905, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.23081497848033905, "logits_per_char": -0.11540748924016953, "num_chars": 2}, {"sum_logits": -1.6151310205459595, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.6151310205459595, "logits_per_char": -0.8075655102729797, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 541, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25088438391685486, "incorrect_loss_raw": 1.5396536588668823, "correct_loss_per_char": 0.12544219195842743, "incorrect_loss_per_char": 0.7698268294334412, "correct_loss_per_token": 0.25088438391685486, "incorrect_loss_per_token": 1.5396536588668823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25088438391685486, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.25088438391685486, "logits_per_char": -0.12544219195842743, "num_chars": 2}, {"sum_logits": -1.5396536588668823, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.5396536588668823, "logits_per_char": -0.7698268294334412, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 542, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22669968008995056, "incorrect_loss_raw": 1.6498340368270874, "correct_loss_per_char": 0.11334984004497528, "incorrect_loss_per_char": 0.8249170184135437, "correct_loss_per_token": 0.22669968008995056, "incorrect_loss_per_token": 1.6498340368270874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22669968008995056, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": true, "logits_per_token": -0.22669968008995056, "logits_per_char": -0.11334984004497528, "num_chars": 2}, {"sum_logits": -1.6498340368270874, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": false, "logits_per_token": -1.6498340368270874, "logits_per_char": -0.8249170184135437, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 543, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34025871753692627, "incorrect_loss_raw": 1.2837300300598145, "correct_loss_per_char": 0.17012935876846313, "incorrect_loss_per_char": 0.6418650150299072, "correct_loss_per_token": 0.34025871753692627, "incorrect_loss_per_token": 1.2837300300598145, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34025871753692627, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.34025871753692627, "logits_per_char": -0.17012935876846313, "num_chars": 2}, {"sum_logits": -1.2837300300598145, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.2837300300598145, "logits_per_char": -0.6418650150299072, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 544, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1883077174425125, "incorrect_loss_raw": 1.813397765159607, "correct_loss_per_char": 0.09415385872125626, "incorrect_loss_per_char": 0.9066988825798035, "correct_loss_per_token": 0.1883077174425125, "incorrect_loss_per_token": 1.813397765159607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1883077174425125, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.1883077174425125, "logits_per_char": -0.09415385872125626, "num_chars": 2}, {"sum_logits": -1.813397765159607, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.813397765159607, "logits_per_char": -0.9066988825798035, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 545, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29246091842651367, "incorrect_loss_raw": 1.4108983278274536, "correct_loss_per_char": 0.14623045921325684, "incorrect_loss_per_char": 0.7054491639137268, "correct_loss_per_token": 0.29246091842651367, "incorrect_loss_per_token": 1.4108983278274536, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29246091842651367, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.29246091842651367, "logits_per_char": -0.14623045921325684, "num_chars": 2}, {"sum_logits": -1.4108983278274536, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.4108983278274536, "logits_per_char": -0.7054491639137268, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 546, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17463281750679016, "incorrect_loss_raw": 1.8830991983413696, "correct_loss_per_char": 0.08731640875339508, "incorrect_loss_per_char": 0.9415495991706848, "correct_loss_per_token": 0.17463281750679016, "incorrect_loss_per_token": 1.8830991983413696, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17463281750679016, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": true, "logits_per_token": -0.17463281750679016, "logits_per_char": -0.08731640875339508, "num_chars": 2}, {"sum_logits": -1.8830991983413696, "num_tokens": 1, "num_tokens_all": 1055, "is_greedy": false, "logits_per_token": -1.8830991983413696, "logits_per_char": -0.9415495991706848, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 547, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19994144141674042, "incorrect_loss_raw": 1.745359182357788, "correct_loss_per_char": 0.09997072070837021, "incorrect_loss_per_char": 0.872679591178894, "correct_loss_per_token": 0.19994144141674042, "incorrect_loss_per_token": 1.745359182357788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19994144141674042, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.19994144141674042, "logits_per_char": -0.09997072070837021, "num_chars": 2}, {"sum_logits": -1.745359182357788, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.745359182357788, "logits_per_char": -0.872679591178894, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 548, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4428908824920654, "incorrect_loss_raw": 0.2824106514453888, "correct_loss_per_char": 0.7214454412460327, "incorrect_loss_per_char": 0.1412053257226944, "correct_loss_per_token": 1.4428908824920654, "incorrect_loss_per_token": 0.2824106514453888, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2824106514453888, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.2824106514453888, "logits_per_char": -0.1412053257226944, "num_chars": 2}, {"sum_logits": -1.4428908824920654, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.4428908824920654, "logits_per_char": -0.7214454412460327, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 549, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2073206901550293, "incorrect_loss_raw": 1.704207420349121, "correct_loss_per_char": 0.10366034507751465, "incorrect_loss_per_char": 0.8521037101745605, "correct_loss_per_token": 0.2073206901550293, "incorrect_loss_per_token": 1.704207420349121, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2073206901550293, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.2073206901550293, "logits_per_char": -0.10366034507751465, "num_chars": 2}, {"sum_logits": -1.704207420349121, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.704207420349121, "logits_per_char": -0.8521037101745605, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 550, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19496454298496246, "incorrect_loss_raw": 1.7832039594650269, "correct_loss_per_char": 0.09748227149248123, "incorrect_loss_per_char": 0.8916019797325134, "correct_loss_per_token": 0.19496454298496246, "incorrect_loss_per_token": 1.7832039594650269, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19496454298496246, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.19496454298496246, "logits_per_char": -0.09748227149248123, "num_chars": 2}, {"sum_logits": -1.7832039594650269, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.7832039594650269, "logits_per_char": -0.8916019797325134, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 551, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5304034948349, "incorrect_loss_raw": 0.2525831162929535, "correct_loss_per_char": 0.76520174741745, "incorrect_loss_per_char": 0.12629155814647675, "correct_loss_per_token": 1.5304034948349, "incorrect_loss_per_token": 0.2525831162929535, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2525831162929535, "num_tokens": 1, "num_tokens_all": 1030, "is_greedy": true, "logits_per_token": -0.2525831162929535, "logits_per_char": -0.12629155814647675, "num_chars": 2}, {"sum_logits": -1.5304034948349, "num_tokens": 1, "num_tokens_all": 1030, "is_greedy": false, "logits_per_token": -1.5304034948349, "logits_per_char": -0.76520174741745, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 552, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.66465425491333, "incorrect_loss_raw": 0.22084972262382507, "correct_loss_per_char": 0.832327127456665, "incorrect_loss_per_char": 0.11042486131191254, "correct_loss_per_token": 1.66465425491333, "incorrect_loss_per_token": 0.22084972262382507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22084972262382507, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.22084972262382507, "logits_per_char": -0.11042486131191254, "num_chars": 2}, {"sum_logits": -1.66465425491333, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.66465425491333, "logits_per_char": -0.832327127456665, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 553, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2171711027622223, "incorrect_loss_raw": 1.682606816291809, "correct_loss_per_char": 0.10858555138111115, "incorrect_loss_per_char": 0.8413034081459045, "correct_loss_per_token": 0.2171711027622223, "incorrect_loss_per_token": 1.682606816291809, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2171711027622223, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": true, "logits_per_token": -0.2171711027622223, "logits_per_char": -0.10858555138111115, "num_chars": 2}, {"sum_logits": -1.682606816291809, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -1.682606816291809, "logits_per_char": -0.8413034081459045, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 554, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7353909015655518, "incorrect_loss_raw": 0.19962193071842194, "correct_loss_per_char": 0.8676954507827759, "incorrect_loss_per_char": 0.09981096535921097, "correct_loss_per_token": 1.7353909015655518, "incorrect_loss_per_token": 0.19962193071842194, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19962193071842194, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -0.19962193071842194, "logits_per_char": -0.09981096535921097, "num_chars": 2}, {"sum_logits": -1.7353909015655518, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.7353909015655518, "logits_per_char": -0.8676954507827759, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 555, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28040289878845215, "incorrect_loss_raw": 1.4629249572753906, "correct_loss_per_char": 0.14020144939422607, "incorrect_loss_per_char": 0.7314624786376953, "correct_loss_per_token": 0.28040289878845215, "incorrect_loss_per_token": 1.4629249572753906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28040289878845215, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -0.28040289878845215, "logits_per_char": -0.14020144939422607, "num_chars": 2}, {"sum_logits": -1.4629249572753906, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.4629249572753906, "logits_per_char": -0.7314624786376953, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 556, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16738130152225494, "incorrect_loss_raw": 1.9131338596343994, "correct_loss_per_char": 0.08369065076112747, "incorrect_loss_per_char": 0.9565669298171997, "correct_loss_per_token": 0.16738130152225494, "incorrect_loss_per_token": 1.9131338596343994, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16738130152225494, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.16738130152225494, "logits_per_char": -0.08369065076112747, "num_chars": 2}, {"sum_logits": -1.9131338596343994, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.9131338596343994, "logits_per_char": -0.9565669298171997, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 557, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5715899467468262, "incorrect_loss_raw": 0.2412252575159073, "correct_loss_per_char": 0.7857949733734131, "incorrect_loss_per_char": 0.12061262875795364, "correct_loss_per_token": 1.5715899467468262, "incorrect_loss_per_token": 0.2412252575159073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2412252575159073, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.2412252575159073, "logits_per_char": -0.12061262875795364, "num_chars": 2}, {"sum_logits": -1.5715899467468262, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.5715899467468262, "logits_per_char": -0.7857949733734131, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 558, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22081100940704346, "incorrect_loss_raw": 1.6782724857330322, "correct_loss_per_char": 0.11040550470352173, "incorrect_loss_per_char": 0.8391362428665161, "correct_loss_per_token": 0.22081100940704346, "incorrect_loss_per_token": 1.6782724857330322, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22081100940704346, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -0.22081100940704346, "logits_per_char": -0.11040550470352173, "num_chars": 2}, {"sum_logits": -1.6782724857330322, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.6782724857330322, "logits_per_char": -0.8391362428665161, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 559, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7368361949920654, "incorrect_loss_raw": 0.21140529215335846, "correct_loss_per_char": 0.8684180974960327, "incorrect_loss_per_char": 0.10570264607667923, "correct_loss_per_token": 1.7368361949920654, "incorrect_loss_per_token": 0.21140529215335846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21140529215335846, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.21140529215335846, "logits_per_char": -0.10570264607667923, "num_chars": 2}, {"sum_logits": -1.7368361949920654, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.7368361949920654, "logits_per_char": -0.8684180974960327, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 560, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6621370315551758, "incorrect_loss_raw": 0.2245810478925705, "correct_loss_per_char": 0.8310685157775879, "incorrect_loss_per_char": 0.11229052394628525, "correct_loss_per_token": 1.6621370315551758, "incorrect_loss_per_token": 0.2245810478925705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2245810478925705, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -0.2245810478925705, "logits_per_char": -0.11229052394628525, "num_chars": 2}, {"sum_logits": -1.6621370315551758, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.6621370315551758, "logits_per_char": -0.8310685157775879, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 561, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14406563341617584, "incorrect_loss_raw": 2.043041944503784, "correct_loss_per_char": 0.07203281670808792, "incorrect_loss_per_char": 1.021520972251892, "correct_loss_per_token": 0.14406563341617584, "incorrect_loss_per_token": 2.043041944503784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14406563341617584, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.14406563341617584, "logits_per_char": -0.07203281670808792, "num_chars": 2}, {"sum_logits": -2.043041944503784, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -2.043041944503784, "logits_per_char": -1.021520972251892, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 562, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.325843095779419, "incorrect_loss_raw": 0.32213497161865234, "correct_loss_per_char": 0.6629215478897095, "incorrect_loss_per_char": 0.16106748580932617, "correct_loss_per_token": 1.325843095779419, "incorrect_loss_per_token": 0.32213497161865234, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32213497161865234, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": true, "logits_per_token": -0.32213497161865234, "logits_per_char": -0.16106748580932617, "num_chars": 2}, {"sum_logits": -1.325843095779419, "num_tokens": 1, "num_tokens_all": 1093, "is_greedy": false, "logits_per_token": -1.325843095779419, "logits_per_char": -0.6629215478897095, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 563, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17352250218391418, "incorrect_loss_raw": 1.8856768608093262, "correct_loss_per_char": 0.08676125109195709, "incorrect_loss_per_char": 0.9428384304046631, "correct_loss_per_token": 0.17352250218391418, "incorrect_loss_per_token": 1.8856768608093262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17352250218391418, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.17352250218391418, "logits_per_char": -0.08676125109195709, "num_chars": 2}, {"sum_logits": -1.8856768608093262, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.8856768608093262, "logits_per_char": -0.9428384304046631, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 564, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3366972506046295, "incorrect_loss_raw": 1.2789751291275024, "correct_loss_per_char": 0.16834862530231476, "incorrect_loss_per_char": 0.6394875645637512, "correct_loss_per_token": 0.3366972506046295, "incorrect_loss_per_token": 1.2789751291275024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3366972506046295, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -0.3366972506046295, "logits_per_char": -0.16834862530231476, "num_chars": 2}, {"sum_logits": -1.2789751291275024, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.2789751291275024, "logits_per_char": -0.6394875645637512, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 565, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2849908173084259, "incorrect_loss_raw": 1.4546873569488525, "correct_loss_per_char": 0.14249540865421295, "incorrect_loss_per_char": 0.7273436784744263, "correct_loss_per_token": 0.2849908173084259, "incorrect_loss_per_token": 1.4546873569488525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2849908173084259, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -0.2849908173084259, "logits_per_char": -0.14249540865421295, "num_chars": 2}, {"sum_logits": -1.4546873569488525, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.4546873569488525, "logits_per_char": -0.7273436784744263, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 566, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7806369066238403, "incorrect_loss_raw": 0.19190870225429535, "correct_loss_per_char": 0.8903184533119202, "incorrect_loss_per_char": 0.09595435112714767, "correct_loss_per_token": 1.7806369066238403, "incorrect_loss_per_token": 0.19190870225429535, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19190870225429535, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -0.19190870225429535, "logits_per_char": -0.09595435112714767, "num_chars": 2}, {"sum_logits": -1.7806369066238403, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.7806369066238403, "logits_per_char": -0.8903184533119202, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 567, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2523021996021271, "incorrect_loss_raw": 1.5409021377563477, "correct_loss_per_char": 0.12615109980106354, "incorrect_loss_per_char": 0.7704510688781738, "correct_loss_per_token": 0.2523021996021271, "incorrect_loss_per_token": 1.5409021377563477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2523021996021271, "num_tokens": 1, "num_tokens_all": 1033, "is_greedy": true, "logits_per_token": -0.2523021996021271, "logits_per_char": -0.12615109980106354, "num_chars": 2}, {"sum_logits": -1.5409021377563477, "num_tokens": 1, "num_tokens_all": 1033, "is_greedy": false, "logits_per_token": -1.5409021377563477, "logits_per_char": -0.7704510688781738, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 568, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6598669290542603, "incorrect_loss_raw": 0.2224196493625641, "correct_loss_per_char": 0.8299334645271301, "incorrect_loss_per_char": 0.11120982468128204, "correct_loss_per_token": 1.6598669290542603, "incorrect_loss_per_token": 0.2224196493625641, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2224196493625641, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.2224196493625641, "logits_per_char": -0.11120982468128204, "num_chars": 2}, {"sum_logits": -1.6598669290542603, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.6598669290542603, "logits_per_char": -0.8299334645271301, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 569, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24105122685432434, "incorrect_loss_raw": 1.5982409715652466, "correct_loss_per_char": 0.12052561342716217, "incorrect_loss_per_char": 0.7991204857826233, "correct_loss_per_token": 0.24105122685432434, "incorrect_loss_per_token": 1.5982409715652466, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24105122685432434, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.24105122685432434, "logits_per_char": -0.12052561342716217, "num_chars": 2}, {"sum_logits": -1.5982409715652466, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.5982409715652466, "logits_per_char": -0.7991204857826233, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 570, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6531049013137817, "incorrect_loss_raw": 0.22115717828273773, "correct_loss_per_char": 0.8265524506568909, "incorrect_loss_per_char": 0.11057858914136887, "correct_loss_per_token": 1.6531049013137817, "incorrect_loss_per_token": 0.22115717828273773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22115717828273773, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.22115717828273773, "logits_per_char": -0.11057858914136887, "num_chars": 2}, {"sum_logits": -1.6531049013137817, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.6531049013137817, "logits_per_char": -0.8265524506568909, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 571, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21383772790431976, "incorrect_loss_raw": 1.7007663249969482, "correct_loss_per_char": 0.10691886395215988, "incorrect_loss_per_char": 0.8503831624984741, "correct_loss_per_token": 0.21383772790431976, "incorrect_loss_per_token": 1.7007663249969482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21383772790431976, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.21383772790431976, "logits_per_char": -0.10691886395215988, "num_chars": 2}, {"sum_logits": -1.7007663249969482, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.7007663249969482, "logits_per_char": -0.8503831624984741, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 572, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6927518844604492, "incorrect_loss_raw": 0.21906323730945587, "correct_loss_per_char": 0.8463759422302246, "incorrect_loss_per_char": 0.10953161865472794, "correct_loss_per_token": 1.6927518844604492, "incorrect_loss_per_token": 0.21906323730945587, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21906323730945587, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": true, "logits_per_token": -0.21906323730945587, "logits_per_char": -0.10953161865472794, "num_chars": 2}, {"sum_logits": -1.6927518844604492, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": false, "logits_per_token": -1.6927518844604492, "logits_per_char": -0.8463759422302246, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 573, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30769479274749756, "incorrect_loss_raw": 1.365423560142517, "correct_loss_per_char": 0.15384739637374878, "incorrect_loss_per_char": 0.6827117800712585, "correct_loss_per_token": 0.30769479274749756, "incorrect_loss_per_token": 1.365423560142517, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30769479274749756, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.30769479274749756, "logits_per_char": -0.15384739637374878, "num_chars": 2}, {"sum_logits": -1.365423560142517, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.365423560142517, "logits_per_char": -0.6827117800712585, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 574, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5357706546783447, "incorrect_loss_raw": 0.25269773602485657, "correct_loss_per_char": 0.7678853273391724, "incorrect_loss_per_char": 0.12634886801242828, "correct_loss_per_token": 1.5357706546783447, "incorrect_loss_per_token": 0.25269773602485657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25269773602485657, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.25269773602485657, "logits_per_char": -0.12634886801242828, "num_chars": 2}, {"sum_logits": -1.5357706546783447, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.5357706546783447, "logits_per_char": -0.7678853273391724, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 575, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5362143516540527, "incorrect_loss_raw": 0.25942736864089966, "correct_loss_per_char": 0.7681071758270264, "incorrect_loss_per_char": 0.12971368432044983, "correct_loss_per_token": 1.5362143516540527, "incorrect_loss_per_token": 0.25942736864089966, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25942736864089966, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.25942736864089966, "logits_per_char": -0.12971368432044983, "num_chars": 2}, {"sum_logits": -1.5362143516540527, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.5362143516540527, "logits_per_char": -0.7681071758270264, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 576, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18259355425834656, "incorrect_loss_raw": 1.8322817087173462, "correct_loss_per_char": 0.09129677712917328, "incorrect_loss_per_char": 0.9161408543586731, "correct_loss_per_token": 0.18259355425834656, "incorrect_loss_per_token": 1.8322817087173462, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18259355425834656, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.18259355425834656, "logits_per_char": -0.09129677712917328, "num_chars": 2}, {"sum_logits": -1.8322817087173462, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.8322817087173462, "logits_per_char": -0.9161408543586731, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 577, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22488859295845032, "incorrect_loss_raw": 1.6507012844085693, "correct_loss_per_char": 0.11244429647922516, "incorrect_loss_per_char": 0.8253506422042847, "correct_loss_per_token": 0.22488859295845032, "incorrect_loss_per_token": 1.6507012844085693, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22488859295845032, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": true, "logits_per_token": -0.22488859295845032, "logits_per_char": -0.11244429647922516, "num_chars": 2}, {"sum_logits": -1.6507012844085693, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.6507012844085693, "logits_per_char": -0.8253506422042847, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 578, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4329020977020264, "incorrect_loss_raw": 0.28313949704170227, "correct_loss_per_char": 0.7164510488510132, "incorrect_loss_per_char": 0.14156974852085114, "correct_loss_per_token": 1.4329020977020264, "incorrect_loss_per_token": 0.28313949704170227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28313949704170227, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.28313949704170227, "logits_per_char": -0.14156974852085114, "num_chars": 2}, {"sum_logits": -1.4329020977020264, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.4329020977020264, "logits_per_char": -0.7164510488510132, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 579, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19203276932239532, "incorrect_loss_raw": 1.7847868204116821, "correct_loss_per_char": 0.09601638466119766, "incorrect_loss_per_char": 0.8923934102058411, "correct_loss_per_token": 0.19203276932239532, "incorrect_loss_per_token": 1.7847868204116821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19203276932239532, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.19203276932239532, "logits_per_char": -0.09601638466119766, "num_chars": 2}, {"sum_logits": -1.7847868204116821, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.7847868204116821, "logits_per_char": -0.8923934102058411, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 580, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6507312059402466, "incorrect_loss_raw": 0.23239192366600037, "correct_loss_per_char": 0.8253656029701233, "incorrect_loss_per_char": 0.11619596183300018, "correct_loss_per_token": 1.6507312059402466, "incorrect_loss_per_token": 0.23239192366600037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23239192366600037, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -0.23239192366600037, "logits_per_char": -0.11619596183300018, "num_chars": 2}, {"sum_logits": -1.6507312059402466, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.6507312059402466, "logits_per_char": -0.8253656029701233, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 581, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2235725224018097, "incorrect_loss_raw": 1.664273738861084, "correct_loss_per_char": 0.11178626120090485, "incorrect_loss_per_char": 0.832136869430542, "correct_loss_per_token": 0.2235725224018097, "incorrect_loss_per_token": 1.664273738861084, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2235725224018097, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.2235725224018097, "logits_per_char": -0.11178626120090485, "num_chars": 2}, {"sum_logits": -1.664273738861084, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -1.664273738861084, "logits_per_char": -0.832136869430542, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 582, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23397402465343475, "incorrect_loss_raw": 1.6025428771972656, "correct_loss_per_char": 0.11698701232671738, "incorrect_loss_per_char": 0.8012714385986328, "correct_loss_per_token": 0.23397402465343475, "incorrect_loss_per_token": 1.6025428771972656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23397402465343475, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.23397402465343475, "logits_per_char": -0.11698701232671738, "num_chars": 2}, {"sum_logits": -1.6025428771972656, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.6025428771972656, "logits_per_char": -0.8012714385986328, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 583, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5370410680770874, "incorrect_loss_raw": 0.2501620054244995, "correct_loss_per_char": 0.7685205340385437, "incorrect_loss_per_char": 0.12508100271224976, "correct_loss_per_token": 1.5370410680770874, "incorrect_loss_per_token": 0.2501620054244995, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2501620054244995, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.2501620054244995, "logits_per_char": -0.12508100271224976, "num_chars": 2}, {"sum_logits": -1.5370410680770874, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.5370410680770874, "logits_per_char": -0.7685205340385437, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 584, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22332711517810822, "incorrect_loss_raw": 1.6526992321014404, "correct_loss_per_char": 0.11166355758905411, "incorrect_loss_per_char": 0.8263496160507202, "correct_loss_per_token": 0.22332711517810822, "incorrect_loss_per_token": 1.6526992321014404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22332711517810822, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.22332711517810822, "logits_per_char": -0.11166355758905411, "num_chars": 2}, {"sum_logits": -1.6526992321014404, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.6526992321014404, "logits_per_char": -0.8263496160507202, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 585, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27080273628234863, "incorrect_loss_raw": 1.482349157333374, "correct_loss_per_char": 0.13540136814117432, "incorrect_loss_per_char": 0.741174578666687, "correct_loss_per_token": 0.27080273628234863, "incorrect_loss_per_token": 1.482349157333374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27080273628234863, "num_tokens": 1, "num_tokens_all": 1192, "is_greedy": true, "logits_per_token": -0.27080273628234863, "logits_per_char": -0.13540136814117432, "num_chars": 2}, {"sum_logits": -1.482349157333374, "num_tokens": 1, "num_tokens_all": 1192, "is_greedy": false, "logits_per_token": -1.482349157333374, "logits_per_char": -0.741174578666687, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 586, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4499521255493164, "incorrect_loss_raw": 0.28160977363586426, "correct_loss_per_char": 0.7249760627746582, "incorrect_loss_per_char": 0.14080488681793213, "correct_loss_per_token": 1.4499521255493164, "incorrect_loss_per_token": 0.28160977363586426, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28160977363586426, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": true, "logits_per_token": -0.28160977363586426, "logits_per_char": -0.14080488681793213, "num_chars": 2}, {"sum_logits": -1.4499521255493164, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -1.4499521255493164, "logits_per_char": -0.7249760627746582, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 587, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.219541534781456, "incorrect_loss_raw": 1.6452300548553467, "correct_loss_per_char": 0.109770767390728, "incorrect_loss_per_char": 0.8226150274276733, "correct_loss_per_token": 0.219541534781456, "incorrect_loss_per_token": 1.6452300548553467, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.219541534781456, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.219541534781456, "logits_per_char": -0.109770767390728, "num_chars": 2}, {"sum_logits": -1.6452300548553467, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.6452300548553467, "logits_per_char": -0.8226150274276733, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 588, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21783238649368286, "incorrect_loss_raw": 1.6938841342926025, "correct_loss_per_char": 0.10891619324684143, "incorrect_loss_per_char": 0.8469420671463013, "correct_loss_per_token": 0.21783238649368286, "incorrect_loss_per_token": 1.6938841342926025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21783238649368286, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.21783238649368286, "logits_per_char": -0.10891619324684143, "num_chars": 2}, {"sum_logits": -1.6938841342926025, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.6938841342926025, "logits_per_char": -0.8469420671463013, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 589, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3977354764938354, "incorrect_loss_raw": 0.2961639165878296, "correct_loss_per_char": 0.6988677382469177, "incorrect_loss_per_char": 0.1480819582939148, "correct_loss_per_token": 1.3977354764938354, "incorrect_loss_per_token": 0.2961639165878296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2961639165878296, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": true, "logits_per_token": -0.2961639165878296, "logits_per_char": -0.1480819582939148, "num_chars": 2}, {"sum_logits": -1.3977354764938354, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -1.3977354764938354, "logits_per_char": -0.6988677382469177, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 590, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4016001224517822, "incorrect_loss_raw": 0.29600831866264343, "correct_loss_per_char": 0.7008000612258911, "incorrect_loss_per_char": 0.14800415933132172, "correct_loss_per_token": 1.4016001224517822, "incorrect_loss_per_token": 0.29600831866264343, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29600831866264343, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.29600831866264343, "logits_per_char": -0.14800415933132172, "num_chars": 2}, {"sum_logits": -1.4016001224517822, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.4016001224517822, "logits_per_char": -0.7008000612258911, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 591, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2369016408920288, "incorrect_loss_raw": 1.6134964227676392, "correct_loss_per_char": 0.1184508204460144, "incorrect_loss_per_char": 0.8067482113838196, "correct_loss_per_token": 0.2369016408920288, "incorrect_loss_per_token": 1.6134964227676392, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2369016408920288, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.2369016408920288, "logits_per_char": -0.1184508204460144, "num_chars": 2}, {"sum_logits": -1.6134964227676392, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.6134964227676392, "logits_per_char": -0.8067482113838196, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 592, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24634331464767456, "incorrect_loss_raw": 1.5577850341796875, "correct_loss_per_char": 0.12317165732383728, "incorrect_loss_per_char": 0.7788925170898438, "correct_loss_per_token": 0.24634331464767456, "incorrect_loss_per_token": 1.5577850341796875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24634331464767456, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -0.24634331464767456, "logits_per_char": -0.12317165732383728, "num_chars": 2}, {"sum_logits": -1.5577850341796875, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.5577850341796875, "logits_per_char": -0.7788925170898438, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 593, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6064590215682983, "incorrect_loss_raw": 0.24236321449279785, "correct_loss_per_char": 0.8032295107841492, "incorrect_loss_per_char": 0.12118160724639893, "correct_loss_per_token": 1.6064590215682983, "incorrect_loss_per_token": 0.24236321449279785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24236321449279785, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": true, "logits_per_token": -0.24236321449279785, "logits_per_char": -0.12118160724639893, "num_chars": 2}, {"sum_logits": -1.6064590215682983, "num_tokens": 1, "num_tokens_all": 1099, "is_greedy": false, "logits_per_token": -1.6064590215682983, "logits_per_char": -0.8032295107841492, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 594, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24916520714759827, "incorrect_loss_raw": 1.5593065023422241, "correct_loss_per_char": 0.12458260357379913, "incorrect_loss_per_char": 0.7796532511711121, "correct_loss_per_token": 0.24916520714759827, "incorrect_loss_per_token": 1.5593065023422241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24916520714759827, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.24916520714759827, "logits_per_char": -0.12458260357379913, "num_chars": 2}, {"sum_logits": -1.5593065023422241, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.5593065023422241, "logits_per_char": -0.7796532511711121, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 595, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5369596481323242, "incorrect_loss_raw": 0.26010578870773315, "correct_loss_per_char": 0.7684798240661621, "incorrect_loss_per_char": 0.13005289435386658, "correct_loss_per_token": 1.5369596481323242, "incorrect_loss_per_token": 0.26010578870773315, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26010578870773315, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": true, "logits_per_token": -0.26010578870773315, "logits_per_char": -0.13005289435386658, "num_chars": 2}, {"sum_logits": -1.5369596481323242, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": false, "logits_per_token": -1.5369596481323242, "logits_per_char": -0.7684798240661621, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 596, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29027441143989563, "incorrect_loss_raw": 1.4308772087097168, "correct_loss_per_char": 0.14513720571994781, "incorrect_loss_per_char": 0.7154386043548584, "correct_loss_per_token": 0.29027441143989563, "incorrect_loss_per_token": 1.4308772087097168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29027441143989563, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -0.29027441143989563, "logits_per_char": -0.14513720571994781, "num_chars": 2}, {"sum_logits": -1.4308772087097168, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.4308772087097168, "logits_per_char": -0.7154386043548584, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 597, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5385336875915527, "incorrect_loss_raw": 0.2585567831993103, "correct_loss_per_char": 0.7692668437957764, "incorrect_loss_per_char": 0.12927839159965515, "correct_loss_per_token": 1.5385336875915527, "incorrect_loss_per_token": 0.2585567831993103, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2585567831993103, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.2585567831993103, "logits_per_char": -0.12927839159965515, "num_chars": 2}, {"sum_logits": -1.5385336875915527, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.5385336875915527, "logits_per_char": -0.7692668437957764, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 598, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2365211695432663, "incorrect_loss_raw": 1.603807806968689, "correct_loss_per_char": 0.11826058477163315, "incorrect_loss_per_char": 0.8019039034843445, "correct_loss_per_token": 0.2365211695432663, "incorrect_loss_per_token": 1.603807806968689, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2365211695432663, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.2365211695432663, "logits_per_char": -0.11826058477163315, "num_chars": 2}, {"sum_logits": -1.603807806968689, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.603807806968689, "logits_per_char": -0.8019039034843445, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 599, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4734100103378296, "incorrect_loss_raw": 0.2677210569381714, "correct_loss_per_char": 0.7367050051689148, "incorrect_loss_per_char": 0.1338605284690857, "correct_loss_per_token": 1.4734100103378296, "incorrect_loss_per_token": 0.2677210569381714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2677210569381714, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.2677210569381714, "logits_per_char": -0.1338605284690857, "num_chars": 2}, {"sum_logits": -1.4734100103378296, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.4734100103378296, "logits_per_char": -0.7367050051689148, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 600, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3645823001861572, "incorrect_loss_raw": 1.2135493755340576, "correct_loss_per_char": 0.1822911500930786, "incorrect_loss_per_char": 0.6067746877670288, "correct_loss_per_token": 0.3645823001861572, "incorrect_loss_per_token": 1.2135493755340576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3645823001861572, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -0.3645823001861572, "logits_per_char": -0.1822911500930786, "num_chars": 2}, {"sum_logits": -1.2135493755340576, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.2135493755340576, "logits_per_char": -0.6067746877670288, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 601, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2640198767185211, "incorrect_loss_raw": 1.4920151233673096, "correct_loss_per_char": 0.13200993835926056, "incorrect_loss_per_char": 0.7460075616836548, "correct_loss_per_token": 0.2640198767185211, "incorrect_loss_per_token": 1.4920151233673096, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2640198767185211, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": true, "logits_per_token": -0.2640198767185211, "logits_per_char": -0.13200993835926056, "num_chars": 2}, {"sum_logits": -1.4920151233673096, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -1.4920151233673096, "logits_per_char": -0.7460075616836548, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 602, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22056560218334198, "incorrect_loss_raw": 1.6801478862762451, "correct_loss_per_char": 0.11028280109167099, "incorrect_loss_per_char": 0.8400739431381226, "correct_loss_per_token": 0.22056560218334198, "incorrect_loss_per_token": 1.6801478862762451, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22056560218334198, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.22056560218334198, "logits_per_char": -0.11028280109167099, "num_chars": 2}, {"sum_logits": -1.6801478862762451, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.6801478862762451, "logits_per_char": -0.8400739431381226, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 603, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2173982560634613, "incorrect_loss_raw": 1.6831457614898682, "correct_loss_per_char": 0.10869912803173065, "incorrect_loss_per_char": 0.8415728807449341, "correct_loss_per_token": 0.2173982560634613, "incorrect_loss_per_token": 1.6831457614898682, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2173982560634613, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.2173982560634613, "logits_per_char": -0.10869912803173065, "num_chars": 2}, {"sum_logits": -1.6831457614898682, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.6831457614898682, "logits_per_char": -0.8415728807449341, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 604, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6176047325134277, "incorrect_loss_raw": 0.23089219629764557, "correct_loss_per_char": 0.8088023662567139, "incorrect_loss_per_char": 0.11544609814882278, "correct_loss_per_token": 1.6176047325134277, "incorrect_loss_per_token": 0.23089219629764557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23089219629764557, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.23089219629764557, "logits_per_char": -0.11544609814882278, "num_chars": 2}, {"sum_logits": -1.6176047325134277, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.6176047325134277, "logits_per_char": -0.8088023662567139, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 605, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.648149013519287, "incorrect_loss_raw": 0.22537419199943542, "correct_loss_per_char": 0.8240745067596436, "incorrect_loss_per_char": 0.11268709599971771, "correct_loss_per_token": 1.648149013519287, "incorrect_loss_per_token": 0.22537419199943542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22537419199943542, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.22537419199943542, "logits_per_char": -0.11268709599971771, "num_chars": 2}, {"sum_logits": -1.648149013519287, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.648149013519287, "logits_per_char": -0.8240745067596436, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 606, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23269177973270416, "incorrect_loss_raw": 1.6082359552383423, "correct_loss_per_char": 0.11634588986635208, "incorrect_loss_per_char": 0.8041179776191711, "correct_loss_per_token": 0.23269177973270416, "incorrect_loss_per_token": 1.6082359552383423, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23269177973270416, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.23269177973270416, "logits_per_char": -0.11634588986635208, "num_chars": 2}, {"sum_logits": -1.6082359552383423, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -1.6082359552383423, "logits_per_char": -0.8041179776191711, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 607, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23542919754981995, "incorrect_loss_raw": 1.5987111330032349, "correct_loss_per_char": 0.11771459877490997, "incorrect_loss_per_char": 0.7993555665016174, "correct_loss_per_token": 0.23542919754981995, "incorrect_loss_per_token": 1.5987111330032349, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23542919754981995, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.23542919754981995, "logits_per_char": -0.11771459877490997, "num_chars": 2}, {"sum_logits": -1.5987111330032349, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.5987111330032349, "logits_per_char": -0.7993555665016174, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 608, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7270982265472412, "incorrect_loss_raw": 0.2017529308795929, "correct_loss_per_char": 0.8635491132736206, "incorrect_loss_per_char": 0.10087646543979645, "correct_loss_per_token": 1.7270982265472412, "incorrect_loss_per_token": 0.2017529308795929, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2017529308795929, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.2017529308795929, "logits_per_char": -0.10087646543979645, "num_chars": 2}, {"sum_logits": -1.7270982265472412, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.7270982265472412, "logits_per_char": -0.8635491132736206, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 609, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8163113594055176, "incorrect_loss_raw": 0.1851854771375656, "correct_loss_per_char": 0.9081556797027588, "incorrect_loss_per_char": 0.0925927385687828, "correct_loss_per_token": 1.8163113594055176, "incorrect_loss_per_token": 0.1851854771375656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1851854771375656, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.1851854771375656, "logits_per_char": -0.0925927385687828, "num_chars": 2}, {"sum_logits": -1.8163113594055176, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.8163113594055176, "logits_per_char": -0.9081556797027588, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 610, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2776452302932739, "incorrect_loss_raw": 1.4413704872131348, "correct_loss_per_char": 0.13882261514663696, "incorrect_loss_per_char": 0.7206852436065674, "correct_loss_per_token": 0.2776452302932739, "incorrect_loss_per_token": 1.4413704872131348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2776452302932739, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": true, "logits_per_token": -0.2776452302932739, "logits_per_char": -0.13882261514663696, "num_chars": 2}, {"sum_logits": -1.4413704872131348, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.4413704872131348, "logits_per_char": -0.7206852436065674, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 611, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20328092575073242, "incorrect_loss_raw": 1.7491199970245361, "correct_loss_per_char": 0.10164046287536621, "incorrect_loss_per_char": 0.8745599985122681, "correct_loss_per_token": 0.20328092575073242, "incorrect_loss_per_token": 1.7491199970245361, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20328092575073242, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.20328092575073242, "logits_per_char": -0.10164046287536621, "num_chars": 2}, {"sum_logits": -1.7491199970245361, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -1.7491199970245361, "logits_per_char": -0.8745599985122681, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 612, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4621790647506714, "incorrect_loss_raw": 0.2803114652633667, "correct_loss_per_char": 0.7310895323753357, "incorrect_loss_per_char": 0.14015573263168335, "correct_loss_per_token": 1.4621790647506714, "incorrect_loss_per_token": 0.2803114652633667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2803114652633667, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.2803114652633667, "logits_per_char": -0.14015573263168335, "num_chars": 2}, {"sum_logits": -1.4621790647506714, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.4621790647506714, "logits_per_char": -0.7310895323753357, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 613, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2466275691986084, "incorrect_loss_raw": 0.12023330479860306, "correct_loss_per_char": 1.1233137845993042, "incorrect_loss_per_char": 0.06011665239930153, "correct_loss_per_token": 2.2466275691986084, "incorrect_loss_per_token": 0.12023330479860306, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.12023330479860306, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.12023330479860306, "logits_per_char": -0.06011665239930153, "num_chars": 2}, {"sum_logits": -2.2466275691986084, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -2.2466275691986084, "logits_per_char": -1.1233137845993042, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 614, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6657583713531494, "incorrect_loss_raw": 0.22060996294021606, "correct_loss_per_char": 0.8328791856765747, "incorrect_loss_per_char": 0.11030498147010803, "correct_loss_per_token": 1.6657583713531494, "incorrect_loss_per_token": 0.22060996294021606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22060996294021606, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -0.22060996294021606, "logits_per_char": -0.11030498147010803, "num_chars": 2}, {"sum_logits": -1.6657583713531494, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.6657583713531494, "logits_per_char": -0.8328791856765747, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 615, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23379825055599213, "incorrect_loss_raw": 1.615419626235962, "correct_loss_per_char": 0.11689912527799606, "incorrect_loss_per_char": 0.807709813117981, "correct_loss_per_token": 0.23379825055599213, "incorrect_loss_per_token": 1.615419626235962, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23379825055599213, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -0.23379825055599213, "logits_per_char": -0.11689912527799606, "num_chars": 2}, {"sum_logits": -1.615419626235962, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.615419626235962, "logits_per_char": -0.807709813117981, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 616, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23042753338813782, "incorrect_loss_raw": 1.6374229192733765, "correct_loss_per_char": 0.11521376669406891, "incorrect_loss_per_char": 0.8187114596366882, "correct_loss_per_token": 0.23042753338813782, "incorrect_loss_per_token": 1.6374229192733765, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23042753338813782, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -0.23042753338813782, "logits_per_char": -0.11521376669406891, "num_chars": 2}, {"sum_logits": -1.6374229192733765, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.6374229192733765, "logits_per_char": -0.8187114596366882, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 617, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19784383475780487, "incorrect_loss_raw": 1.7746161222457886, "correct_loss_per_char": 0.09892191737890244, "incorrect_loss_per_char": 0.8873080611228943, "correct_loss_per_token": 0.19784383475780487, "incorrect_loss_per_token": 1.7746161222457886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19784383475780487, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -0.19784383475780487, "logits_per_char": -0.09892191737890244, "num_chars": 2}, {"sum_logits": -1.7746161222457886, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.7746161222457886, "logits_per_char": -0.8873080611228943, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 618, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5887181758880615, "incorrect_loss_raw": 0.24501843750476837, "correct_loss_per_char": 0.7943590879440308, "incorrect_loss_per_char": 0.12250921875238419, "correct_loss_per_token": 1.5887181758880615, "incorrect_loss_per_token": 0.24501843750476837, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24501843750476837, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.24501843750476837, "logits_per_char": -0.12250921875238419, "num_chars": 2}, {"sum_logits": -1.5887181758880615, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.5887181758880615, "logits_per_char": -0.7943590879440308, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 619, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2138178050518036, "incorrect_loss_raw": 1.6967322826385498, "correct_loss_per_char": 0.1069089025259018, "incorrect_loss_per_char": 0.8483661413192749, "correct_loss_per_token": 0.2138178050518036, "incorrect_loss_per_token": 1.6967322826385498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2138178050518036, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.2138178050518036, "logits_per_char": -0.1069089025259018, "num_chars": 2}, {"sum_logits": -1.6967322826385498, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.6967322826385498, "logits_per_char": -0.8483661413192749, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 620, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2599872946739197, "incorrect_loss_raw": 1.5337061882019043, "correct_loss_per_char": 0.12999364733695984, "incorrect_loss_per_char": 0.7668530941009521, "correct_loss_per_token": 0.2599872946739197, "incorrect_loss_per_token": 1.5337061882019043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2599872946739197, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": true, "logits_per_token": -0.2599872946739197, "logits_per_char": -0.12999364733695984, "num_chars": 2}, {"sum_logits": -1.5337061882019043, "num_tokens": 1, "num_tokens_all": 1128, "is_greedy": false, "logits_per_token": -1.5337061882019043, "logits_per_char": -0.7668530941009521, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 621, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8563936948776245, "incorrect_loss_raw": 0.18132123351097107, "correct_loss_per_char": 0.9281968474388123, "incorrect_loss_per_char": 0.09066061675548553, "correct_loss_per_token": 1.8563936948776245, "incorrect_loss_per_token": 0.18132123351097107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18132123351097107, "num_tokens": 1, "num_tokens_all": 1153, "is_greedy": true, "logits_per_token": -0.18132123351097107, "logits_per_char": -0.09066061675548553, "num_chars": 2}, {"sum_logits": -1.8563936948776245, "num_tokens": 1, "num_tokens_all": 1153, "is_greedy": false, "logits_per_token": -1.8563936948776245, "logits_per_char": -0.9281968474388123, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 622, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6298954486846924, "incorrect_loss_raw": 0.2270502746105194, "correct_loss_per_char": 0.8149477243423462, "incorrect_loss_per_char": 0.1135251373052597, "correct_loss_per_token": 1.6298954486846924, "incorrect_loss_per_token": 0.2270502746105194, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2270502746105194, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.2270502746105194, "logits_per_char": -0.1135251373052597, "num_chars": 2}, {"sum_logits": -1.6298954486846924, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.6298954486846924, "logits_per_char": -0.8149477243423462, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 623, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.387359380722046, "incorrect_loss_raw": 0.30480504035949707, "correct_loss_per_char": 0.693679690361023, "incorrect_loss_per_char": 0.15240252017974854, "correct_loss_per_token": 1.387359380722046, "incorrect_loss_per_token": 0.30480504035949707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30480504035949707, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": true, "logits_per_token": -0.30480504035949707, "logits_per_char": -0.15240252017974854, "num_chars": 2}, {"sum_logits": -1.387359380722046, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": false, "logits_per_token": -1.387359380722046, "logits_per_char": -0.693679690361023, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 624, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30016976594924927, "incorrect_loss_raw": 1.4090602397918701, "correct_loss_per_char": 0.15008488297462463, "incorrect_loss_per_char": 0.7045301198959351, "correct_loss_per_token": 0.30016976594924927, "incorrect_loss_per_token": 1.4090602397918701, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30016976594924927, "num_tokens": 1, "num_tokens_all": 1233, "is_greedy": true, "logits_per_token": -0.30016976594924927, "logits_per_char": -0.15008488297462463, "num_chars": 2}, {"sum_logits": -1.4090602397918701, "num_tokens": 1, "num_tokens_all": 1233, "is_greedy": false, "logits_per_token": -1.4090602397918701, "logits_per_char": -0.7045301198959351, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 625, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6072410345077515, "incorrect_loss_raw": 0.23298032581806183, "correct_loss_per_char": 0.8036205172538757, "incorrect_loss_per_char": 0.11649016290903091, "correct_loss_per_token": 1.6072410345077515, "incorrect_loss_per_token": 0.23298032581806183, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23298032581806183, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.23298032581806183, "logits_per_char": -0.11649016290903091, "num_chars": 2}, {"sum_logits": -1.6072410345077515, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.6072410345077515, "logits_per_char": -0.8036205172538757, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 626, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6581776142120361, "incorrect_loss_raw": 0.22358757257461548, "correct_loss_per_char": 0.8290888071060181, "incorrect_loss_per_char": 0.11179378628730774, "correct_loss_per_token": 1.6581776142120361, "incorrect_loss_per_token": 0.22358757257461548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22358757257461548, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -0.22358757257461548, "logits_per_char": -0.11179378628730774, "num_chars": 2}, {"sum_logits": -1.6581776142120361, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.6581776142120361, "logits_per_char": -0.8290888071060181, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 627, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.745223879814148, "incorrect_loss_raw": 0.20088377594947815, "correct_loss_per_char": 0.872611939907074, "incorrect_loss_per_char": 0.10044188797473907, "correct_loss_per_token": 1.745223879814148, "incorrect_loss_per_token": 0.20088377594947815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20088377594947815, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": true, "logits_per_token": -0.20088377594947815, "logits_per_char": -0.10044188797473907, "num_chars": 2}, {"sum_logits": -1.745223879814148, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": false, "logits_per_token": -1.745223879814148, "logits_per_char": -0.872611939907074, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 628, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8937128782272339, "incorrect_loss_raw": 0.17399683594703674, "correct_loss_per_char": 0.9468564391136169, "incorrect_loss_per_char": 0.08699841797351837, "correct_loss_per_token": 1.8937128782272339, "incorrect_loss_per_token": 0.17399683594703674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17399683594703674, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": true, "logits_per_token": -0.17399683594703674, "logits_per_char": -0.08699841797351837, "num_chars": 2}, {"sum_logits": -1.8937128782272339, "num_tokens": 1, "num_tokens_all": 1040, "is_greedy": false, "logits_per_token": -1.8937128782272339, "logits_per_char": -0.9468564391136169, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 629, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2975922226905823, "incorrect_loss_raw": 1.4066882133483887, "correct_loss_per_char": 0.14879611134529114, "incorrect_loss_per_char": 0.7033441066741943, "correct_loss_per_token": 0.2975922226905823, "incorrect_loss_per_token": 1.4066882133483887, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2975922226905823, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.2975922226905823, "logits_per_char": -0.14879611134529114, "num_chars": 2}, {"sum_logits": -1.4066882133483887, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.4066882133483887, "logits_per_char": -0.7033441066741943, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 630, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6888638734817505, "incorrect_loss_raw": 0.21579113602638245, "correct_loss_per_char": 0.8444319367408752, "incorrect_loss_per_char": 0.10789556801319122, "correct_loss_per_token": 1.6888638734817505, "incorrect_loss_per_token": 0.21579113602638245, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21579113602638245, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": true, "logits_per_token": -0.21579113602638245, "logits_per_char": -0.10789556801319122, "num_chars": 2}, {"sum_logits": -1.6888638734817505, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.6888638734817505, "logits_per_char": -0.8444319367408752, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 631, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2647785246372223, "incorrect_loss_raw": 1.5216816663742065, "correct_loss_per_char": 0.13238926231861115, "incorrect_loss_per_char": 0.7608408331871033, "correct_loss_per_token": 0.2647785246372223, "incorrect_loss_per_token": 1.5216816663742065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2647785246372223, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": true, "logits_per_token": -0.2647785246372223, "logits_per_char": -0.13238926231861115, "num_chars": 2}, {"sum_logits": -1.5216816663742065, "num_tokens": 1, "num_tokens_all": 1104, "is_greedy": false, "logits_per_token": -1.5216816663742065, "logits_per_char": -0.7608408331871033, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 632, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18695048987865448, "incorrect_loss_raw": 1.827323079109192, "correct_loss_per_char": 0.09347524493932724, "incorrect_loss_per_char": 0.913661539554596, "correct_loss_per_token": 0.18695048987865448, "incorrect_loss_per_token": 1.827323079109192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18695048987865448, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -0.18695048987865448, "logits_per_char": -0.09347524493932724, "num_chars": 2}, {"sum_logits": -1.827323079109192, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.827323079109192, "logits_per_char": -0.913661539554596, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 633, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.454575777053833, "incorrect_loss_raw": 0.2873992621898651, "correct_loss_per_char": 0.7272878885269165, "incorrect_loss_per_char": 0.14369963109493256, "correct_loss_per_token": 1.454575777053833, "incorrect_loss_per_token": 0.2873992621898651, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2873992621898651, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.2873992621898651, "logits_per_char": -0.14369963109493256, "num_chars": 2}, {"sum_logits": -1.454575777053833, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.454575777053833, "logits_per_char": -0.7272878885269165, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 634, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.567453384399414, "incorrect_loss_raw": 0.25104689598083496, "correct_loss_per_char": 0.783726692199707, "incorrect_loss_per_char": 0.12552344799041748, "correct_loss_per_token": 1.567453384399414, "incorrect_loss_per_token": 0.25104689598083496, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25104689598083496, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.25104689598083496, "logits_per_char": -0.12552344799041748, "num_chars": 2}, {"sum_logits": -1.567453384399414, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.567453384399414, "logits_per_char": -0.783726692199707, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 635, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6930314302444458, "incorrect_loss_raw": 0.20859338343143463, "correct_loss_per_char": 0.8465157151222229, "incorrect_loss_per_char": 0.10429669171571732, "correct_loss_per_token": 1.6930314302444458, "incorrect_loss_per_token": 0.20859338343143463, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20859338343143463, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.20859338343143463, "logits_per_char": -0.10429669171571732, "num_chars": 2}, {"sum_logits": -1.6930314302444458, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.6930314302444458, "logits_per_char": -0.8465157151222229, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 636, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2206553816795349, "incorrect_loss_raw": 1.6491429805755615, "correct_loss_per_char": 0.11032769083976746, "incorrect_loss_per_char": 0.8245714902877808, "correct_loss_per_token": 0.2206553816795349, "incorrect_loss_per_token": 1.6491429805755615, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2206553816795349, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.2206553816795349, "logits_per_char": -0.11032769083976746, "num_chars": 2}, {"sum_logits": -1.6491429805755615, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.6491429805755615, "logits_per_char": -0.8245714902877808, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 637, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.213395893573761, "incorrect_loss_raw": 1.7120602130889893, "correct_loss_per_char": 0.1066979467868805, "incorrect_loss_per_char": 0.8560301065444946, "correct_loss_per_token": 0.213395893573761, "incorrect_loss_per_token": 1.7120602130889893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.213395893573761, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": true, "logits_per_token": -0.213395893573761, "logits_per_char": -0.1066979467868805, "num_chars": 2}, {"sum_logits": -1.7120602130889893, "num_tokens": 1, "num_tokens_all": 1106, "is_greedy": false, "logits_per_token": -1.7120602130889893, "logits_per_char": -0.8560301065444946, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 638, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6228971481323242, "incorrect_loss_raw": 0.23297616839408875, "correct_loss_per_char": 0.8114485740661621, "incorrect_loss_per_char": 0.11648808419704437, "correct_loss_per_token": 1.6228971481323242, "incorrect_loss_per_token": 0.23297616839408875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23297616839408875, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.23297616839408875, "logits_per_char": -0.11648808419704437, "num_chars": 2}, {"sum_logits": -1.6228971481323242, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.6228971481323242, "logits_per_char": -0.8114485740661621, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 639, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18353818356990814, "incorrect_loss_raw": 1.8284270763397217, "correct_loss_per_char": 0.09176909178495407, "incorrect_loss_per_char": 0.9142135381698608, "correct_loss_per_token": 0.18353818356990814, "incorrect_loss_per_token": 1.8284270763397217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18353818356990814, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.18353818356990814, "logits_per_char": -0.09176909178495407, "num_chars": 2}, {"sum_logits": -1.8284270763397217, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.8284270763397217, "logits_per_char": -0.9142135381698608, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 640, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2283829301595688, "incorrect_loss_raw": 1.610197901725769, "correct_loss_per_char": 0.1141914650797844, "incorrect_loss_per_char": 0.8050989508628845, "correct_loss_per_token": 0.2283829301595688, "incorrect_loss_per_token": 1.610197901725769, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2283829301595688, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.2283829301595688, "logits_per_char": -0.1141914650797844, "num_chars": 2}, {"sum_logits": -1.610197901725769, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.610197901725769, "logits_per_char": -0.8050989508628845, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 641, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.31427642703056335, "incorrect_loss_raw": 1.340179443359375, "correct_loss_per_char": 0.15713821351528168, "incorrect_loss_per_char": 0.6700897216796875, "correct_loss_per_token": 0.31427642703056335, "incorrect_loss_per_token": 1.340179443359375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.31427642703056335, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.31427642703056335, "logits_per_char": -0.15713821351528168, "num_chars": 2}, {"sum_logits": -1.340179443359375, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.340179443359375, "logits_per_char": -0.6700897216796875, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 642, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.634435772895813, "incorrect_loss_raw": 0.22666583955287933, "correct_loss_per_char": 0.8172178864479065, "incorrect_loss_per_char": 0.11333291977643967, "correct_loss_per_token": 1.634435772895813, "incorrect_loss_per_token": 0.22666583955287933, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22666583955287933, "num_tokens": 1, "num_tokens_all": 1019, "is_greedy": true, "logits_per_token": -0.22666583955287933, "logits_per_char": -0.11333291977643967, "num_chars": 2}, {"sum_logits": -1.634435772895813, "num_tokens": 1, "num_tokens_all": 1019, "is_greedy": false, "logits_per_token": -1.634435772895813, "logits_per_char": -0.8172178864479065, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 643, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2848983108997345, "incorrect_loss_raw": 1.4551658630371094, "correct_loss_per_char": 0.14244915544986725, "incorrect_loss_per_char": 0.7275829315185547, "correct_loss_per_token": 0.2848983108997345, "incorrect_loss_per_token": 1.4551658630371094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2848983108997345, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.2848983108997345, "logits_per_char": -0.14244915544986725, "num_chars": 2}, {"sum_logits": -1.4551658630371094, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.4551658630371094, "logits_per_char": -0.7275829315185547, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 644, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22237268090248108, "incorrect_loss_raw": 1.6445022821426392, "correct_loss_per_char": 0.11118634045124054, "incorrect_loss_per_char": 0.8222511410713196, "correct_loss_per_token": 0.22237268090248108, "incorrect_loss_per_token": 1.6445022821426392, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22237268090248108, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -0.22237268090248108, "logits_per_char": -0.11118634045124054, "num_chars": 2}, {"sum_logits": -1.6445022821426392, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.6445022821426392, "logits_per_char": -0.8222511410713196, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 645, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5305390357971191, "incorrect_loss_raw": 0.262204647064209, "correct_loss_per_char": 0.7652695178985596, "incorrect_loss_per_char": 0.1311023235321045, "correct_loss_per_token": 1.5305390357971191, "incorrect_loss_per_token": 0.262204647064209, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.262204647064209, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": true, "logits_per_token": -0.262204647064209, "logits_per_char": -0.1311023235321045, "num_chars": 2}, {"sum_logits": -1.5305390357971191, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.5305390357971191, "logits_per_char": -0.7652695178985596, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 646, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5426254272460938, "incorrect_loss_raw": 0.24824675917625427, "correct_loss_per_char": 0.7713127136230469, "incorrect_loss_per_char": 0.12412337958812714, "correct_loss_per_token": 1.5426254272460938, "incorrect_loss_per_token": 0.24824675917625427, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24824675917625427, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.24824675917625427, "logits_per_char": -0.12412337958812714, "num_chars": 2}, {"sum_logits": -1.5426254272460938, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.5426254272460938, "logits_per_char": -0.7713127136230469, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 647, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1887200027704239, "incorrect_loss_raw": 1.8117979764938354, "correct_loss_per_char": 0.09436000138521194, "incorrect_loss_per_char": 0.9058989882469177, "correct_loss_per_token": 0.1887200027704239, "incorrect_loss_per_token": 1.8117979764938354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1887200027704239, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.1887200027704239, "logits_per_char": -0.09436000138521194, "num_chars": 2}, {"sum_logits": -1.8117979764938354, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.8117979764938354, "logits_per_char": -0.9058989882469177, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 648, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32173797488212585, "incorrect_loss_raw": 1.3262581825256348, "correct_loss_per_char": 0.16086898744106293, "incorrect_loss_per_char": 0.6631290912628174, "correct_loss_per_token": 0.32173797488212585, "incorrect_loss_per_token": 1.3262581825256348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32173797488212585, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": true, "logits_per_token": -0.32173797488212585, "logits_per_char": -0.16086898744106293, "num_chars": 2}, {"sum_logits": -1.3262581825256348, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": false, "logits_per_token": -1.3262581825256348, "logits_per_char": -0.6631290912628174, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 649, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24956050515174866, "incorrect_loss_raw": 1.578005075454712, "correct_loss_per_char": 0.12478025257587433, "incorrect_loss_per_char": 0.789002537727356, "correct_loss_per_token": 0.24956050515174866, "incorrect_loss_per_token": 1.578005075454712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24956050515174866, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": true, "logits_per_token": -0.24956050515174866, "logits_per_char": -0.12478025257587433, "num_chars": 2}, {"sum_logits": -1.578005075454712, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": false, "logits_per_token": -1.578005075454712, "logits_per_char": -0.789002537727356, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 650, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2748100459575653, "incorrect_loss_raw": 1.4691346883773804, "correct_loss_per_char": 0.13740502297878265, "incorrect_loss_per_char": 0.7345673441886902, "correct_loss_per_token": 0.2748100459575653, "incorrect_loss_per_token": 1.4691346883773804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2748100459575653, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.2748100459575653, "logits_per_char": -0.13740502297878265, "num_chars": 2}, {"sum_logits": -1.4691346883773804, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.4691346883773804, "logits_per_char": -0.7345673441886902, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 651, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25882765650749207, "incorrect_loss_raw": 1.5161373615264893, "correct_loss_per_char": 0.12941382825374603, "incorrect_loss_per_char": 0.7580686807632446, "correct_loss_per_token": 0.25882765650749207, "incorrect_loss_per_token": 1.5161373615264893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25882765650749207, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.25882765650749207, "logits_per_char": -0.12941382825374603, "num_chars": 2}, {"sum_logits": -1.5161373615264893, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.5161373615264893, "logits_per_char": -0.7580686807632446, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 652, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3452788293361664, "incorrect_loss_raw": 1.273281216621399, "correct_loss_per_char": 0.1726394146680832, "incorrect_loss_per_char": 0.6366406083106995, "correct_loss_per_token": 0.3452788293361664, "incorrect_loss_per_token": 1.273281216621399, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3452788293361664, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": true, "logits_per_token": -0.3452788293361664, "logits_per_char": -0.1726394146680832, "num_chars": 2}, {"sum_logits": -1.273281216621399, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": false, "logits_per_token": -1.273281216621399, "logits_per_char": -0.6366406083106995, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 653, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28083986043930054, "incorrect_loss_raw": 1.4532275199890137, "correct_loss_per_char": 0.14041993021965027, "incorrect_loss_per_char": 0.7266137599945068, "correct_loss_per_token": 0.28083986043930054, "incorrect_loss_per_token": 1.4532275199890137, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28083986043930054, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.28083986043930054, "logits_per_char": -0.14041993021965027, "num_chars": 2}, {"sum_logits": -1.4532275199890137, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.4532275199890137, "logits_per_char": -0.7266137599945068, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 654, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3012269139289856, "incorrect_loss_raw": 1.3872480392456055, "correct_loss_per_char": 0.1506134569644928, "incorrect_loss_per_char": 0.6936240196228027, "correct_loss_per_token": 0.3012269139289856, "incorrect_loss_per_token": 1.3872480392456055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3012269139289856, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -0.3012269139289856, "logits_per_char": -0.1506134569644928, "num_chars": 2}, {"sum_logits": -1.3872480392456055, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.3872480392456055, "logits_per_char": -0.6936240196228027, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 655, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6715713739395142, "incorrect_loss_raw": 0.21922238171100616, "correct_loss_per_char": 0.8357856869697571, "incorrect_loss_per_char": 0.10961119085550308, "correct_loss_per_token": 1.6715713739395142, "incorrect_loss_per_token": 0.21922238171100616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21922238171100616, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -0.21922238171100616, "logits_per_char": -0.10961119085550308, "num_chars": 2}, {"sum_logits": -1.6715713739395142, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.6715713739395142, "logits_per_char": -0.8357856869697571, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 656, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.277156800031662, "incorrect_loss_raw": 1.4500443935394287, "correct_loss_per_char": 0.138578400015831, "incorrect_loss_per_char": 0.7250221967697144, "correct_loss_per_token": 0.277156800031662, "incorrect_loss_per_token": 1.4500443935394287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.277156800031662, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.277156800031662, "logits_per_char": -0.138578400015831, "num_chars": 2}, {"sum_logits": -1.4500443935394287, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.4500443935394287, "logits_per_char": -0.7250221967697144, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 657, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21396632492542267, "incorrect_loss_raw": 1.6959304809570312, "correct_loss_per_char": 0.10698316246271133, "incorrect_loss_per_char": 0.8479652404785156, "correct_loss_per_token": 0.21396632492542267, "incorrect_loss_per_token": 1.6959304809570312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21396632492542267, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.21396632492542267, "logits_per_char": -0.10698316246271133, "num_chars": 2}, {"sum_logits": -1.6959304809570312, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.6959304809570312, "logits_per_char": -0.8479652404785156, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 658, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21600572764873505, "incorrect_loss_raw": 1.6918957233428955, "correct_loss_per_char": 0.10800286382436752, "incorrect_loss_per_char": 0.8459478616714478, "correct_loss_per_token": 0.21600572764873505, "incorrect_loss_per_token": 1.6918957233428955, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21600572764873505, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.21600572764873505, "logits_per_char": -0.10800286382436752, "num_chars": 2}, {"sum_logits": -1.6918957233428955, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.6918957233428955, "logits_per_char": -0.8459478616714478, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 659, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6163229942321777, "incorrect_loss_raw": 0.23549626767635345, "correct_loss_per_char": 0.8081614971160889, "incorrect_loss_per_char": 0.11774813383817673, "correct_loss_per_token": 1.6163229942321777, "incorrect_loss_per_token": 0.23549626767635345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23549626767635345, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -0.23549626767635345, "logits_per_char": -0.11774813383817673, "num_chars": 2}, {"sum_logits": -1.6163229942321777, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.6163229942321777, "logits_per_char": -0.8081614971160889, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 660, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.453352928161621, "incorrect_loss_raw": 0.3161446452140808, "correct_loss_per_char": 0.7266764640808105, "incorrect_loss_per_char": 0.1580723226070404, "correct_loss_per_token": 1.453352928161621, "incorrect_loss_per_token": 0.3161446452140808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3161446452140808, "num_tokens": 1, "num_tokens_all": 1505, "is_greedy": true, "logits_per_token": -0.3161446452140808, "logits_per_char": -0.1580723226070404, "num_chars": 2}, {"sum_logits": -1.453352928161621, "num_tokens": 1, "num_tokens_all": 1505, "is_greedy": false, "logits_per_token": -1.453352928161621, "logits_per_char": -0.7266764640808105, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 661, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4044311046600342, "incorrect_loss_raw": 0.29233670234680176, "correct_loss_per_char": 0.7022155523300171, "incorrect_loss_per_char": 0.14616835117340088, "correct_loss_per_token": 1.4044311046600342, "incorrect_loss_per_token": 0.29233670234680176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29233670234680176, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.29233670234680176, "logits_per_char": -0.14616835117340088, "num_chars": 2}, {"sum_logits": -1.4044311046600342, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.4044311046600342, "logits_per_char": -0.7022155523300171, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 662, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2102566510438919, "incorrect_loss_raw": 1.7080076932907104, "correct_loss_per_char": 0.10512832552194595, "incorrect_loss_per_char": 0.8540038466453552, "correct_loss_per_token": 0.2102566510438919, "incorrect_loss_per_token": 1.7080076932907104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2102566510438919, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.2102566510438919, "logits_per_char": -0.10512832552194595, "num_chars": 2}, {"sum_logits": -1.7080076932907104, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.7080076932907104, "logits_per_char": -0.8540038466453552, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 663, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2394603043794632, "incorrect_loss_raw": 1.5998224020004272, "correct_loss_per_char": 0.1197301521897316, "incorrect_loss_per_char": 0.7999112010002136, "correct_loss_per_token": 0.2394603043794632, "incorrect_loss_per_token": 1.5998224020004272, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2394603043794632, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": true, "logits_per_token": -0.2394603043794632, "logits_per_char": -0.1197301521897316, "num_chars": 2}, {"sum_logits": -1.5998224020004272, "num_tokens": 1, "num_tokens_all": 1059, "is_greedy": false, "logits_per_token": -1.5998224020004272, "logits_per_char": -0.7999112010002136, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 664, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.501971960067749, "incorrect_loss_raw": 0.2612481713294983, "correct_loss_per_char": 0.7509859800338745, "incorrect_loss_per_char": 0.13062408566474915, "correct_loss_per_token": 1.501971960067749, "incorrect_loss_per_token": 0.2612481713294983, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2612481713294983, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.2612481713294983, "logits_per_char": -0.13062408566474915, "num_chars": 2}, {"sum_logits": -1.501971960067749, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.501971960067749, "logits_per_char": -0.7509859800338745, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 665, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19026289880275726, "incorrect_loss_raw": 1.8121482133865356, "correct_loss_per_char": 0.09513144940137863, "incorrect_loss_per_char": 0.9060741066932678, "correct_loss_per_token": 0.19026289880275726, "incorrect_loss_per_token": 1.8121482133865356, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19026289880275726, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.19026289880275726, "logits_per_char": -0.09513144940137863, "num_chars": 2}, {"sum_logits": -1.8121482133865356, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.8121482133865356, "logits_per_char": -0.9060741066932678, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 666, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2305985987186432, "incorrect_loss_raw": 1.6293492317199707, "correct_loss_per_char": 0.1152992993593216, "incorrect_loss_per_char": 0.8146746158599854, "correct_loss_per_token": 0.2305985987186432, "incorrect_loss_per_token": 1.6293492317199707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2305985987186432, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": true, "logits_per_token": -0.2305985987186432, "logits_per_char": -0.1152992993593216, "num_chars": 2}, {"sum_logits": -1.6293492317199707, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": false, "logits_per_token": -1.6293492317199707, "logits_per_char": -0.8146746158599854, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 667, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24321044981479645, "incorrect_loss_raw": 1.5868339538574219, "correct_loss_per_char": 0.12160522490739822, "incorrect_loss_per_char": 0.7934169769287109, "correct_loss_per_token": 0.24321044981479645, "incorrect_loss_per_token": 1.5868339538574219, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24321044981479645, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": true, "logits_per_token": -0.24321044981479645, "logits_per_char": -0.12160522490739822, "num_chars": 2}, {"sum_logits": -1.5868339538574219, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -1.5868339538574219, "logits_per_char": -0.7934169769287109, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 668, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23193581402301788, "incorrect_loss_raw": 1.6280722618103027, "correct_loss_per_char": 0.11596790701150894, "incorrect_loss_per_char": 0.8140361309051514, "correct_loss_per_token": 0.23193581402301788, "incorrect_loss_per_token": 1.6280722618103027, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23193581402301788, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.23193581402301788, "logits_per_char": -0.11596790701150894, "num_chars": 2}, {"sum_logits": -1.6280722618103027, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.6280722618103027, "logits_per_char": -0.8140361309051514, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 669, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21682153642177582, "incorrect_loss_raw": 1.6859022378921509, "correct_loss_per_char": 0.10841076821088791, "incorrect_loss_per_char": 0.8429511189460754, "correct_loss_per_token": 0.21682153642177582, "incorrect_loss_per_token": 1.6859022378921509, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21682153642177582, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.21682153642177582, "logits_per_char": -0.10841076821088791, "num_chars": 2}, {"sum_logits": -1.6859022378921509, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.6859022378921509, "logits_per_char": -0.8429511189460754, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 670, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5962226390838623, "incorrect_loss_raw": 0.23369531333446503, "correct_loss_per_char": 0.7981113195419312, "incorrect_loss_per_char": 0.11684765666723251, "correct_loss_per_token": 1.5962226390838623, "incorrect_loss_per_token": 0.23369531333446503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23369531333446503, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.23369531333446503, "logits_per_char": -0.11684765666723251, "num_chars": 2}, {"sum_logits": -1.5962226390838623, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.5962226390838623, "logits_per_char": -0.7981113195419312, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 671, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24674615263938904, "incorrect_loss_raw": 1.5741709470748901, "correct_loss_per_char": 0.12337307631969452, "incorrect_loss_per_char": 0.7870854735374451, "correct_loss_per_token": 0.24674615263938904, "incorrect_loss_per_token": 1.5741709470748901, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24674615263938904, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": true, "logits_per_token": -0.24674615263938904, "logits_per_char": -0.12337307631969452, "num_chars": 2}, {"sum_logits": -1.5741709470748901, "num_tokens": 1, "num_tokens_all": 996, "is_greedy": false, "logits_per_token": -1.5741709470748901, "logits_per_char": -0.7870854735374451, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 672, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19346298277378082, "incorrect_loss_raw": 1.7992253303527832, "correct_loss_per_char": 0.09673149138689041, "incorrect_loss_per_char": 0.8996126651763916, "correct_loss_per_token": 0.19346298277378082, "incorrect_loss_per_token": 1.7992253303527832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19346298277378082, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -0.19346298277378082, "logits_per_char": -0.09673149138689041, "num_chars": 2}, {"sum_logits": -1.7992253303527832, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.7992253303527832, "logits_per_char": -0.8996126651763916, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 673, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24303357303142548, "incorrect_loss_raw": 1.586016297340393, "correct_loss_per_char": 0.12151678651571274, "incorrect_loss_per_char": 0.7930081486701965, "correct_loss_per_token": 0.24303357303142548, "incorrect_loss_per_token": 1.586016297340393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24303357303142548, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.24303357303142548, "logits_per_char": -0.12151678651571274, "num_chars": 2}, {"sum_logits": -1.586016297340393, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -1.586016297340393, "logits_per_char": -0.7930081486701965, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 674, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1871241182088852, "incorrect_loss_raw": 1.8096033334732056, "correct_loss_per_char": 0.0935620591044426, "incorrect_loss_per_char": 0.9048016667366028, "correct_loss_per_token": 0.1871241182088852, "incorrect_loss_per_token": 1.8096033334732056, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1871241182088852, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.1871241182088852, "logits_per_char": -0.0935620591044426, "num_chars": 2}, {"sum_logits": -1.8096033334732056, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.8096033334732056, "logits_per_char": -0.9048016667366028, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 675, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23268119990825653, "incorrect_loss_raw": 1.623326063156128, "correct_loss_per_char": 0.11634059995412827, "incorrect_loss_per_char": 0.811663031578064, "correct_loss_per_token": 0.23268119990825653, "incorrect_loss_per_token": 1.623326063156128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23268119990825653, "num_tokens": 1, "num_tokens_all": 1022, "is_greedy": true, "logits_per_token": -0.23268119990825653, "logits_per_char": -0.11634059995412827, "num_chars": 2}, {"sum_logits": -1.623326063156128, "num_tokens": 1, "num_tokens_all": 1022, "is_greedy": false, "logits_per_token": -1.623326063156128, "logits_per_char": -0.811663031578064, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 676, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19153395295143127, "incorrect_loss_raw": 1.7988444566726685, "correct_loss_per_char": 0.09576697647571564, "incorrect_loss_per_char": 0.8994222283363342, "correct_loss_per_token": 0.19153395295143127, "incorrect_loss_per_token": 1.7988444566726685, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19153395295143127, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.19153395295143127, "logits_per_char": -0.09576697647571564, "num_chars": 2}, {"sum_logits": -1.7988444566726685, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.7988444566726685, "logits_per_char": -0.8994222283363342, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 677, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2082754522562027, "incorrect_loss_raw": 1.6973506212234497, "correct_loss_per_char": 0.10413772612810135, "incorrect_loss_per_char": 0.8486753106117249, "correct_loss_per_token": 0.2082754522562027, "incorrect_loss_per_token": 1.6973506212234497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2082754522562027, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.2082754522562027, "logits_per_char": -0.10413772612810135, "num_chars": 2}, {"sum_logits": -1.6973506212234497, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.6973506212234497, "logits_per_char": -0.8486753106117249, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 678, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1749124825000763, "incorrect_loss_raw": 1.9050252437591553, "correct_loss_per_char": 0.08745624125003815, "incorrect_loss_per_char": 0.9525126218795776, "correct_loss_per_token": 0.1749124825000763, "incorrect_loss_per_token": 1.9050252437591553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1749124825000763, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": true, "logits_per_token": -0.1749124825000763, "logits_per_char": -0.08745624125003815, "num_chars": 2}, {"sum_logits": -1.9050252437591553, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": false, "logits_per_token": -1.9050252437591553, "logits_per_char": -0.9525126218795776, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 679, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6092908382415771, "incorrect_loss_raw": 0.23187682032585144, "correct_loss_per_char": 0.8046454191207886, "incorrect_loss_per_char": 0.11593841016292572, "correct_loss_per_token": 1.6092908382415771, "incorrect_loss_per_token": 0.23187682032585144, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23187682032585144, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.23187682032585144, "logits_per_char": -0.11593841016292572, "num_chars": 2}, {"sum_logits": -1.6092908382415771, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.6092908382415771, "logits_per_char": -0.8046454191207886, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 680, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5831761360168457, "incorrect_loss_raw": 0.24117861688137054, "correct_loss_per_char": 0.7915880680084229, "incorrect_loss_per_char": 0.12058930844068527, "correct_loss_per_token": 1.5831761360168457, "incorrect_loss_per_token": 0.24117861688137054, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24117861688137054, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.24117861688137054, "logits_per_char": -0.12058930844068527, "num_chars": 2}, {"sum_logits": -1.5831761360168457, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.5831761360168457, "logits_per_char": -0.7915880680084229, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 681, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6536993980407715, "incorrect_loss_raw": 0.2230527549982071, "correct_loss_per_char": 0.8268496990203857, "incorrect_loss_per_char": 0.11152637749910355, "correct_loss_per_token": 1.6536993980407715, "incorrect_loss_per_token": 0.2230527549982071, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2230527549982071, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": true, "logits_per_token": -0.2230527549982071, "logits_per_char": -0.11152637749910355, "num_chars": 2}, {"sum_logits": -1.6536993980407715, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": false, "logits_per_token": -1.6536993980407715, "logits_per_char": -0.8268496990203857, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 682, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22537419199943542, "incorrect_loss_raw": 1.6429972648620605, "correct_loss_per_char": 0.11268709599971771, "incorrect_loss_per_char": 0.8214986324310303, "correct_loss_per_token": 0.22537419199943542, "incorrect_loss_per_token": 1.6429972648620605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22537419199943542, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.22537419199943542, "logits_per_char": -0.11268709599971771, "num_chars": 2}, {"sum_logits": -1.6429972648620605, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.6429972648620605, "logits_per_char": -0.8214986324310303, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 683, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.31501251459121704, "incorrect_loss_raw": 1.3469665050506592, "correct_loss_per_char": 0.15750625729560852, "incorrect_loss_per_char": 0.6734832525253296, "correct_loss_per_token": 0.31501251459121704, "incorrect_loss_per_token": 1.3469665050506592, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.31501251459121704, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": true, "logits_per_token": -0.31501251459121704, "logits_per_char": -0.15750625729560852, "num_chars": 2}, {"sum_logits": -1.3469665050506592, "num_tokens": 1, "num_tokens_all": 1036, "is_greedy": false, "logits_per_token": -1.3469665050506592, "logits_per_char": -0.6734832525253296, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 684, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22222894430160522, "incorrect_loss_raw": 1.6484389305114746, "correct_loss_per_char": 0.11111447215080261, "incorrect_loss_per_char": 0.8242194652557373, "correct_loss_per_token": 0.22222894430160522, "incorrect_loss_per_token": 1.6484389305114746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22222894430160522, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.22222894430160522, "logits_per_char": -0.11111447215080261, "num_chars": 2}, {"sum_logits": -1.6484389305114746, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.6484389305114746, "logits_per_char": -0.8242194652557373, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 685, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2624574899673462, "incorrect_loss_raw": 1.494397759437561, "correct_loss_per_char": 0.1312287449836731, "incorrect_loss_per_char": 0.7471988797187805, "correct_loss_per_token": 0.2624574899673462, "incorrect_loss_per_token": 1.494397759437561, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2624574899673462, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": true, "logits_per_token": -0.2624574899673462, "logits_per_char": -0.1312287449836731, "num_chars": 2}, {"sum_logits": -1.494397759437561, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.494397759437561, "logits_per_char": -0.7471988797187805, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 686, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5094877481460571, "incorrect_loss_raw": 0.2683583199977875, "correct_loss_per_char": 0.7547438740730286, "incorrect_loss_per_char": 0.13417915999889374, "correct_loss_per_token": 1.5094877481460571, "incorrect_loss_per_token": 0.2683583199977875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2683583199977875, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -0.2683583199977875, "logits_per_char": -0.13417915999889374, "num_chars": 2}, {"sum_logits": -1.5094877481460571, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.5094877481460571, "logits_per_char": -0.7547438740730286, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 687, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2713072597980499, "incorrect_loss_raw": 1.478865385055542, "correct_loss_per_char": 0.13565362989902496, "incorrect_loss_per_char": 0.739432692527771, "correct_loss_per_token": 0.2713072597980499, "incorrect_loss_per_token": 1.478865385055542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2713072597980499, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.2713072597980499, "logits_per_char": -0.13565362989902496, "num_chars": 2}, {"sum_logits": -1.478865385055542, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.478865385055542, "logits_per_char": -0.739432692527771, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 688, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4776651859283447, "incorrect_loss_raw": 0.27695366740226746, "correct_loss_per_char": 0.7388325929641724, "incorrect_loss_per_char": 0.13847683370113373, "correct_loss_per_token": 1.4776651859283447, "incorrect_loss_per_token": 0.27695366740226746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27695366740226746, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": true, "logits_per_token": -0.27695366740226746, "logits_per_char": -0.13847683370113373, "num_chars": 2}, {"sum_logits": -1.4776651859283447, "num_tokens": 1, "num_tokens_all": 1067, "is_greedy": false, "logits_per_token": -1.4776651859283447, "logits_per_char": -0.7388325929641724, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 689, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1756923943758011, "incorrect_loss_raw": 1.8512508869171143, "correct_loss_per_char": 0.08784619718790054, "incorrect_loss_per_char": 0.9256254434585571, "correct_loss_per_token": 0.1756923943758011, "incorrect_loss_per_token": 1.8512508869171143, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1756923943758011, "num_tokens": 1, "num_tokens_all": 1142, "is_greedy": true, "logits_per_token": -0.1756923943758011, "logits_per_char": -0.08784619718790054, "num_chars": 2}, {"sum_logits": -1.8512508869171143, "num_tokens": 1, "num_tokens_all": 1142, "is_greedy": false, "logits_per_token": -1.8512508869171143, "logits_per_char": -0.9256254434585571, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 690, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2624737024307251, "incorrect_loss_raw": 1.5107358694076538, "correct_loss_per_char": 0.13123685121536255, "incorrect_loss_per_char": 0.7553679347038269, "correct_loss_per_token": 0.2624737024307251, "incorrect_loss_per_token": 1.5107358694076538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2624737024307251, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.2624737024307251, "logits_per_char": -0.13123685121536255, "num_chars": 2}, {"sum_logits": -1.5107358694076538, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.5107358694076538, "logits_per_char": -0.7553679347038269, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 691, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3860923051834106, "incorrect_loss_raw": 0.3075488805770874, "correct_loss_per_char": 0.6930461525917053, "incorrect_loss_per_char": 0.1537744402885437, "correct_loss_per_token": 1.3860923051834106, "incorrect_loss_per_token": 0.3075488805770874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3075488805770874, "num_tokens": 1, "num_tokens_all": 1242, "is_greedy": true, "logits_per_token": -0.3075488805770874, "logits_per_char": -0.1537744402885437, "num_chars": 2}, {"sum_logits": -1.3860923051834106, "num_tokens": 1, "num_tokens_all": 1242, "is_greedy": false, "logits_per_token": -1.3860923051834106, "logits_per_char": -0.6930461525917053, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 692, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20418336987495422, "incorrect_loss_raw": 1.749234676361084, "correct_loss_per_char": 0.10209168493747711, "incorrect_loss_per_char": 0.874617338180542, "correct_loss_per_token": 0.20418336987495422, "incorrect_loss_per_token": 1.749234676361084, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20418336987495422, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.20418336987495422, "logits_per_char": -0.10209168493747711, "num_chars": 2}, {"sum_logits": -1.749234676361084, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.749234676361084, "logits_per_char": -0.874617338180542, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 693, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2756173610687256, "incorrect_loss_raw": 1.4604713916778564, "correct_loss_per_char": 0.1378086805343628, "incorrect_loss_per_char": 0.7302356958389282, "correct_loss_per_token": 0.2756173610687256, "incorrect_loss_per_token": 1.4604713916778564, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2756173610687256, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": true, "logits_per_token": -0.2756173610687256, "logits_per_char": -0.1378086805343628, "num_chars": 2}, {"sum_logits": -1.4604713916778564, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": false, "logits_per_token": -1.4604713916778564, "logits_per_char": -0.7302356958389282, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 694, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18770498037338257, "incorrect_loss_raw": 1.8106801509857178, "correct_loss_per_char": 0.09385249018669128, "incorrect_loss_per_char": 0.9053400754928589, "correct_loss_per_token": 0.18770498037338257, "incorrect_loss_per_token": 1.8106801509857178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18770498037338257, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.18770498037338257, "logits_per_char": -0.09385249018669128, "num_chars": 2}, {"sum_logits": -1.8106801509857178, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.8106801509857178, "logits_per_char": -0.9053400754928589, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 695, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30764099955558777, "incorrect_loss_raw": 1.3566733598709106, "correct_loss_per_char": 0.15382049977779388, "incorrect_loss_per_char": 0.6783366799354553, "correct_loss_per_token": 0.30764099955558777, "incorrect_loss_per_token": 1.3566733598709106, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30764099955558777, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.30764099955558777, "logits_per_char": -0.15382049977779388, "num_chars": 2}, {"sum_logits": -1.3566733598709106, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.3566733598709106, "logits_per_char": -0.6783366799354553, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 696, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18978913128376007, "incorrect_loss_raw": 1.7947278022766113, "correct_loss_per_char": 0.09489456564188004, "incorrect_loss_per_char": 0.8973639011383057, "correct_loss_per_token": 0.18978913128376007, "incorrect_loss_per_token": 1.7947278022766113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18978913128376007, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.18978913128376007, "logits_per_char": -0.09489456564188004, "num_chars": 2}, {"sum_logits": -1.7947278022766113, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.7947278022766113, "logits_per_char": -0.8973639011383057, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 697, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2159658670425415, "incorrect_loss_raw": 1.6797983646392822, "correct_loss_per_char": 0.10798293352127075, "incorrect_loss_per_char": 0.8398991823196411, "correct_loss_per_token": 0.2159658670425415, "incorrect_loss_per_token": 1.6797983646392822, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2159658670425415, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.2159658670425415, "logits_per_char": -0.10798293352127075, "num_chars": 2}, {"sum_logits": -1.6797983646392822, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.6797983646392822, "logits_per_char": -0.8398991823196411, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 698, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30232271552085876, "incorrect_loss_raw": 1.3832168579101562, "correct_loss_per_char": 0.15116135776042938, "incorrect_loss_per_char": 0.6916084289550781, "correct_loss_per_token": 0.30232271552085876, "incorrect_loss_per_token": 1.3832168579101562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30232271552085876, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -0.30232271552085876, "logits_per_char": -0.15116135776042938, "num_chars": 2}, {"sum_logits": -1.3832168579101562, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.3832168579101562, "logits_per_char": -0.6916084289550781, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 699, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3354041576385498, "incorrect_loss_raw": 0.3189792335033417, "correct_loss_per_char": 0.6677020788192749, "incorrect_loss_per_char": 0.15948961675167084, "correct_loss_per_token": 1.3354041576385498, "incorrect_loss_per_token": 0.3189792335033417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3189792335033417, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": true, "logits_per_token": -0.3189792335033417, "logits_per_char": -0.15948961675167084, "num_chars": 2}, {"sum_logits": -1.3354041576385498, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.3354041576385498, "logits_per_char": -0.6677020788192749, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 700, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5821623802185059, "incorrect_loss_raw": 0.24017713963985443, "correct_loss_per_char": 0.7910811901092529, "incorrect_loss_per_char": 0.12008856981992722, "correct_loss_per_token": 1.5821623802185059, "incorrect_loss_per_token": 0.24017713963985443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24017713963985443, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.24017713963985443, "logits_per_char": -0.12008856981992722, "num_chars": 2}, {"sum_logits": -1.5821623802185059, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.5821623802185059, "logits_per_char": -0.7910811901092529, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 701, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1675375998020172, "incorrect_loss_raw": 1.9149380922317505, "correct_loss_per_char": 0.0837687999010086, "incorrect_loss_per_char": 0.9574690461158752, "correct_loss_per_token": 0.1675375998020172, "incorrect_loss_per_token": 1.9149380922317505, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1675375998020172, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.1675375998020172, "logits_per_char": -0.0837687999010086, "num_chars": 2}, {"sum_logits": -1.9149380922317505, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.9149380922317505, "logits_per_char": -0.9574690461158752, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 702, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24653907120227814, "incorrect_loss_raw": 1.5589942932128906, "correct_loss_per_char": 0.12326953560113907, "incorrect_loss_per_char": 0.7794971466064453, "correct_loss_per_token": 0.24653907120227814, "incorrect_loss_per_token": 1.5589942932128906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24653907120227814, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.24653907120227814, "logits_per_char": -0.12326953560113907, "num_chars": 2}, {"sum_logits": -1.5589942932128906, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.5589942932128906, "logits_per_char": -0.7794971466064453, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 703, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6480822563171387, "incorrect_loss_raw": 0.22069896757602692, "correct_loss_per_char": 0.8240411281585693, "incorrect_loss_per_char": 0.11034948378801346, "correct_loss_per_token": 1.6480822563171387, "incorrect_loss_per_token": 0.22069896757602692, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22069896757602692, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.22069896757602692, "logits_per_char": -0.11034948378801346, "num_chars": 2}, {"sum_logits": -1.6480822563171387, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.6480822563171387, "logits_per_char": -0.8240411281585693, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 704, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19395455718040466, "incorrect_loss_raw": 1.786467432975769, "correct_loss_per_char": 0.09697727859020233, "incorrect_loss_per_char": 0.8932337164878845, "correct_loss_per_token": 0.19395455718040466, "incorrect_loss_per_token": 1.786467432975769, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19395455718040466, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.19395455718040466, "logits_per_char": -0.09697727859020233, "num_chars": 2}, {"sum_logits": -1.786467432975769, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.786467432975769, "logits_per_char": -0.8932337164878845, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 705, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22983981668949127, "incorrect_loss_raw": 1.6236279010772705, "correct_loss_per_char": 0.11491990834474564, "incorrect_loss_per_char": 0.8118139505386353, "correct_loss_per_token": 0.22983981668949127, "incorrect_loss_per_token": 1.6236279010772705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22983981668949127, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.22983981668949127, "logits_per_char": -0.11491990834474564, "num_chars": 2}, {"sum_logits": -1.6236279010772705, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.6236279010772705, "logits_per_char": -0.8118139505386353, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 706, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.325608730316162, "incorrect_loss_raw": 0.3269798159599304, "correct_loss_per_char": 0.662804365158081, "incorrect_loss_per_char": 0.1634899079799652, "correct_loss_per_token": 1.325608730316162, "incorrect_loss_per_token": 0.3269798159599304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3269798159599304, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": true, "logits_per_token": -0.3269798159599304, "logits_per_char": -0.1634899079799652, "num_chars": 2}, {"sum_logits": -1.325608730316162, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -1.325608730316162, "logits_per_char": -0.662804365158081, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 707, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23175457119941711, "incorrect_loss_raw": 1.6344101428985596, "correct_loss_per_char": 0.11587728559970856, "incorrect_loss_per_char": 0.8172050714492798, "correct_loss_per_token": 0.23175457119941711, "incorrect_loss_per_token": 1.6344101428985596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23175457119941711, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -0.23175457119941711, "logits_per_char": -0.11587728559970856, "num_chars": 2}, {"sum_logits": -1.6344101428985596, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.6344101428985596, "logits_per_char": -0.8172050714492798, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 708, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6992634534835815, "incorrect_loss_raw": 0.21576039493083954, "correct_loss_per_char": 0.8496317267417908, "incorrect_loss_per_char": 0.10788019746541977, "correct_loss_per_token": 1.6992634534835815, "incorrect_loss_per_token": 0.21576039493083954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21576039493083954, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": true, "logits_per_token": -0.21576039493083954, "logits_per_char": -0.10788019746541977, "num_chars": 2}, {"sum_logits": -1.6992634534835815, "num_tokens": 1, "num_tokens_all": 1068, "is_greedy": false, "logits_per_token": -1.6992634534835815, "logits_per_char": -0.8496317267417908, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 709, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2768482267856598, "incorrect_loss_raw": 1.4666794538497925, "correct_loss_per_char": 0.1384241133928299, "incorrect_loss_per_char": 0.7333397269248962, "correct_loss_per_token": 0.2768482267856598, "incorrect_loss_per_token": 1.4666794538497925, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2768482267856598, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -0.2768482267856598, "logits_per_char": -0.1384241133928299, "num_chars": 2}, {"sum_logits": -1.4666794538497925, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.4666794538497925, "logits_per_char": -0.7333397269248962, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 710, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7391027212142944, "incorrect_loss_raw": 0.20559653639793396, "correct_loss_per_char": 0.8695513606071472, "incorrect_loss_per_char": 0.10279826819896698, "correct_loss_per_token": 1.7391027212142944, "incorrect_loss_per_token": 0.20559653639793396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20559653639793396, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -0.20559653639793396, "logits_per_char": -0.10279826819896698, "num_chars": 2}, {"sum_logits": -1.7391027212142944, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.7391027212142944, "logits_per_char": -0.8695513606071472, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 711, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22390152513980865, "incorrect_loss_raw": 1.649705410003662, "correct_loss_per_char": 0.11195076256990433, "incorrect_loss_per_char": 0.824852705001831, "correct_loss_per_token": 0.22390152513980865, "incorrect_loss_per_token": 1.649705410003662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22390152513980865, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.22390152513980865, "logits_per_char": -0.11195076256990433, "num_chars": 2}, {"sum_logits": -1.649705410003662, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.649705410003662, "logits_per_char": -0.824852705001831, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 712, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2710868716239929, "incorrect_loss_raw": 1.471276044845581, "correct_loss_per_char": 0.13554343581199646, "incorrect_loss_per_char": 0.7356380224227905, "correct_loss_per_token": 0.2710868716239929, "incorrect_loss_per_token": 1.471276044845581, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2710868716239929, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.2710868716239929, "logits_per_char": -0.13554343581199646, "num_chars": 2}, {"sum_logits": -1.471276044845581, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.471276044845581, "logits_per_char": -0.7356380224227905, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 713, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2027096152305603, "incorrect_loss_raw": 1.7473804950714111, "correct_loss_per_char": 0.10135480761528015, "incorrect_loss_per_char": 0.8736902475357056, "correct_loss_per_token": 0.2027096152305603, "incorrect_loss_per_token": 1.7473804950714111, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2027096152305603, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.2027096152305603, "logits_per_char": -0.10135480761528015, "num_chars": 2}, {"sum_logits": -1.7473804950714111, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.7473804950714111, "logits_per_char": -0.8736902475357056, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 714, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8128724098205566, "incorrect_loss_raw": 0.18607787787914276, "correct_loss_per_char": 0.9064362049102783, "incorrect_loss_per_char": 0.09303893893957138, "correct_loss_per_token": 1.8128724098205566, "incorrect_loss_per_token": 0.18607787787914276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18607787787914276, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": true, "logits_per_token": -0.18607787787914276, "logits_per_char": -0.09303893893957138, "num_chars": 2}, {"sum_logits": -1.8128724098205566, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": false, "logits_per_token": -1.8128724098205566, "logits_per_char": -0.9064362049102783, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 715, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19116219878196716, "incorrect_loss_raw": 1.8118810653686523, "correct_loss_per_char": 0.09558109939098358, "incorrect_loss_per_char": 0.9059405326843262, "correct_loss_per_token": 0.19116219878196716, "incorrect_loss_per_token": 1.8118810653686523, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19116219878196716, "num_tokens": 1, "num_tokens_all": 1016, "is_greedy": true, "logits_per_token": -0.19116219878196716, "logits_per_char": -0.09558109939098358, "num_chars": 2}, {"sum_logits": -1.8118810653686523, "num_tokens": 1, "num_tokens_all": 1016, "is_greedy": false, "logits_per_token": -1.8118810653686523, "logits_per_char": -0.9059405326843262, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 716, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26123934984207153, "incorrect_loss_raw": 1.5023300647735596, "correct_loss_per_char": 0.13061967492103577, "incorrect_loss_per_char": 0.7511650323867798, "correct_loss_per_token": 0.26123934984207153, "incorrect_loss_per_token": 1.5023300647735596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26123934984207153, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": true, "logits_per_token": -0.26123934984207153, "logits_per_char": -0.13061967492103577, "num_chars": 2}, {"sum_logits": -1.5023300647735596, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": false, "logits_per_token": -1.5023300647735596, "logits_per_char": -0.7511650323867798, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 717, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19324526190757751, "incorrect_loss_raw": 1.7711135149002075, "correct_loss_per_char": 0.09662263095378876, "incorrect_loss_per_char": 0.8855567574501038, "correct_loss_per_token": 0.19324526190757751, "incorrect_loss_per_token": 1.7711135149002075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19324526190757751, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.19324526190757751, "logits_per_char": -0.09662263095378876, "num_chars": 2}, {"sum_logits": -1.7711135149002075, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -1.7711135149002075, "logits_per_char": -0.8855567574501038, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 718, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24772004783153534, "incorrect_loss_raw": 1.567916750907898, "correct_loss_per_char": 0.12386002391576767, "incorrect_loss_per_char": 0.783958375453949, "correct_loss_per_token": 0.24772004783153534, "incorrect_loss_per_token": 1.567916750907898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24772004783153534, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": true, "logits_per_token": -0.24772004783153534, "logits_per_char": -0.12386002391576767, "num_chars": 2}, {"sum_logits": -1.567916750907898, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.567916750907898, "logits_per_char": -0.783958375453949, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 719, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.684956431388855, "incorrect_loss_raw": 0.21349017322063446, "correct_loss_per_char": 0.8424782156944275, "incorrect_loss_per_char": 0.10674508661031723, "correct_loss_per_token": 1.684956431388855, "incorrect_loss_per_token": 0.21349017322063446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21349017322063446, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -0.21349017322063446, "logits_per_char": -0.10674508661031723, "num_chars": 2}, {"sum_logits": -1.684956431388855, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.684956431388855, "logits_per_char": -0.8424782156944275, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 720, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.799378514289856, "incorrect_loss_raw": 0.19227690994739532, "correct_loss_per_char": 0.899689257144928, "incorrect_loss_per_char": 0.09613845497369766, "correct_loss_per_token": 1.799378514289856, "incorrect_loss_per_token": 0.19227690994739532, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19227690994739532, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.19227690994739532, "logits_per_char": -0.09613845497369766, "num_chars": 2}, {"sum_logits": -1.799378514289856, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.799378514289856, "logits_per_char": -0.899689257144928, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 721, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23515814542770386, "incorrect_loss_raw": 1.5965983867645264, "correct_loss_per_char": 0.11757907271385193, "incorrect_loss_per_char": 0.7982991933822632, "correct_loss_per_token": 0.23515814542770386, "incorrect_loss_per_token": 1.5965983867645264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23515814542770386, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -0.23515814542770386, "logits_per_char": -0.11757907271385193, "num_chars": 2}, {"sum_logits": -1.5965983867645264, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.5965983867645264, "logits_per_char": -0.7982991933822632, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 722, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23648551106452942, "incorrect_loss_raw": 1.6084511280059814, "correct_loss_per_char": 0.11824275553226471, "incorrect_loss_per_char": 0.8042255640029907, "correct_loss_per_token": 0.23648551106452942, "incorrect_loss_per_token": 1.6084511280059814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23648551106452942, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.23648551106452942, "logits_per_char": -0.11824275553226471, "num_chars": 2}, {"sum_logits": -1.6084511280059814, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.6084511280059814, "logits_per_char": -0.8042255640029907, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 723, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6089798212051392, "incorrect_loss_raw": 0.23142899572849274, "correct_loss_per_char": 0.8044899106025696, "incorrect_loss_per_char": 0.11571449786424637, "correct_loss_per_token": 1.6089798212051392, "incorrect_loss_per_token": 0.23142899572849274, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23142899572849274, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.23142899572849274, "logits_per_char": -0.11571449786424637, "num_chars": 2}, {"sum_logits": -1.6089798212051392, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.6089798212051392, "logits_per_char": -0.8044899106025696, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 724, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.270850419998169, "incorrect_loss_raw": 0.347494900226593, "correct_loss_per_char": 0.6354252099990845, "incorrect_loss_per_char": 0.1737474501132965, "correct_loss_per_token": 1.270850419998169, "incorrect_loss_per_token": 0.347494900226593, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.347494900226593, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": true, "logits_per_token": -0.347494900226593, "logits_per_char": -0.1737474501132965, "num_chars": 2}, {"sum_logits": -1.270850419998169, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.270850419998169, "logits_per_char": -0.6354252099990845, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 725, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23499661684036255, "incorrect_loss_raw": 1.6065235137939453, "correct_loss_per_char": 0.11749830842018127, "incorrect_loss_per_char": 0.8032617568969727, "correct_loss_per_token": 0.23499661684036255, "incorrect_loss_per_token": 1.6065235137939453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23499661684036255, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.23499661684036255, "logits_per_char": -0.11749830842018127, "num_chars": 2}, {"sum_logits": -1.6065235137939453, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.6065235137939453, "logits_per_char": -0.8032617568969727, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 726, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21055462956428528, "incorrect_loss_raw": 1.709714412689209, "correct_loss_per_char": 0.10527731478214264, "incorrect_loss_per_char": 0.8548572063446045, "correct_loss_per_token": 0.21055462956428528, "incorrect_loss_per_token": 1.709714412689209, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21055462956428528, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -0.21055462956428528, "logits_per_char": -0.10527731478214264, "num_chars": 2}, {"sum_logits": -1.709714412689209, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.709714412689209, "logits_per_char": -0.8548572063446045, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 727, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7064582109451294, "incorrect_loss_raw": 0.21099743247032166, "correct_loss_per_char": 0.8532291054725647, "incorrect_loss_per_char": 0.10549871623516083, "correct_loss_per_token": 1.7064582109451294, "incorrect_loss_per_token": 0.21099743247032166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21099743247032166, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.21099743247032166, "logits_per_char": -0.10549871623516083, "num_chars": 2}, {"sum_logits": -1.7064582109451294, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.7064582109451294, "logits_per_char": -0.8532291054725647, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 728, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24891673028469086, "incorrect_loss_raw": 1.5418620109558105, "correct_loss_per_char": 0.12445836514234543, "incorrect_loss_per_char": 0.7709310054779053, "correct_loss_per_token": 0.24891673028469086, "incorrect_loss_per_token": 1.5418620109558105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24891673028469086, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": true, "logits_per_token": -0.24891673028469086, "logits_per_char": -0.12445836514234543, "num_chars": 2}, {"sum_logits": -1.5418620109558105, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.5418620109558105, "logits_per_char": -0.7709310054779053, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 729, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26475271582603455, "incorrect_loss_raw": 1.4798871278762817, "correct_loss_per_char": 0.13237635791301727, "incorrect_loss_per_char": 0.7399435639381409, "correct_loss_per_token": 0.26475271582603455, "incorrect_loss_per_token": 1.4798871278762817, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26475271582603455, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.26475271582603455, "logits_per_char": -0.13237635791301727, "num_chars": 2}, {"sum_logits": -1.4798871278762817, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -1.4798871278762817, "logits_per_char": -0.7399435639381409, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 730, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.536750078201294, "incorrect_loss_raw": 0.25356677174568176, "correct_loss_per_char": 0.768375039100647, "incorrect_loss_per_char": 0.12678338587284088, "correct_loss_per_token": 1.536750078201294, "incorrect_loss_per_token": 0.25356677174568176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25356677174568176, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.25356677174568176, "logits_per_char": -0.12678338587284088, "num_chars": 2}, {"sum_logits": -1.536750078201294, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.536750078201294, "logits_per_char": -0.768375039100647, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 731, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2076011449098587, "incorrect_loss_raw": 1.7169538736343384, "correct_loss_per_char": 0.10380057245492935, "incorrect_loss_per_char": 0.8584769368171692, "correct_loss_per_token": 0.2076011449098587, "incorrect_loss_per_token": 1.7169538736343384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2076011449098587, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.2076011449098587, "logits_per_char": -0.10380057245492935, "num_chars": 2}, {"sum_logits": -1.7169538736343384, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.7169538736343384, "logits_per_char": -0.8584769368171692, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 732, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2939401865005493, "incorrect_loss_raw": 1.3997961282730103, "correct_loss_per_char": 0.14697009325027466, "incorrect_loss_per_char": 0.6998980641365051, "correct_loss_per_token": 0.2939401865005493, "incorrect_loss_per_token": 1.3997961282730103, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2939401865005493, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.2939401865005493, "logits_per_char": -0.14697009325027466, "num_chars": 2}, {"sum_logits": -1.3997961282730103, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.3997961282730103, "logits_per_char": -0.6998980641365051, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 733, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2752859592437744, "incorrect_loss_raw": 0.3529280424118042, "correct_loss_per_char": 0.6376429796218872, "incorrect_loss_per_char": 0.1764640212059021, "correct_loss_per_token": 1.2752859592437744, "incorrect_loss_per_token": 0.3529280424118042, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3529280424118042, "num_tokens": 1, "num_tokens_all": 1404, "is_greedy": true, "logits_per_token": -0.3529280424118042, "logits_per_char": -0.1764640212059021, "num_chars": 2}, {"sum_logits": -1.2752859592437744, "num_tokens": 1, "num_tokens_all": 1404, "is_greedy": false, "logits_per_token": -1.2752859592437744, "logits_per_char": -0.6376429796218872, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 734, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2973887026309967, "incorrect_loss_raw": 1.4068101644515991, "correct_loss_per_char": 0.14869435131549835, "incorrect_loss_per_char": 0.7034050822257996, "correct_loss_per_token": 0.2973887026309967, "incorrect_loss_per_token": 1.4068101644515991, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2973887026309967, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": true, "logits_per_token": -0.2973887026309967, "logits_per_char": -0.14869435131549835, "num_chars": 2}, {"sum_logits": -1.4068101644515991, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": false, "logits_per_token": -1.4068101644515991, "logits_per_char": -0.7034050822257996, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 735, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2226579189300537, "incorrect_loss_raw": 1.6486599445343018, "correct_loss_per_char": 0.11132895946502686, "incorrect_loss_per_char": 0.8243299722671509, "correct_loss_per_token": 0.2226579189300537, "incorrect_loss_per_token": 1.6486599445343018, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2226579189300537, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.2226579189300537, "logits_per_char": -0.11132895946502686, "num_chars": 2}, {"sum_logits": -1.6486599445343018, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.6486599445343018, "logits_per_char": -0.8243299722671509, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 736, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2695794701576233, "incorrect_loss_raw": 1.4965455532073975, "correct_loss_per_char": 0.13478973507881165, "incorrect_loss_per_char": 0.7482727766036987, "correct_loss_per_token": 0.2695794701576233, "incorrect_loss_per_token": 1.4965455532073975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2695794701576233, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": true, "logits_per_token": -0.2695794701576233, "logits_per_char": -0.13478973507881165, "num_chars": 2}, {"sum_logits": -1.4965455532073975, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.4965455532073975, "logits_per_char": -0.7482727766036987, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 737, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5478119850158691, "incorrect_loss_raw": 0.2540404200553894, "correct_loss_per_char": 0.7739059925079346, "incorrect_loss_per_char": 0.1270202100276947, "correct_loss_per_token": 1.5478119850158691, "incorrect_loss_per_token": 0.2540404200553894, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2540404200553894, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.2540404200553894, "logits_per_char": -0.1270202100276947, "num_chars": 2}, {"sum_logits": -1.5478119850158691, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.5478119850158691, "logits_per_char": -0.7739059925079346, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 738, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1681990623474121, "incorrect_loss_raw": 1.9070942401885986, "correct_loss_per_char": 0.08409953117370605, "incorrect_loss_per_char": 0.9535471200942993, "correct_loss_per_token": 0.1681990623474121, "incorrect_loss_per_token": 1.9070942401885986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1681990623474121, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.1681990623474121, "logits_per_char": -0.08409953117370605, "num_chars": 2}, {"sum_logits": -1.9070942401885986, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.9070942401885986, "logits_per_char": -0.9535471200942993, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 739, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21863846480846405, "incorrect_loss_raw": 1.6609270572662354, "correct_loss_per_char": 0.10931923240423203, "incorrect_loss_per_char": 0.8304635286331177, "correct_loss_per_token": 0.21863846480846405, "incorrect_loss_per_token": 1.6609270572662354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21863846480846405, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": true, "logits_per_token": -0.21863846480846405, "logits_per_char": -0.10931923240423203, "num_chars": 2}, {"sum_logits": -1.6609270572662354, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": false, "logits_per_token": -1.6609270572662354, "logits_per_char": -0.8304635286331177, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 740, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17555809020996094, "incorrect_loss_raw": 1.853780746459961, "correct_loss_per_char": 0.08777904510498047, "incorrect_loss_per_char": 0.9268903732299805, "correct_loss_per_token": 0.17555809020996094, "incorrect_loss_per_token": 1.853780746459961, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17555809020996094, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": true, "logits_per_token": -0.17555809020996094, "logits_per_char": -0.08777904510498047, "num_chars": 2}, {"sum_logits": -1.853780746459961, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": false, "logits_per_token": -1.853780746459961, "logits_per_char": -0.9268903732299805, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 741, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23051273822784424, "incorrect_loss_raw": 1.6400967836380005, "correct_loss_per_char": 0.11525636911392212, "incorrect_loss_per_char": 0.8200483918190002, "correct_loss_per_token": 0.23051273822784424, "incorrect_loss_per_token": 1.6400967836380005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23051273822784424, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -0.23051273822784424, "logits_per_char": -0.11525636911392212, "num_chars": 2}, {"sum_logits": -1.6400967836380005, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.6400967836380005, "logits_per_char": -0.8200483918190002, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 742, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5071789026260376, "incorrect_loss_raw": 0.25832560658454895, "correct_loss_per_char": 0.7535894513130188, "incorrect_loss_per_char": 0.12916280329227448, "correct_loss_per_token": 1.5071789026260376, "incorrect_loss_per_token": 0.25832560658454895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25832560658454895, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.25832560658454895, "logits_per_char": -0.12916280329227448, "num_chars": 2}, {"sum_logits": -1.5071789026260376, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.5071789026260376, "logits_per_char": -0.7535894513130188, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 743, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19940143823623657, "incorrect_loss_raw": 1.764789342880249, "correct_loss_per_char": 0.09970071911811829, "incorrect_loss_per_char": 0.8823946714401245, "correct_loss_per_token": 0.19940143823623657, "incorrect_loss_per_token": 1.764789342880249, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19940143823623657, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.19940143823623657, "logits_per_char": -0.09970071911811829, "num_chars": 2}, {"sum_logits": -1.764789342880249, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.764789342880249, "logits_per_char": -0.8823946714401245, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 744, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24613690376281738, "incorrect_loss_raw": 1.5687333345413208, "correct_loss_per_char": 0.12306845188140869, "incorrect_loss_per_char": 0.7843666672706604, "correct_loss_per_token": 0.24613690376281738, "incorrect_loss_per_token": 1.5687333345413208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24613690376281738, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": true, "logits_per_token": -0.24613690376281738, "logits_per_char": -0.12306845188140869, "num_chars": 2}, {"sum_logits": -1.5687333345413208, "num_tokens": 1, "num_tokens_all": 1110, "is_greedy": false, "logits_per_token": -1.5687333345413208, "logits_per_char": -0.7843666672706604, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 745, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19506694376468658, "incorrect_loss_raw": 1.7663743495941162, "correct_loss_per_char": 0.09753347188234329, "incorrect_loss_per_char": 0.8831871747970581, "correct_loss_per_token": 0.19506694376468658, "incorrect_loss_per_token": 1.7663743495941162, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19506694376468658, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.19506694376468658, "logits_per_char": -0.09753347188234329, "num_chars": 2}, {"sum_logits": -1.7663743495941162, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.7663743495941162, "logits_per_char": -0.8831871747970581, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 746, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1819741427898407, "incorrect_loss_raw": 1.8443937301635742, "correct_loss_per_char": 0.09098707139492035, "incorrect_loss_per_char": 0.9221968650817871, "correct_loss_per_token": 0.1819741427898407, "incorrect_loss_per_token": 1.8443937301635742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1819741427898407, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.1819741427898407, "logits_per_char": -0.09098707139492035, "num_chars": 2}, {"sum_logits": -1.8443937301635742, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.8443937301635742, "logits_per_char": -0.9221968650817871, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 747, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20544102787971497, "incorrect_loss_raw": 1.722870945930481, "correct_loss_per_char": 0.10272051393985748, "incorrect_loss_per_char": 0.8614354729652405, "correct_loss_per_token": 0.20544102787971497, "incorrect_loss_per_token": 1.722870945930481, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20544102787971497, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.20544102787971497, "logits_per_char": -0.10272051393985748, "num_chars": 2}, {"sum_logits": -1.722870945930481, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.722870945930481, "logits_per_char": -0.8614354729652405, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 748, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7992558479309082, "incorrect_loss_raw": 0.19194342195987701, "correct_loss_per_char": 0.8996279239654541, "incorrect_loss_per_char": 0.09597171097993851, "correct_loss_per_token": 1.7992558479309082, "incorrect_loss_per_token": 0.19194342195987701, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19194342195987701, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.19194342195987701, "logits_per_char": -0.09597171097993851, "num_chars": 2}, {"sum_logits": -1.7992558479309082, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.7992558479309082, "logits_per_char": -0.8996279239654541, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 749, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2241281121969223, "incorrect_loss_raw": 1.6637920141220093, "correct_loss_per_char": 0.11206405609846115, "incorrect_loss_per_char": 0.8318960070610046, "correct_loss_per_token": 0.2241281121969223, "incorrect_loss_per_token": 1.6637920141220093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2241281121969223, "num_tokens": 1, "num_tokens_all": 1219, "is_greedy": true, "logits_per_token": -0.2241281121969223, "logits_per_char": -0.11206405609846115, "num_chars": 2}, {"sum_logits": -1.6637920141220093, "num_tokens": 1, "num_tokens_all": 1219, "is_greedy": false, "logits_per_token": -1.6637920141220093, "logits_per_char": -0.8318960070610046, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 750, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2063758671283722, "incorrect_loss_raw": 1.7216026782989502, "correct_loss_per_char": 0.1031879335641861, "incorrect_loss_per_char": 0.8608013391494751, "correct_loss_per_token": 0.2063758671283722, "incorrect_loss_per_token": 1.7216026782989502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2063758671283722, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.2063758671283722, "logits_per_char": -0.1031879335641861, "num_chars": 2}, {"sum_logits": -1.7216026782989502, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.7216026782989502, "logits_per_char": -0.8608013391494751, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 751, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5278021097183228, "incorrect_loss_raw": 0.2565819323062897, "correct_loss_per_char": 0.7639010548591614, "incorrect_loss_per_char": 0.12829096615314484, "correct_loss_per_token": 1.5278021097183228, "incorrect_loss_per_token": 0.2565819323062897, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2565819323062897, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.2565819323062897, "logits_per_char": -0.12829096615314484, "num_chars": 2}, {"sum_logits": -1.5278021097183228, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.5278021097183228, "logits_per_char": -0.7639010548591614, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 752, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1982434093952179, "incorrect_loss_raw": 1.7627906799316406, "correct_loss_per_char": 0.09912170469760895, "incorrect_loss_per_char": 0.8813953399658203, "correct_loss_per_token": 0.1982434093952179, "incorrect_loss_per_token": 1.7627906799316406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1982434093952179, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.1982434093952179, "logits_per_char": -0.09912170469760895, "num_chars": 2}, {"sum_logits": -1.7627906799316406, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.7627906799316406, "logits_per_char": -0.8813953399658203, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 753, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.214105486869812, "incorrect_loss_raw": 1.685916781425476, "correct_loss_per_char": 0.107052743434906, "incorrect_loss_per_char": 0.842958390712738, "correct_loss_per_token": 0.214105486869812, "incorrect_loss_per_token": 1.685916781425476, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.214105486869812, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": true, "logits_per_token": -0.214105486869812, "logits_per_char": -0.107052743434906, "num_chars": 2}, {"sum_logits": -1.685916781425476, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": false, "logits_per_token": -1.685916781425476, "logits_per_char": -0.842958390712738, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 754, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23199252784252167, "incorrect_loss_raw": 1.6286187171936035, "correct_loss_per_char": 0.11599626392126083, "incorrect_loss_per_char": 0.8143093585968018, "correct_loss_per_token": 0.23199252784252167, "incorrect_loss_per_token": 1.6286187171936035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23199252784252167, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.23199252784252167, "logits_per_char": -0.11599626392126083, "num_chars": 2}, {"sum_logits": -1.6286187171936035, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.6286187171936035, "logits_per_char": -0.8143093585968018, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 755, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20222555100917816, "incorrect_loss_raw": 1.731646180152893, "correct_loss_per_char": 0.10111277550458908, "incorrect_loss_per_char": 0.8658230900764465, "correct_loss_per_token": 0.20222555100917816, "incorrect_loss_per_token": 1.731646180152893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20222555100917816, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.20222555100917816, "logits_per_char": -0.10111277550458908, "num_chars": 2}, {"sum_logits": -1.731646180152893, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.731646180152893, "logits_per_char": -0.8658230900764465, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 756, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.568485140800476, "incorrect_loss_raw": 0.24818481504917145, "correct_loss_per_char": 0.784242570400238, "incorrect_loss_per_char": 0.12409240752458572, "correct_loss_per_token": 1.568485140800476, "incorrect_loss_per_token": 0.24818481504917145, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24818481504917145, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.24818481504917145, "logits_per_char": -0.12409240752458572, "num_chars": 2}, {"sum_logits": -1.568485140800476, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.568485140800476, "logits_per_char": -0.784242570400238, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 757, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.969056487083435, "incorrect_loss_raw": 0.1613999903202057, "correct_loss_per_char": 0.9845282435417175, "incorrect_loss_per_char": 0.08069999516010284, "correct_loss_per_token": 1.969056487083435, "incorrect_loss_per_token": 0.1613999903202057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1613999903202057, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": true, "logits_per_token": -0.1613999903202057, "logits_per_char": -0.08069999516010284, "num_chars": 2}, {"sum_logits": -1.969056487083435, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -1.969056487083435, "logits_per_char": -0.9845282435417175, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 758, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26060616970062256, "incorrect_loss_raw": 1.5086790323257446, "correct_loss_per_char": 0.13030308485031128, "incorrect_loss_per_char": 0.7543395161628723, "correct_loss_per_token": 0.26060616970062256, "incorrect_loss_per_token": 1.5086790323257446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26060616970062256, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.26060616970062256, "logits_per_char": -0.13030308485031128, "num_chars": 2}, {"sum_logits": -1.5086790323257446, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.5086790323257446, "logits_per_char": -0.7543395161628723, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 759, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4828987121582031, "incorrect_loss_raw": 0.2648359537124634, "correct_loss_per_char": 0.7414493560791016, "incorrect_loss_per_char": 0.1324179768562317, "correct_loss_per_token": 1.4828987121582031, "incorrect_loss_per_token": 0.2648359537124634, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2648359537124634, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.2648359537124634, "logits_per_char": -0.1324179768562317, "num_chars": 2}, {"sum_logits": -1.4828987121582031, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.4828987121582031, "logits_per_char": -0.7414493560791016, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 760, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5687980651855469, "incorrect_loss_raw": 0.24515530467033386, "correct_loss_per_char": 0.7843990325927734, "incorrect_loss_per_char": 0.12257765233516693, "correct_loss_per_token": 1.5687980651855469, "incorrect_loss_per_token": 0.24515530467033386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24515530467033386, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.24515530467033386, "logits_per_char": -0.12257765233516693, "num_chars": 2}, {"sum_logits": -1.5687980651855469, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.5687980651855469, "logits_per_char": -0.7843990325927734, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 761, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5087149143218994, "incorrect_loss_raw": 0.26632001996040344, "correct_loss_per_char": 0.7543574571609497, "incorrect_loss_per_char": 0.13316000998020172, "correct_loss_per_token": 1.5087149143218994, "incorrect_loss_per_token": 0.26632001996040344, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26632001996040344, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.26632001996040344, "logits_per_char": -0.13316000998020172, "num_chars": 2}, {"sum_logits": -1.5087149143218994, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.5087149143218994, "logits_per_char": -0.7543574571609497, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 762, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6167196035385132, "incorrect_loss_raw": 0.23149187862873077, "correct_loss_per_char": 0.8083598017692566, "incorrect_loss_per_char": 0.11574593931436539, "correct_loss_per_token": 1.6167196035385132, "incorrect_loss_per_token": 0.23149187862873077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23149187862873077, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.23149187862873077, "logits_per_char": -0.11574593931436539, "num_chars": 2}, {"sum_logits": -1.6167196035385132, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.6167196035385132, "logits_per_char": -0.8083598017692566, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 763, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5815398693084717, "incorrect_loss_raw": 0.24551180005073547, "correct_loss_per_char": 0.7907699346542358, "incorrect_loss_per_char": 0.12275590002536774, "correct_loss_per_token": 1.5815398693084717, "incorrect_loss_per_token": 0.24551180005073547, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24551180005073547, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.24551180005073547, "logits_per_char": -0.12275590002536774, "num_chars": 2}, {"sum_logits": -1.5815398693084717, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.5815398693084717, "logits_per_char": -0.7907699346542358, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 764, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6394535303115845, "incorrect_loss_raw": 0.22849999368190765, "correct_loss_per_char": 0.8197267651557922, "incorrect_loss_per_char": 0.11424999684095383, "correct_loss_per_token": 1.6394535303115845, "incorrect_loss_per_token": 0.22849999368190765, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22849999368190765, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.22849999368190765, "logits_per_char": -0.11424999684095383, "num_chars": 2}, {"sum_logits": -1.6394535303115845, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.6394535303115845, "logits_per_char": -0.8197267651557922, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 765, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2079959660768509, "incorrect_loss_raw": 1.7418080568313599, "correct_loss_per_char": 0.10399798303842545, "incorrect_loss_per_char": 0.8709040284156799, "correct_loss_per_token": 0.2079959660768509, "incorrect_loss_per_token": 1.7418080568313599, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2079959660768509, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.2079959660768509, "logits_per_char": -0.10399798303842545, "num_chars": 2}, {"sum_logits": -1.7418080568313599, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.7418080568313599, "logits_per_char": -0.8709040284156799, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 766, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4944443702697754, "incorrect_loss_raw": 0.2611846327781677, "correct_loss_per_char": 0.7472221851348877, "incorrect_loss_per_char": 0.13059231638908386, "correct_loss_per_token": 1.4944443702697754, "incorrect_loss_per_token": 0.2611846327781677, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2611846327781677, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.2611846327781677, "logits_per_char": -0.13059231638908386, "num_chars": 2}, {"sum_logits": -1.4944443702697754, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.4944443702697754, "logits_per_char": -0.7472221851348877, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 767, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.561629056930542, "incorrect_loss_raw": 0.24508832395076752, "correct_loss_per_char": 0.780814528465271, "incorrect_loss_per_char": 0.12254416197538376, "correct_loss_per_token": 1.561629056930542, "incorrect_loss_per_token": 0.24508832395076752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24508832395076752, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.24508832395076752, "logits_per_char": -0.12254416197538376, "num_chars": 2}, {"sum_logits": -1.561629056930542, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.561629056930542, "logits_per_char": -0.780814528465271, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 768, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21767868101596832, "incorrect_loss_raw": 1.664171814918518, "correct_loss_per_char": 0.10883934050798416, "incorrect_loss_per_char": 0.832085907459259, "correct_loss_per_token": 0.21767868101596832, "incorrect_loss_per_token": 1.664171814918518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21767868101596832, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.21767868101596832, "logits_per_char": -0.10883934050798416, "num_chars": 2}, {"sum_logits": -1.664171814918518, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.664171814918518, "logits_per_char": -0.832085907459259, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 769, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7379579544067383, "incorrect_loss_raw": 0.20503218472003937, "correct_loss_per_char": 0.8689789772033691, "incorrect_loss_per_char": 0.10251609236001968, "correct_loss_per_token": 1.7379579544067383, "incorrect_loss_per_token": 0.20503218472003937, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20503218472003937, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -0.20503218472003937, "logits_per_char": -0.10251609236001968, "num_chars": 2}, {"sum_logits": -1.7379579544067383, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.7379579544067383, "logits_per_char": -0.8689789772033691, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 770, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5643640756607056, "incorrect_loss_raw": 0.24846065044403076, "correct_loss_per_char": 0.7821820378303528, "incorrect_loss_per_char": 0.12423032522201538, "correct_loss_per_token": 1.5643640756607056, "incorrect_loss_per_token": 0.24846065044403076, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24846065044403076, "num_tokens": 1, "num_tokens_all": 1025, "is_greedy": true, "logits_per_token": -0.24846065044403076, "logits_per_char": -0.12423032522201538, "num_chars": 2}, {"sum_logits": -1.5643640756607056, "num_tokens": 1, "num_tokens_all": 1025, "is_greedy": false, "logits_per_token": -1.5643640756607056, "logits_per_char": -0.7821820378303528, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 771, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2986766993999481, "incorrect_loss_raw": 1.388915777206421, "correct_loss_per_char": 0.14933834969997406, "incorrect_loss_per_char": 0.6944578886032104, "correct_loss_per_token": 0.2986766993999481, "incorrect_loss_per_token": 1.388915777206421, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2986766993999481, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": true, "logits_per_token": -0.2986766993999481, "logits_per_char": -0.14933834969997406, "num_chars": 2}, {"sum_logits": -1.388915777206421, "num_tokens": 1, "num_tokens_all": 1032, "is_greedy": false, "logits_per_token": -1.388915777206421, "logits_per_char": -0.6944578886032104, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 772, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.471606731414795, "incorrect_loss_raw": 0.26845410466194153, "correct_loss_per_char": 0.7358033657073975, "incorrect_loss_per_char": 0.13422705233097076, "correct_loss_per_token": 1.471606731414795, "incorrect_loss_per_token": 0.26845410466194153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26845410466194153, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.26845410466194153, "logits_per_char": -0.13422705233097076, "num_chars": 2}, {"sum_logits": -1.471606731414795, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.471606731414795, "logits_per_char": -0.7358033657073975, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 773, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5628262758255005, "incorrect_loss_raw": 0.2435985654592514, "correct_loss_per_char": 0.7814131379127502, "incorrect_loss_per_char": 0.1217992827296257, "correct_loss_per_token": 1.5628262758255005, "incorrect_loss_per_token": 0.2435985654592514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2435985654592514, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": true, "logits_per_token": -0.2435985654592514, "logits_per_char": -0.1217992827296257, "num_chars": 2}, {"sum_logits": -1.5628262758255005, "num_tokens": 1, "num_tokens_all": 1117, "is_greedy": false, "logits_per_token": -1.5628262758255005, "logits_per_char": -0.7814131379127502, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 774, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6169888973236084, "incorrect_loss_raw": 0.2329535037279129, "correct_loss_per_char": 0.8084944486618042, "incorrect_loss_per_char": 0.11647675186395645, "correct_loss_per_token": 1.6169888973236084, "incorrect_loss_per_token": 0.2329535037279129, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2329535037279129, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -0.2329535037279129, "logits_per_char": -0.11647675186395645, "num_chars": 2}, {"sum_logits": -1.6169888973236084, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.6169888973236084, "logits_per_char": -0.8084944486618042, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 775, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3307584822177887, "incorrect_loss_raw": 1.3026888370513916, "correct_loss_per_char": 0.16537924110889435, "incorrect_loss_per_char": 0.6513444185256958, "correct_loss_per_token": 0.3307584822177887, "incorrect_loss_per_token": 1.3026888370513916, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3307584822177887, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.3307584822177887, "logits_per_char": -0.16537924110889435, "num_chars": 2}, {"sum_logits": -1.3026888370513916, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.3026888370513916, "logits_per_char": -0.6513444185256958, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 776, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23576423525810242, "incorrect_loss_raw": 1.605846881866455, "correct_loss_per_char": 0.11788211762905121, "incorrect_loss_per_char": 0.8029234409332275, "correct_loss_per_token": 0.23576423525810242, "incorrect_loss_per_token": 1.605846881866455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23576423525810242, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.23576423525810242, "logits_per_char": -0.11788211762905121, "num_chars": 2}, {"sum_logits": -1.605846881866455, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.605846881866455, "logits_per_char": -0.8029234409332275, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 777, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2372526228427887, "incorrect_loss_raw": 1.5826666355133057, "correct_loss_per_char": 0.11862631142139435, "incorrect_loss_per_char": 0.7913333177566528, "correct_loss_per_token": 0.2372526228427887, "incorrect_loss_per_token": 1.5826666355133057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2372526228427887, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -0.2372526228427887, "logits_per_char": -0.11862631142139435, "num_chars": 2}, {"sum_logits": -1.5826666355133057, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.5826666355133057, "logits_per_char": -0.7913333177566528, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 778, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6123900413513184, "incorrect_loss_raw": 0.23389723896980286, "correct_loss_per_char": 0.8061950206756592, "incorrect_loss_per_char": 0.11694861948490143, "correct_loss_per_token": 1.6123900413513184, "incorrect_loss_per_token": 0.23389723896980286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23389723896980286, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -0.23389723896980286, "logits_per_char": -0.11694861948490143, "num_chars": 2}, {"sum_logits": -1.6123900413513184, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.6123900413513184, "logits_per_char": -0.8061950206756592, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 779, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19538821280002594, "incorrect_loss_raw": 1.790054440498352, "correct_loss_per_char": 0.09769410640001297, "incorrect_loss_per_char": 0.895027220249176, "correct_loss_per_token": 0.19538821280002594, "incorrect_loss_per_token": 1.790054440498352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19538821280002594, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.19538821280002594, "logits_per_char": -0.09769410640001297, "num_chars": 2}, {"sum_logits": -1.790054440498352, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.790054440498352, "logits_per_char": -0.895027220249176, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 780, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2750893831253052, "incorrect_loss_raw": 1.4574381113052368, "correct_loss_per_char": 0.1375446915626526, "incorrect_loss_per_char": 0.7287190556526184, "correct_loss_per_token": 0.2750893831253052, "incorrect_loss_per_token": 1.4574381113052368, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2750893831253052, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.2750893831253052, "logits_per_char": -0.1375446915626526, "num_chars": 2}, {"sum_logits": -1.4574381113052368, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -1.4574381113052368, "logits_per_char": -0.7287190556526184, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 781, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19948112964630127, "incorrect_loss_raw": 1.7658077478408813, "correct_loss_per_char": 0.09974056482315063, "incorrect_loss_per_char": 0.8829038739204407, "correct_loss_per_token": 0.19948112964630127, "incorrect_loss_per_token": 1.7658077478408813, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19948112964630127, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.19948112964630127, "logits_per_char": -0.09974056482315063, "num_chars": 2}, {"sum_logits": -1.7658077478408813, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.7658077478408813, "logits_per_char": -0.8829038739204407, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 782, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25191718339920044, "incorrect_loss_raw": 1.538938283920288, "correct_loss_per_char": 0.12595859169960022, "incorrect_loss_per_char": 0.769469141960144, "correct_loss_per_token": 0.25191718339920044, "incorrect_loss_per_token": 1.538938283920288, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25191718339920044, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.25191718339920044, "logits_per_char": -0.12595859169960022, "num_chars": 2}, {"sum_logits": -1.538938283920288, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.538938283920288, "logits_per_char": -0.769469141960144, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 783, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.396604299545288, "incorrect_loss_raw": 0.2990969121456146, "correct_loss_per_char": 0.698302149772644, "incorrect_loss_per_char": 0.1495484560728073, "correct_loss_per_token": 1.396604299545288, "incorrect_loss_per_token": 0.2990969121456146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2990969121456146, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.2990969121456146, "logits_per_char": -0.1495484560728073, "num_chars": 2}, {"sum_logits": -1.396604299545288, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.396604299545288, "logits_per_char": -0.698302149772644, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 784, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.761622428894043, "incorrect_loss_raw": 0.19877856969833374, "correct_loss_per_char": 0.8808112144470215, "incorrect_loss_per_char": 0.09938928484916687, "correct_loss_per_token": 1.761622428894043, "incorrect_loss_per_token": 0.19877856969833374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19877856969833374, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.19877856969833374, "logits_per_char": -0.09938928484916687, "num_chars": 2}, {"sum_logits": -1.761622428894043, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.761622428894043, "logits_per_char": -0.8808112144470215, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 785, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2645118236541748, "incorrect_loss_raw": 1.4984318017959595, "correct_loss_per_char": 0.1322559118270874, "incorrect_loss_per_char": 0.7492159008979797, "correct_loss_per_token": 0.2645118236541748, "incorrect_loss_per_token": 1.4984318017959595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2645118236541748, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.2645118236541748, "logits_per_char": -0.1322559118270874, "num_chars": 2}, {"sum_logits": -1.4984318017959595, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.4984318017959595, "logits_per_char": -0.7492159008979797, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 786, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23302225768566132, "incorrect_loss_raw": 1.5984454154968262, "correct_loss_per_char": 0.11651112884283066, "incorrect_loss_per_char": 0.7992227077484131, "correct_loss_per_token": 0.23302225768566132, "incorrect_loss_per_token": 1.5984454154968262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23302225768566132, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.23302225768566132, "logits_per_char": -0.11651112884283066, "num_chars": 2}, {"sum_logits": -1.5984454154968262, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.5984454154968262, "logits_per_char": -0.7992227077484131, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 787, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.441136360168457, "incorrect_loss_raw": 0.2784292697906494, "correct_loss_per_char": 0.7205681800842285, "incorrect_loss_per_char": 0.1392146348953247, "correct_loss_per_token": 1.441136360168457, "incorrect_loss_per_token": 0.2784292697906494, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2784292697906494, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.2784292697906494, "logits_per_char": -0.1392146348953247, "num_chars": 2}, {"sum_logits": -1.441136360168457, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.441136360168457, "logits_per_char": -0.7205681800842285, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 788, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.609451174736023, "incorrect_loss_raw": 0.23340745270252228, "correct_loss_per_char": 0.8047255873680115, "incorrect_loss_per_char": 0.11670372635126114, "correct_loss_per_token": 1.609451174736023, "incorrect_loss_per_token": 0.23340745270252228, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23340745270252228, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -0.23340745270252228, "logits_per_char": -0.11670372635126114, "num_chars": 2}, {"sum_logits": -1.609451174736023, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.609451174736023, "logits_per_char": -0.8047255873680115, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 789, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2563467025756836, "incorrect_loss_raw": 0.34275615215301514, "correct_loss_per_char": 0.6281733512878418, "incorrect_loss_per_char": 0.17137807607650757, "correct_loss_per_token": 1.2563467025756836, "incorrect_loss_per_token": 0.34275615215301514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34275615215301514, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.34275615215301514, "logits_per_char": -0.17137807607650757, "num_chars": 2}, {"sum_logits": -1.2563467025756836, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.2563467025756836, "logits_per_char": -0.6281733512878418, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 790, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.12312879413366318, "incorrect_loss_raw": 2.2135446071624756, "correct_loss_per_char": 0.06156439706683159, "incorrect_loss_per_char": 1.1067723035812378, "correct_loss_per_token": 0.12312879413366318, "incorrect_loss_per_token": 2.2135446071624756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.12312879413366318, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.12312879413366318, "logits_per_char": -0.06156439706683159, "num_chars": 2}, {"sum_logits": -2.2135446071624756, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -2.2135446071624756, "logits_per_char": -1.1067723035812378, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 791, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26022636890411377, "incorrect_loss_raw": 1.5219963788986206, "correct_loss_per_char": 0.13011318445205688, "incorrect_loss_per_char": 0.7609981894493103, "correct_loss_per_token": 0.26022636890411377, "incorrect_loss_per_token": 1.5219963788986206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26022636890411377, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.26022636890411377, "logits_per_char": -0.13011318445205688, "num_chars": 2}, {"sum_logits": -1.5219963788986206, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.5219963788986206, "logits_per_char": -0.7609981894493103, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 792, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28010475635528564, "incorrect_loss_raw": 1.4393658638000488, "correct_loss_per_char": 0.14005237817764282, "incorrect_loss_per_char": 0.7196829319000244, "correct_loss_per_token": 0.28010475635528564, "incorrect_loss_per_token": 1.4393658638000488, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28010475635528564, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.28010475635528564, "logits_per_char": -0.14005237817764282, "num_chars": 2}, {"sum_logits": -1.4393658638000488, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.4393658638000488, "logits_per_char": -0.7196829319000244, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 793, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23455961048603058, "incorrect_loss_raw": 1.6132330894470215, "correct_loss_per_char": 0.11727980524301529, "incorrect_loss_per_char": 0.8066165447235107, "correct_loss_per_token": 0.23455961048603058, "incorrect_loss_per_token": 1.6132330894470215, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23455961048603058, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.23455961048603058, "logits_per_char": -0.11727980524301529, "num_chars": 2}, {"sum_logits": -1.6132330894470215, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.6132330894470215, "logits_per_char": -0.8066165447235107, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 794, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20553170144557953, "incorrect_loss_raw": 1.7301921844482422, "correct_loss_per_char": 0.10276585072278976, "incorrect_loss_per_char": 0.8650960922241211, "correct_loss_per_token": 0.20553170144557953, "incorrect_loss_per_token": 1.7301921844482422, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20553170144557953, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.20553170144557953, "logits_per_char": -0.10276585072278976, "num_chars": 2}, {"sum_logits": -1.7301921844482422, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.7301921844482422, "logits_per_char": -0.8650960922241211, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 795, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.453951358795166, "incorrect_loss_raw": 0.27164703607559204, "correct_loss_per_char": 0.726975679397583, "incorrect_loss_per_char": 0.13582351803779602, "correct_loss_per_token": 1.453951358795166, "incorrect_loss_per_token": 0.27164703607559204, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27164703607559204, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.27164703607559204, "logits_per_char": -0.13582351803779602, "num_chars": 2}, {"sum_logits": -1.453951358795166, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -1.453951358795166, "logits_per_char": -0.726975679397583, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 796, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2210603654384613, "incorrect_loss_raw": 1.6577692031860352, "correct_loss_per_char": 0.11053018271923065, "incorrect_loss_per_char": 0.8288846015930176, "correct_loss_per_token": 0.2210603654384613, "incorrect_loss_per_token": 1.6577692031860352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2210603654384613, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.2210603654384613, "logits_per_char": -0.11053018271923065, "num_chars": 2}, {"sum_logits": -1.6577692031860352, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.6577692031860352, "logits_per_char": -0.8288846015930176, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 797, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4321740865707397, "incorrect_loss_raw": 0.2805556058883667, "correct_loss_per_char": 0.7160870432853699, "incorrect_loss_per_char": 0.14027780294418335, "correct_loss_per_token": 1.4321740865707397, "incorrect_loss_per_token": 0.2805556058883667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2805556058883667, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.2805556058883667, "logits_per_char": -0.14027780294418335, "num_chars": 2}, {"sum_logits": -1.4321740865707397, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.4321740865707397, "logits_per_char": -0.7160870432853699, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 798, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25837790966033936, "incorrect_loss_raw": 1.5293692350387573, "correct_loss_per_char": 0.12918895483016968, "incorrect_loss_per_char": 0.7646846175193787, "correct_loss_per_token": 0.25837790966033936, "incorrect_loss_per_token": 1.5293692350387573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25837790966033936, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": true, "logits_per_token": -0.25837790966033936, "logits_per_char": -0.12918895483016968, "num_chars": 2}, {"sum_logits": -1.5293692350387573, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": false, "logits_per_token": -1.5293692350387573, "logits_per_char": -0.7646846175193787, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 799, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5536327362060547, "incorrect_loss_raw": 0.25175732374191284, "correct_loss_per_char": 0.7768163681030273, "incorrect_loss_per_char": 0.12587866187095642, "correct_loss_per_token": 1.5536327362060547, "incorrect_loss_per_token": 0.25175732374191284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25175732374191284, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -0.25175732374191284, "logits_per_char": -0.12587866187095642, "num_chars": 2}, {"sum_logits": -1.5536327362060547, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.5536327362060547, "logits_per_char": -0.7768163681030273, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 800, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17899540066719055, "incorrect_loss_raw": 1.8334553241729736, "correct_loss_per_char": 0.08949770033359528, "incorrect_loss_per_char": 0.9167276620864868, "correct_loss_per_token": 0.17899540066719055, "incorrect_loss_per_token": 1.8334553241729736, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17899540066719055, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.17899540066719055, "logits_per_char": -0.08949770033359528, "num_chars": 2}, {"sum_logits": -1.8334553241729736, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.8334553241729736, "logits_per_char": -0.9167276620864868, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 801, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6981197595596313, "incorrect_loss_raw": 0.2123984843492508, "correct_loss_per_char": 0.8490598797798157, "incorrect_loss_per_char": 0.1061992421746254, "correct_loss_per_token": 1.6981197595596313, "incorrect_loss_per_token": 0.2123984843492508, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2123984843492508, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.2123984843492508, "logits_per_char": -0.1061992421746254, "num_chars": 2}, {"sum_logits": -1.6981197595596313, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.6981197595596313, "logits_per_char": -0.8490598797798157, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 802, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24939125776290894, "incorrect_loss_raw": 1.5607683658599854, "correct_loss_per_char": 0.12469562888145447, "incorrect_loss_per_char": 0.7803841829299927, "correct_loss_per_token": 0.24939125776290894, "incorrect_loss_per_token": 1.5607683658599854, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24939125776290894, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": true, "logits_per_token": -0.24939125776290894, "logits_per_char": -0.12469562888145447, "num_chars": 2}, {"sum_logits": -1.5607683658599854, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.5607683658599854, "logits_per_char": -0.7803841829299927, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 803, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22646236419677734, "incorrect_loss_raw": 1.6384525299072266, "correct_loss_per_char": 0.11323118209838867, "incorrect_loss_per_char": 0.8192262649536133, "correct_loss_per_token": 0.22646236419677734, "incorrect_loss_per_token": 1.6384525299072266, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22646236419677734, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.22646236419677734, "logits_per_char": -0.11323118209838867, "num_chars": 2}, {"sum_logits": -1.6384525299072266, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.6384525299072266, "logits_per_char": -0.8192262649536133, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 804, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2587776780128479, "incorrect_loss_raw": 1.531998634338379, "correct_loss_per_char": 0.12938883900642395, "incorrect_loss_per_char": 0.7659993171691895, "correct_loss_per_token": 0.2587776780128479, "incorrect_loss_per_token": 1.531998634338379, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2587776780128479, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.2587776780128479, "logits_per_char": -0.12938883900642395, "num_chars": 2}, {"sum_logits": -1.531998634338379, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.531998634338379, "logits_per_char": -0.7659993171691895, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 805, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.822597861289978, "incorrect_loss_raw": 0.19028842449188232, "correct_loss_per_char": 0.911298930644989, "incorrect_loss_per_char": 0.09514421224594116, "correct_loss_per_token": 1.822597861289978, "incorrect_loss_per_token": 0.19028842449188232, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19028842449188232, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.19028842449188232, "logits_per_char": -0.09514421224594116, "num_chars": 2}, {"sum_logits": -1.822597861289978, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.822597861289978, "logits_per_char": -0.911298930644989, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 806, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4332220554351807, "incorrect_loss_raw": 0.290424644947052, "correct_loss_per_char": 0.7166110277175903, "incorrect_loss_per_char": 0.145212322473526, "correct_loss_per_token": 1.4332220554351807, "incorrect_loss_per_token": 0.290424644947052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.290424644947052, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.290424644947052, "logits_per_char": -0.145212322473526, "num_chars": 2}, {"sum_logits": -1.4332220554351807, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.4332220554351807, "logits_per_char": -0.7166110277175903, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 807, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2516736388206482, "incorrect_loss_raw": 1.5358774662017822, "correct_loss_per_char": 0.1258368194103241, "incorrect_loss_per_char": 0.7679387331008911, "correct_loss_per_token": 0.2516736388206482, "incorrect_loss_per_token": 1.5358774662017822, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2516736388206482, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.2516736388206482, "logits_per_char": -0.1258368194103241, "num_chars": 2}, {"sum_logits": -1.5358774662017822, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -1.5358774662017822, "logits_per_char": -0.7679387331008911, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 808, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20478558540344238, "incorrect_loss_raw": 1.7339198589324951, "correct_loss_per_char": 0.10239279270172119, "incorrect_loss_per_char": 0.8669599294662476, "correct_loss_per_token": 0.20478558540344238, "incorrect_loss_per_token": 1.7339198589324951, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20478558540344238, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.20478558540344238, "logits_per_char": -0.10239279270172119, "num_chars": 2}, {"sum_logits": -1.7339198589324951, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.7339198589324951, "logits_per_char": -0.8669599294662476, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 809, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.757087230682373, "incorrect_loss_raw": 0.19485782086849213, "correct_loss_per_char": 0.8785436153411865, "incorrect_loss_per_char": 0.09742891043424606, "correct_loss_per_token": 1.757087230682373, "incorrect_loss_per_token": 0.19485782086849213, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19485782086849213, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.19485782086849213, "logits_per_char": -0.09742891043424606, "num_chars": 2}, {"sum_logits": -1.757087230682373, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.757087230682373, "logits_per_char": -0.8785436153411865, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 810, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24739915132522583, "incorrect_loss_raw": 1.5584220886230469, "correct_loss_per_char": 0.12369957566261292, "incorrect_loss_per_char": 0.7792110443115234, "correct_loss_per_token": 0.24739915132522583, "incorrect_loss_per_token": 1.5584220886230469, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24739915132522583, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.24739915132522583, "logits_per_char": -0.12369957566261292, "num_chars": 2}, {"sum_logits": -1.5584220886230469, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -1.5584220886230469, "logits_per_char": -0.7792110443115234, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 811, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21537438035011292, "incorrect_loss_raw": 1.6895815134048462, "correct_loss_per_char": 0.10768719017505646, "incorrect_loss_per_char": 0.8447907567024231, "correct_loss_per_token": 0.21537438035011292, "incorrect_loss_per_token": 1.6895815134048462, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21537438035011292, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.21537438035011292, "logits_per_char": -0.10768719017505646, "num_chars": 2}, {"sum_logits": -1.6895815134048462, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -1.6895815134048462, "logits_per_char": -0.8447907567024231, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 812, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4037268161773682, "incorrect_loss_raw": 0.3036852478981018, "correct_loss_per_char": 0.7018634080886841, "incorrect_loss_per_char": 0.1518426239490509, "correct_loss_per_token": 1.4037268161773682, "incorrect_loss_per_token": 0.3036852478981018, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3036852478981018, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": true, "logits_per_token": -0.3036852478981018, "logits_per_char": -0.1518426239490509, "num_chars": 2}, {"sum_logits": -1.4037268161773682, "num_tokens": 1, "num_tokens_all": 1085, "is_greedy": false, "logits_per_token": -1.4037268161773682, "logits_per_char": -0.7018634080886841, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 813, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26036518812179565, "incorrect_loss_raw": 1.5092544555664062, "correct_loss_per_char": 0.13018259406089783, "incorrect_loss_per_char": 0.7546272277832031, "correct_loss_per_token": 0.26036518812179565, "incorrect_loss_per_token": 1.5092544555664062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26036518812179565, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": true, "logits_per_token": -0.26036518812179565, "logits_per_char": -0.13018259406089783, "num_chars": 2}, {"sum_logits": -1.5092544555664062, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.5092544555664062, "logits_per_char": -0.7546272277832031, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 814, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5960619449615479, "incorrect_loss_raw": 0.2363760620355606, "correct_loss_per_char": 0.7980309724807739, "incorrect_loss_per_char": 0.1181880310177803, "correct_loss_per_token": 1.5960619449615479, "incorrect_loss_per_token": 0.2363760620355606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2363760620355606, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.2363760620355606, "logits_per_char": -0.1181880310177803, "num_chars": 2}, {"sum_logits": -1.5960619449615479, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.5960619449615479, "logits_per_char": -0.7980309724807739, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 815, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25557342171669006, "incorrect_loss_raw": 1.53075110912323, "correct_loss_per_char": 0.12778671085834503, "incorrect_loss_per_char": 0.765375554561615, "correct_loss_per_token": 0.25557342171669006, "incorrect_loss_per_token": 1.53075110912323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25557342171669006, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.25557342171669006, "logits_per_char": -0.12778671085834503, "num_chars": 2}, {"sum_logits": -1.53075110912323, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.53075110912323, "logits_per_char": -0.765375554561615, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 816, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2426164299249649, "incorrect_loss_raw": 1.5648248195648193, "correct_loss_per_char": 0.12130821496248245, "incorrect_loss_per_char": 0.7824124097824097, "correct_loss_per_token": 0.2426164299249649, "incorrect_loss_per_token": 1.5648248195648193, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2426164299249649, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.2426164299249649, "logits_per_char": -0.12130821496248245, "num_chars": 2}, {"sum_logits": -1.5648248195648193, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.5648248195648193, "logits_per_char": -0.7824124097824097, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 817, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2000769078731537, "incorrect_loss_raw": 1.7704156637191772, "correct_loss_per_char": 0.10003845393657684, "incorrect_loss_per_char": 0.8852078318595886, "correct_loss_per_token": 0.2000769078731537, "incorrect_loss_per_token": 1.7704156637191772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2000769078731537, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.2000769078731537, "logits_per_char": -0.10003845393657684, "num_chars": 2}, {"sum_logits": -1.7704156637191772, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.7704156637191772, "logits_per_char": -0.8852078318595886, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 818, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18062062561511993, "incorrect_loss_raw": 1.8496757745742798, "correct_loss_per_char": 0.09031031280755997, "incorrect_loss_per_char": 0.9248378872871399, "correct_loss_per_token": 0.18062062561511993, "incorrect_loss_per_token": 1.8496757745742798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18062062561511993, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.18062062561511993, "logits_per_char": -0.09031031280755997, "num_chars": 2}, {"sum_logits": -1.8496757745742798, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.8496757745742798, "logits_per_char": -0.9248378872871399, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 819, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2777360677719116, "incorrect_loss_raw": 1.4722594022750854, "correct_loss_per_char": 0.1388680338859558, "incorrect_loss_per_char": 0.7361297011375427, "correct_loss_per_token": 0.2777360677719116, "incorrect_loss_per_token": 1.4722594022750854, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2777360677719116, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": true, "logits_per_token": -0.2777360677719116, "logits_per_char": -0.1388680338859558, "num_chars": 2}, {"sum_logits": -1.4722594022750854, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.4722594022750854, "logits_per_char": -0.7361297011375427, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 820, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7572832107543945, "incorrect_loss_raw": 0.19771863520145416, "correct_loss_per_char": 0.8786416053771973, "incorrect_loss_per_char": 0.09885931760072708, "correct_loss_per_token": 1.7572832107543945, "incorrect_loss_per_token": 0.19771863520145416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19771863520145416, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.19771863520145416, "logits_per_char": -0.09885931760072708, "num_chars": 2}, {"sum_logits": -1.7572832107543945, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.7572832107543945, "logits_per_char": -0.8786416053771973, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 821, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5800596475601196, "incorrect_loss_raw": 0.24085776507854462, "correct_loss_per_char": 0.7900298237800598, "incorrect_loss_per_char": 0.12042888253927231, "correct_loss_per_token": 1.5800596475601196, "incorrect_loss_per_token": 0.24085776507854462, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24085776507854462, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.24085776507854462, "logits_per_char": -0.12042888253927231, "num_chars": 2}, {"sum_logits": -1.5800596475601196, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.5800596475601196, "logits_per_char": -0.7900298237800598, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 822, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2753102779388428, "incorrect_loss_raw": 1.443062663078308, "correct_loss_per_char": 0.1376551389694214, "incorrect_loss_per_char": 0.721531331539154, "correct_loss_per_token": 0.2753102779388428, "incorrect_loss_per_token": 1.443062663078308, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2753102779388428, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.2753102779388428, "logits_per_char": -0.1376551389694214, "num_chars": 2}, {"sum_logits": -1.443062663078308, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.443062663078308, "logits_per_char": -0.721531331539154, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 823, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8131048679351807, "incorrect_loss_raw": 0.19060295820236206, "correct_loss_per_char": 0.9065524339675903, "incorrect_loss_per_char": 0.09530147910118103, "correct_loss_per_token": 1.8131048679351807, "incorrect_loss_per_token": 0.19060295820236206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19060295820236206, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.19060295820236206, "logits_per_char": -0.09530147910118103, "num_chars": 2}, {"sum_logits": -1.8131048679351807, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.8131048679351807, "logits_per_char": -0.9065524339675903, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 824, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6125545501708984, "incorrect_loss_raw": 0.23319701850414276, "correct_loss_per_char": 0.8062772750854492, "incorrect_loss_per_char": 0.11659850925207138, "correct_loss_per_token": 1.6125545501708984, "incorrect_loss_per_token": 0.23319701850414276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23319701850414276, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.23319701850414276, "logits_per_char": -0.11659850925207138, "num_chars": 2}, {"sum_logits": -1.6125545501708984, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.6125545501708984, "logits_per_char": -0.8062772750854492, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 825, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22319714725017548, "incorrect_loss_raw": 1.6410105228424072, "correct_loss_per_char": 0.11159857362508774, "incorrect_loss_per_char": 0.8205052614212036, "correct_loss_per_token": 0.22319714725017548, "incorrect_loss_per_token": 1.6410105228424072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22319714725017548, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.22319714725017548, "logits_per_char": -0.11159857362508774, "num_chars": 2}, {"sum_logits": -1.6410105228424072, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.6410105228424072, "logits_per_char": -0.8205052614212036, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 826, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19058345258235931, "incorrect_loss_raw": 1.8099431991577148, "correct_loss_per_char": 0.09529172629117966, "incorrect_loss_per_char": 0.9049715995788574, "correct_loss_per_token": 0.19058345258235931, "incorrect_loss_per_token": 1.8099431991577148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19058345258235931, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -0.19058345258235931, "logits_per_char": -0.09529172629117966, "num_chars": 2}, {"sum_logits": -1.8099431991577148, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.8099431991577148, "logits_per_char": -0.9049715995788574, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 827, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24075095355510712, "incorrect_loss_raw": 1.5813392400741577, "correct_loss_per_char": 0.12037547677755356, "incorrect_loss_per_char": 0.7906696200370789, "correct_loss_per_token": 0.24075095355510712, "incorrect_loss_per_token": 1.5813392400741577, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24075095355510712, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": true, "logits_per_token": -0.24075095355510712, "logits_per_char": -0.12037547677755356, "num_chars": 2}, {"sum_logits": -1.5813392400741577, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": false, "logits_per_token": -1.5813392400741577, "logits_per_char": -0.7906696200370789, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 828, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4866441488265991, "incorrect_loss_raw": 0.2655348479747772, "correct_loss_per_char": 0.7433220744132996, "incorrect_loss_per_char": 0.1327674239873886, "correct_loss_per_token": 1.4866441488265991, "incorrect_loss_per_token": 0.2655348479747772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2655348479747772, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": true, "logits_per_token": -0.2655348479747772, "logits_per_char": -0.1327674239873886, "num_chars": 2}, {"sum_logits": -1.4866441488265991, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.4866441488265991, "logits_per_char": -0.7433220744132996, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 829, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6697455644607544, "incorrect_loss_raw": 0.22079141438007355, "correct_loss_per_char": 0.8348727822303772, "incorrect_loss_per_char": 0.11039570719003677, "correct_loss_per_token": 1.6697455644607544, "incorrect_loss_per_token": 0.22079141438007355, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22079141438007355, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.22079141438007355, "logits_per_char": -0.11039570719003677, "num_chars": 2}, {"sum_logits": -1.6697455644607544, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.6697455644607544, "logits_per_char": -0.8348727822303772, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 830, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8182564973831177, "incorrect_loss_raw": 0.1872510462999344, "correct_loss_per_char": 0.9091282486915588, "incorrect_loss_per_char": 0.0936255231499672, "correct_loss_per_token": 1.8182564973831177, "incorrect_loss_per_token": 0.1872510462999344, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1872510462999344, "num_tokens": 1, "num_tokens_all": 1145, "is_greedy": true, "logits_per_token": -0.1872510462999344, "logits_per_char": -0.0936255231499672, "num_chars": 2}, {"sum_logits": -1.8182564973831177, "num_tokens": 1, "num_tokens_all": 1145, "is_greedy": false, "logits_per_token": -1.8182564973831177, "logits_per_char": -0.9091282486915588, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 831, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24176187813282013, "incorrect_loss_raw": 1.583399772644043, "correct_loss_per_char": 0.12088093906641006, "incorrect_loss_per_char": 0.7916998863220215, "correct_loss_per_token": 0.24176187813282013, "incorrect_loss_per_token": 1.583399772644043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24176187813282013, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -0.24176187813282013, "logits_per_char": -0.12088093906641006, "num_chars": 2}, {"sum_logits": -1.583399772644043, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.583399772644043, "logits_per_char": -0.7916998863220215, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 832, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21538610756397247, "incorrect_loss_raw": 1.6864652633666992, "correct_loss_per_char": 0.10769305378198624, "incorrect_loss_per_char": 0.8432326316833496, "correct_loss_per_token": 0.21538610756397247, "incorrect_loss_per_token": 1.6864652633666992, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21538610756397247, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.21538610756397247, "logits_per_char": -0.10769305378198624, "num_chars": 2}, {"sum_logits": -1.6864652633666992, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.6864652633666992, "logits_per_char": -0.8432326316833496, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 833, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.914337396621704, "incorrect_loss_raw": 0.16975721716880798, "correct_loss_per_char": 0.957168698310852, "incorrect_loss_per_char": 0.08487860858440399, "correct_loss_per_token": 1.914337396621704, "incorrect_loss_per_token": 0.16975721716880798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16975721716880798, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.16975721716880798, "logits_per_char": -0.08487860858440399, "num_chars": 2}, {"sum_logits": -1.914337396621704, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.914337396621704, "logits_per_char": -0.957168698310852, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 834, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19369195401668549, "incorrect_loss_raw": 1.7700600624084473, "correct_loss_per_char": 0.09684597700834274, "incorrect_loss_per_char": 0.8850300312042236, "correct_loss_per_token": 0.19369195401668549, "incorrect_loss_per_token": 1.7700600624084473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19369195401668549, "num_tokens": 1, "num_tokens_all": 1016, "is_greedy": true, "logits_per_token": -0.19369195401668549, "logits_per_char": -0.09684597700834274, "num_chars": 2}, {"sum_logits": -1.7700600624084473, "num_tokens": 1, "num_tokens_all": 1016, "is_greedy": false, "logits_per_token": -1.7700600624084473, "logits_per_char": -0.8850300312042236, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 835, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1728624403476715, "incorrect_loss_raw": 1.8901253938674927, "correct_loss_per_char": 0.08643122017383575, "incorrect_loss_per_char": 0.9450626969337463, "correct_loss_per_token": 0.1728624403476715, "incorrect_loss_per_token": 1.8901253938674927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1728624403476715, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.1728624403476715, "logits_per_char": -0.08643122017383575, "num_chars": 2}, {"sum_logits": -1.8901253938674927, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.8901253938674927, "logits_per_char": -0.9450626969337463, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 836, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.238310769200325, "incorrect_loss_raw": 1.599247932434082, "correct_loss_per_char": 0.1191553846001625, "incorrect_loss_per_char": 0.799623966217041, "correct_loss_per_token": 0.238310769200325, "incorrect_loss_per_token": 1.599247932434082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.238310769200325, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -0.238310769200325, "logits_per_char": -0.1191553846001625, "num_chars": 2}, {"sum_logits": -1.599247932434082, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.599247932434082, "logits_per_char": -0.799623966217041, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 837, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5133693218231201, "incorrect_loss_raw": 0.2606845498085022, "correct_loss_per_char": 0.7566846609115601, "incorrect_loss_per_char": 0.1303422749042511, "correct_loss_per_token": 1.5133693218231201, "incorrect_loss_per_token": 0.2606845498085022, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2606845498085022, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.2606845498085022, "logits_per_char": -0.1303422749042511, "num_chars": 2}, {"sum_logits": -1.5133693218231201, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.5133693218231201, "logits_per_char": -0.7566846609115601, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 838, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.559649109840393, "incorrect_loss_raw": 0.253282755613327, "correct_loss_per_char": 0.7798245549201965, "incorrect_loss_per_char": 0.1266413778066635, "correct_loss_per_token": 1.559649109840393, "incorrect_loss_per_token": 0.253282755613327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.253282755613327, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": true, "logits_per_token": -0.253282755613327, "logits_per_char": -0.1266413778066635, "num_chars": 2}, {"sum_logits": -1.559649109840393, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.559649109840393, "logits_per_char": -0.7798245549201965, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 839, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23054559528827667, "incorrect_loss_raw": 1.6334658861160278, "correct_loss_per_char": 0.11527279764413834, "incorrect_loss_per_char": 0.8167329430580139, "correct_loss_per_token": 0.23054559528827667, "incorrect_loss_per_token": 1.6334658861160278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23054559528827667, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.23054559528827667, "logits_per_char": -0.11527279764413834, "num_chars": 2}, {"sum_logits": -1.6334658861160278, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.6334658861160278, "logits_per_char": -0.8167329430580139, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 840, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25134602189064026, "incorrect_loss_raw": 1.5431219339370728, "correct_loss_per_char": 0.12567301094532013, "incorrect_loss_per_char": 0.7715609669685364, "correct_loss_per_token": 0.25134602189064026, "incorrect_loss_per_token": 1.5431219339370728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25134602189064026, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": true, "logits_per_token": -0.25134602189064026, "logits_per_char": -0.12567301094532013, "num_chars": 2}, {"sum_logits": -1.5431219339370728, "num_tokens": 1, "num_tokens_all": 1053, "is_greedy": false, "logits_per_token": -1.5431219339370728, "logits_per_char": -0.7715609669685364, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 841, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2576441466808319, "incorrect_loss_raw": 1.507308006286621, "correct_loss_per_char": 0.12882207334041595, "incorrect_loss_per_char": 0.7536540031433105, "correct_loss_per_token": 0.2576441466808319, "incorrect_loss_per_token": 1.507308006286621, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2576441466808319, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -0.2576441466808319, "logits_per_char": -0.12882207334041595, "num_chars": 2}, {"sum_logits": -1.507308006286621, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.507308006286621, "logits_per_char": -0.7536540031433105, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 842, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2016010284423828, "incorrect_loss_raw": 1.740478277206421, "correct_loss_per_char": 0.1008005142211914, "incorrect_loss_per_char": 0.8702391386032104, "correct_loss_per_token": 0.2016010284423828, "incorrect_loss_per_token": 1.740478277206421, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2016010284423828, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -0.2016010284423828, "logits_per_char": -0.1008005142211914, "num_chars": 2}, {"sum_logits": -1.740478277206421, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.740478277206421, "logits_per_char": -0.8702391386032104, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 843, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8675874471664429, "incorrect_loss_raw": 0.1764487326145172, "correct_loss_per_char": 0.9337937235832214, "incorrect_loss_per_char": 0.0882243663072586, "correct_loss_per_token": 1.8675874471664429, "incorrect_loss_per_token": 0.1764487326145172, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1764487326145172, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.1764487326145172, "logits_per_char": -0.0882243663072586, "num_chars": 2}, {"sum_logits": -1.8675874471664429, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.8675874471664429, "logits_per_char": -0.9337937235832214, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 844, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2280094474554062, "incorrect_loss_raw": 1.6342332363128662, "correct_loss_per_char": 0.1140047237277031, "incorrect_loss_per_char": 0.8171166181564331, "correct_loss_per_token": 0.2280094474554062, "incorrect_loss_per_token": 1.6342332363128662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2280094474554062, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.2280094474554062, "logits_per_char": -0.1140047237277031, "num_chars": 2}, {"sum_logits": -1.6342332363128662, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.6342332363128662, "logits_per_char": -0.8171166181564331, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 845, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22919221222400665, "incorrect_loss_raw": 1.643949031829834, "correct_loss_per_char": 0.11459610611200333, "incorrect_loss_per_char": 0.821974515914917, "correct_loss_per_token": 0.22919221222400665, "incorrect_loss_per_token": 1.643949031829834, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22919221222400665, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.22919221222400665, "logits_per_char": -0.11459610611200333, "num_chars": 2}, {"sum_logits": -1.643949031829834, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.643949031829834, "logits_per_char": -0.821974515914917, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 846, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2782374918460846, "incorrect_loss_raw": 1.4493836164474487, "correct_loss_per_char": 0.1391187459230423, "incorrect_loss_per_char": 0.7246918082237244, "correct_loss_per_token": 0.2782374918460846, "incorrect_loss_per_token": 1.4493836164474487, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2782374918460846, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.2782374918460846, "logits_per_char": -0.1391187459230423, "num_chars": 2}, {"sum_logits": -1.4493836164474487, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.4493836164474487, "logits_per_char": -0.7246918082237244, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 847, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3007034063339233, "incorrect_loss_raw": 0.32488349080085754, "correct_loss_per_char": 0.6503517031669617, "incorrect_loss_per_char": 0.16244174540042877, "correct_loss_per_token": 1.3007034063339233, "incorrect_loss_per_token": 0.32488349080085754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32488349080085754, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.32488349080085754, "logits_per_char": -0.16244174540042877, "num_chars": 2}, {"sum_logits": -1.3007034063339233, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.3007034063339233, "logits_per_char": -0.6503517031669617, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 848, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5059951543807983, "incorrect_loss_raw": 0.26058855652809143, "correct_loss_per_char": 0.7529975771903992, "incorrect_loss_per_char": 0.13029427826404572, "correct_loss_per_token": 1.5059951543807983, "incorrect_loss_per_token": 0.26058855652809143, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26058855652809143, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.26058855652809143, "logits_per_char": -0.13029427826404572, "num_chars": 2}, {"sum_logits": -1.5059951543807983, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.5059951543807983, "logits_per_char": -0.7529975771903992, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 849, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.688560962677002, "incorrect_loss_raw": 0.21801692247390747, "correct_loss_per_char": 0.844280481338501, "incorrect_loss_per_char": 0.10900846123695374, "correct_loss_per_token": 1.688560962677002, "incorrect_loss_per_token": 0.21801692247390747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21801692247390747, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.21801692247390747, "logits_per_char": -0.10900846123695374, "num_chars": 2}, {"sum_logits": -1.688560962677002, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.688560962677002, "logits_per_char": -0.844280481338501, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 850, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3709891736507416, "incorrect_loss_raw": 1.212634563446045, "correct_loss_per_char": 0.1854945868253708, "incorrect_loss_per_char": 0.6063172817230225, "correct_loss_per_token": 0.3709891736507416, "incorrect_loss_per_token": 1.212634563446045, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3709891736507416, "num_tokens": 1, "num_tokens_all": 1024, "is_greedy": true, "logits_per_token": -0.3709891736507416, "logits_per_char": -0.1854945868253708, "num_chars": 2}, {"sum_logits": -1.212634563446045, "num_tokens": 1, "num_tokens_all": 1024, "is_greedy": false, "logits_per_token": -1.212634563446045, "logits_per_char": -0.6063172817230225, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 851, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26232361793518066, "incorrect_loss_raw": 1.496761441230774, "correct_loss_per_char": 0.13116180896759033, "incorrect_loss_per_char": 0.748380720615387, "correct_loss_per_token": 0.26232361793518066, "incorrect_loss_per_token": 1.496761441230774, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26232361793518066, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.26232361793518066, "logits_per_char": -0.13116180896759033, "num_chars": 2}, {"sum_logits": -1.496761441230774, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.496761441230774, "logits_per_char": -0.748380720615387, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 852, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2074490636587143, "incorrect_loss_raw": 1.7107363939285278, "correct_loss_per_char": 0.10372453182935715, "incorrect_loss_per_char": 0.8553681969642639, "correct_loss_per_token": 0.2074490636587143, "incorrect_loss_per_token": 1.7107363939285278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2074490636587143, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -0.2074490636587143, "logits_per_char": -0.10372453182935715, "num_chars": 2}, {"sum_logits": -1.7107363939285278, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.7107363939285278, "logits_per_char": -0.8553681969642639, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 853, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1818196028470993, "incorrect_loss_raw": 1.848633050918579, "correct_loss_per_char": 0.09090980142354965, "incorrect_loss_per_char": 0.9243165254592896, "correct_loss_per_token": 0.1818196028470993, "incorrect_loss_per_token": 1.848633050918579, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1818196028470993, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.1818196028470993, "logits_per_char": -0.09090980142354965, "num_chars": 2}, {"sum_logits": -1.848633050918579, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.848633050918579, "logits_per_char": -0.9243165254592896, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 854, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3676748275756836, "incorrect_loss_raw": 0.31432586908340454, "correct_loss_per_char": 0.6838374137878418, "incorrect_loss_per_char": 0.15716293454170227, "correct_loss_per_token": 1.3676748275756836, "incorrect_loss_per_token": 0.31432586908340454, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.31432586908340454, "num_tokens": 1, "num_tokens_all": 1217, "is_greedy": true, "logits_per_token": -0.31432586908340454, "logits_per_char": -0.15716293454170227, "num_chars": 2}, {"sum_logits": -1.3676748275756836, "num_tokens": 1, "num_tokens_all": 1217, "is_greedy": false, "logits_per_token": -1.3676748275756836, "logits_per_char": -0.6838374137878418, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 855, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9102944135665894, "incorrect_loss_raw": 0.1712249517440796, "correct_loss_per_char": 0.9551472067832947, "incorrect_loss_per_char": 0.0856124758720398, "correct_loss_per_token": 1.9102944135665894, "incorrect_loss_per_token": 0.1712249517440796, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1712249517440796, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": true, "logits_per_token": -0.1712249517440796, "logits_per_char": -0.0856124758720398, "num_chars": 2}, {"sum_logits": -1.9102944135665894, "num_tokens": 1, "num_tokens_all": 1137, "is_greedy": false, "logits_per_token": -1.9102944135665894, "logits_per_char": -0.9551472067832947, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 856, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25860676169395447, "incorrect_loss_raw": 1.5185470581054688, "correct_loss_per_char": 0.12930338084697723, "incorrect_loss_per_char": 0.7592735290527344, "correct_loss_per_token": 0.25860676169395447, "incorrect_loss_per_token": 1.5185470581054688, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25860676169395447, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.25860676169395447, "logits_per_char": -0.12930338084697723, "num_chars": 2}, {"sum_logits": -1.5185470581054688, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.5185470581054688, "logits_per_char": -0.7592735290527344, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 857, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4868704080581665, "incorrect_loss_raw": 0.2705768644809723, "correct_loss_per_char": 0.7434352040290833, "incorrect_loss_per_char": 0.13528843224048615, "correct_loss_per_token": 1.4868704080581665, "incorrect_loss_per_token": 0.2705768644809723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2705768644809723, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.2705768644809723, "logits_per_char": -0.13528843224048615, "num_chars": 2}, {"sum_logits": -1.4868704080581665, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.4868704080581665, "logits_per_char": -0.7434352040290833, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 858, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3361481726169586, "incorrect_loss_raw": 1.292853593826294, "correct_loss_per_char": 0.1680740863084793, "incorrect_loss_per_char": 0.646426796913147, "correct_loss_per_token": 0.3361481726169586, "incorrect_loss_per_token": 1.292853593826294, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3361481726169586, "num_tokens": 1, "num_tokens_all": 1028, "is_greedy": true, "logits_per_token": -0.3361481726169586, "logits_per_char": -0.1680740863084793, "num_chars": 2}, {"sum_logits": -1.292853593826294, "num_tokens": 1, "num_tokens_all": 1028, "is_greedy": false, "logits_per_token": -1.292853593826294, "logits_per_char": -0.646426796913147, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 859, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26480796933174133, "incorrect_loss_raw": 1.4964728355407715, "correct_loss_per_char": 0.13240398466587067, "incorrect_loss_per_char": 0.7482364177703857, "correct_loss_per_token": 0.26480796933174133, "incorrect_loss_per_token": 1.4964728355407715, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26480796933174133, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": true, "logits_per_token": -0.26480796933174133, "logits_per_char": -0.13240398466587067, "num_chars": 2}, {"sum_logits": -1.4964728355407715, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": false, "logits_per_token": -1.4964728355407715, "logits_per_char": -0.7482364177703857, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 860, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.666006088256836, "incorrect_loss_raw": 0.21893338859081268, "correct_loss_per_char": 0.833003044128418, "incorrect_loss_per_char": 0.10946669429540634, "correct_loss_per_token": 1.666006088256836, "incorrect_loss_per_token": 0.21893338859081268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21893338859081268, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.21893338859081268, "logits_per_char": -0.10946669429540634, "num_chars": 2}, {"sum_logits": -1.666006088256836, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.666006088256836, "logits_per_char": -0.833003044128418, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 861, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6459076404571533, "incorrect_loss_raw": 0.22716785967350006, "correct_loss_per_char": 0.8229538202285767, "incorrect_loss_per_char": 0.11358392983675003, "correct_loss_per_token": 1.6459076404571533, "incorrect_loss_per_token": 0.22716785967350006, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22716785967350006, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.22716785967350006, "logits_per_char": -0.11358392983675003, "num_chars": 2}, {"sum_logits": -1.6459076404571533, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.6459076404571533, "logits_per_char": -0.8229538202285767, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 862, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7571653127670288, "incorrect_loss_raw": 0.19851253926753998, "correct_loss_per_char": 0.8785826563835144, "incorrect_loss_per_char": 0.09925626963376999, "correct_loss_per_token": 1.7571653127670288, "incorrect_loss_per_token": 0.19851253926753998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19851253926753998, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.19851253926753998, "logits_per_char": -0.09925626963376999, "num_chars": 2}, {"sum_logits": -1.7571653127670288, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.7571653127670288, "logits_per_char": -0.8785826563835144, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 863, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5035799741744995, "incorrect_loss_raw": 0.26341545581817627, "correct_loss_per_char": 0.7517899870872498, "incorrect_loss_per_char": 0.13170772790908813, "correct_loss_per_token": 1.5035799741744995, "incorrect_loss_per_token": 0.26341545581817627, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26341545581817627, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": true, "logits_per_token": -0.26341545581817627, "logits_per_char": -0.13170772790908813, "num_chars": 2}, {"sum_logits": -1.5035799741744995, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": false, "logits_per_token": -1.5035799741744995, "logits_per_char": -0.7517899870872498, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 864, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9699525833129883, "incorrect_loss_raw": 0.160885751247406, "correct_loss_per_char": 0.9849762916564941, "incorrect_loss_per_char": 0.080442875623703, "correct_loss_per_token": 1.9699525833129883, "incorrect_loss_per_token": 0.160885751247406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.160885751247406, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.160885751247406, "logits_per_char": -0.080442875623703, "num_chars": 2}, {"sum_logits": -1.9699525833129883, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.9699525833129883, "logits_per_char": -0.9849762916564941, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 865, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.554166555404663, "incorrect_loss_raw": 0.24999898672103882, "correct_loss_per_char": 0.7770832777023315, "incorrect_loss_per_char": 0.12499949336051941, "correct_loss_per_token": 1.554166555404663, "incorrect_loss_per_token": 0.24999898672103882, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24999898672103882, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.24999898672103882, "logits_per_char": -0.12499949336051941, "num_chars": 2}, {"sum_logits": -1.554166555404663, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.554166555404663, "logits_per_char": -0.7770832777023315, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 866, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18612270057201385, "incorrect_loss_raw": 1.8470487594604492, "correct_loss_per_char": 0.09306135028600693, "incorrect_loss_per_char": 0.9235243797302246, "correct_loss_per_token": 0.18612270057201385, "incorrect_loss_per_token": 1.8470487594604492, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18612270057201385, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": true, "logits_per_token": -0.18612270057201385, "logits_per_char": -0.09306135028600693, "num_chars": 2}, {"sum_logits": -1.8470487594604492, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.8470487594604492, "logits_per_char": -0.9235243797302246, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 867, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.462059736251831, "incorrect_loss_raw": 0.27829569578170776, "correct_loss_per_char": 0.7310298681259155, "incorrect_loss_per_char": 0.13914784789085388, "correct_loss_per_token": 1.462059736251831, "incorrect_loss_per_token": 0.27829569578170776, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27829569578170776, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.27829569578170776, "logits_per_char": -0.13914784789085388, "num_chars": 2}, {"sum_logits": -1.462059736251831, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -1.462059736251831, "logits_per_char": -0.7310298681259155, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 868, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.677341341972351, "incorrect_loss_raw": 0.21251435577869415, "correct_loss_per_char": 0.8386706709861755, "incorrect_loss_per_char": 0.10625717788934708, "correct_loss_per_token": 1.677341341972351, "incorrect_loss_per_token": 0.21251435577869415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21251435577869415, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.21251435577869415, "logits_per_char": -0.10625717788934708, "num_chars": 2}, {"sum_logits": -1.677341341972351, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.677341341972351, "logits_per_char": -0.8386706709861755, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 869, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26199278235435486, "incorrect_loss_raw": 1.5094401836395264, "correct_loss_per_char": 0.13099639117717743, "incorrect_loss_per_char": 0.7547200918197632, "correct_loss_per_token": 0.26199278235435486, "incorrect_loss_per_token": 1.5094401836395264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26199278235435486, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": true, "logits_per_token": -0.26199278235435486, "logits_per_char": -0.13099639117717743, "num_chars": 2}, {"sum_logits": -1.5094401836395264, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -1.5094401836395264, "logits_per_char": -0.7547200918197632, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 870, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.190656840801239, "incorrect_loss_raw": 1.8048772811889648, "correct_loss_per_char": 0.0953284204006195, "incorrect_loss_per_char": 0.9024386405944824, "correct_loss_per_token": 0.190656840801239, "incorrect_loss_per_token": 1.8048772811889648, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.190656840801239, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.190656840801239, "logits_per_char": -0.0953284204006195, "num_chars": 2}, {"sum_logits": -1.8048772811889648, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.8048772811889648, "logits_per_char": -0.9024386405944824, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 871, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17091451585292816, "incorrect_loss_raw": 1.8807268142700195, "correct_loss_per_char": 0.08545725792646408, "incorrect_loss_per_char": 0.9403634071350098, "correct_loss_per_token": 0.17091451585292816, "incorrect_loss_per_token": 1.8807268142700195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17091451585292816, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": true, "logits_per_token": -0.17091451585292816, "logits_per_char": -0.08545725792646408, "num_chars": 2}, {"sum_logits": -1.8807268142700195, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": false, "logits_per_token": -1.8807268142700195, "logits_per_char": -0.9403634071350098, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 872, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17993855476379395, "incorrect_loss_raw": 1.8622748851776123, "correct_loss_per_char": 0.08996927738189697, "incorrect_loss_per_char": 0.9311374425888062, "correct_loss_per_token": 0.17993855476379395, "incorrect_loss_per_token": 1.8622748851776123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17993855476379395, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.17993855476379395, "logits_per_char": -0.08996927738189697, "num_chars": 2}, {"sum_logits": -1.8622748851776123, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.8622748851776123, "logits_per_char": -0.9311374425888062, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 873, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5174298286437988, "incorrect_loss_raw": 0.2626897096633911, "correct_loss_per_char": 0.7587149143218994, "incorrect_loss_per_char": 0.13134485483169556, "correct_loss_per_token": 1.5174298286437988, "incorrect_loss_per_token": 0.2626897096633911, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2626897096633911, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.2626897096633911, "logits_per_char": -0.13134485483169556, "num_chars": 2}, {"sum_logits": -1.5174298286437988, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.5174298286437988, "logits_per_char": -0.7587149143218994, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 874, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20682208240032196, "incorrect_loss_raw": 1.718821406364441, "correct_loss_per_char": 0.10341104120016098, "incorrect_loss_per_char": 0.8594107031822205, "correct_loss_per_token": 0.20682208240032196, "incorrect_loss_per_token": 1.718821406364441, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20682208240032196, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.20682208240032196, "logits_per_char": -0.10341104120016098, "num_chars": 2}, {"sum_logits": -1.718821406364441, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.718821406364441, "logits_per_char": -0.8594107031822205, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 875, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4085253477096558, "incorrect_loss_raw": 0.29706838726997375, "correct_loss_per_char": 0.7042626738548279, "incorrect_loss_per_char": 0.14853419363498688, "correct_loss_per_token": 1.4085253477096558, "incorrect_loss_per_token": 0.29706838726997375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29706838726997375, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.29706838726997375, "logits_per_char": -0.14853419363498688, "num_chars": 2}, {"sum_logits": -1.4085253477096558, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.4085253477096558, "logits_per_char": -0.7042626738548279, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 876, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19085828959941864, "incorrect_loss_raw": 1.785035490989685, "correct_loss_per_char": 0.09542914479970932, "incorrect_loss_per_char": 0.8925177454948425, "correct_loss_per_token": 0.19085828959941864, "incorrect_loss_per_token": 1.785035490989685, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19085828959941864, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.19085828959941864, "logits_per_char": -0.09542914479970932, "num_chars": 2}, {"sum_logits": -1.785035490989685, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.785035490989685, "logits_per_char": -0.8925177454948425, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 877, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.31653276085853577, "incorrect_loss_raw": 1.3474745750427246, "correct_loss_per_char": 0.15826638042926788, "incorrect_loss_per_char": 0.6737372875213623, "correct_loss_per_token": 0.31653276085853577, "incorrect_loss_per_token": 1.3474745750427246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.31653276085853577, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -0.31653276085853577, "logits_per_char": -0.15826638042926788, "num_chars": 2}, {"sum_logits": -1.3474745750427246, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.3474745750427246, "logits_per_char": -0.6737372875213623, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 878, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2767898142337799, "incorrect_loss_raw": 1.4558087587356567, "correct_loss_per_char": 0.13839490711688995, "incorrect_loss_per_char": 0.7279043793678284, "correct_loss_per_token": 0.2767898142337799, "incorrect_loss_per_token": 1.4558087587356567, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2767898142337799, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": true, "logits_per_token": -0.2767898142337799, "logits_per_char": -0.13839490711688995, "num_chars": 2}, {"sum_logits": -1.4558087587356567, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": false, "logits_per_token": -1.4558087587356567, "logits_per_char": -0.7279043793678284, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 879, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5910820960998535, "incorrect_loss_raw": 0.24081794917583466, "correct_loss_per_char": 0.7955410480499268, "incorrect_loss_per_char": 0.12040897458791733, "correct_loss_per_token": 1.5910820960998535, "incorrect_loss_per_token": 0.24081794917583466, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24081794917583466, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": true, "logits_per_token": -0.24081794917583466, "logits_per_char": -0.12040897458791733, "num_chars": 2}, {"sum_logits": -1.5910820960998535, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": false, "logits_per_token": -1.5910820960998535, "logits_per_char": -0.7955410480499268, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 880, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2548369765281677, "incorrect_loss_raw": 1.5271363258361816, "correct_loss_per_char": 0.12741848826408386, "incorrect_loss_per_char": 0.7635681629180908, "correct_loss_per_token": 0.2548369765281677, "incorrect_loss_per_token": 1.5271363258361816, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2548369765281677, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.2548369765281677, "logits_per_char": -0.12741848826408386, "num_chars": 2}, {"sum_logits": -1.5271363258361816, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.5271363258361816, "logits_per_char": -0.7635681629180908, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 881, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6466975212097168, "incorrect_loss_raw": 0.22564667463302612, "correct_loss_per_char": 0.8233487606048584, "incorrect_loss_per_char": 0.11282333731651306, "correct_loss_per_token": 1.6466975212097168, "incorrect_loss_per_token": 0.22564667463302612, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22564667463302612, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.22564667463302612, "logits_per_char": -0.11282333731651306, "num_chars": 2}, {"sum_logits": -1.6466975212097168, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.6466975212097168, "logits_per_char": -0.8233487606048584, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 882, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4934115409851074, "incorrect_loss_raw": 0.2616519331932068, "correct_loss_per_char": 0.7467057704925537, "incorrect_loss_per_char": 0.1308259665966034, "correct_loss_per_token": 1.4934115409851074, "incorrect_loss_per_token": 0.2616519331932068, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2616519331932068, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.2616519331932068, "logits_per_char": -0.1308259665966034, "num_chars": 2}, {"sum_logits": -1.4934115409851074, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.4934115409851074, "logits_per_char": -0.7467057704925537, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 883, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2383764237165451, "incorrect_loss_raw": 1.5878610610961914, "correct_loss_per_char": 0.11918821185827255, "incorrect_loss_per_char": 0.7939305305480957, "correct_loss_per_token": 0.2383764237165451, "incorrect_loss_per_token": 1.5878610610961914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2383764237165451, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.2383764237165451, "logits_per_char": -0.11918821185827255, "num_chars": 2}, {"sum_logits": -1.5878610610961914, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.5878610610961914, "logits_per_char": -0.7939305305480957, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 884, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4673353433609009, "incorrect_loss_raw": 0.2757929265499115, "correct_loss_per_char": 0.7336676716804504, "incorrect_loss_per_char": 0.13789646327495575, "correct_loss_per_token": 1.4673353433609009, "incorrect_loss_per_token": 0.2757929265499115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2757929265499115, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.2757929265499115, "logits_per_char": -0.13789646327495575, "num_chars": 2}, {"sum_logits": -1.4673353433609009, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.4673353433609009, "logits_per_char": -0.7336676716804504, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 885, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2368810474872589, "incorrect_loss_raw": 1.6041865348815918, "correct_loss_per_char": 0.11844052374362946, "incorrect_loss_per_char": 0.8020932674407959, "correct_loss_per_token": 0.2368810474872589, "incorrect_loss_per_token": 1.6041865348815918, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2368810474872589, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.2368810474872589, "logits_per_char": -0.11844052374362946, "num_chars": 2}, {"sum_logits": -1.6041865348815918, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.6041865348815918, "logits_per_char": -0.8020932674407959, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 886, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25172147154808044, "incorrect_loss_raw": 1.5536967515945435, "correct_loss_per_char": 0.12586073577404022, "incorrect_loss_per_char": 0.7768483757972717, "correct_loss_per_token": 0.25172147154808044, "incorrect_loss_per_token": 1.5536967515945435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25172147154808044, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": true, "logits_per_token": -0.25172147154808044, "logits_per_char": -0.12586073577404022, "num_chars": 2}, {"sum_logits": -1.5536967515945435, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": false, "logits_per_token": -1.5536967515945435, "logits_per_char": -0.7768483757972717, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 887, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2518472969532013, "incorrect_loss_raw": 1.5513923168182373, "correct_loss_per_char": 0.12592364847660065, "incorrect_loss_per_char": 0.7756961584091187, "correct_loss_per_token": 0.2518472969532013, "incorrect_loss_per_token": 1.5513923168182373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2518472969532013, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.2518472969532013, "logits_per_char": -0.12592364847660065, "num_chars": 2}, {"sum_logits": -1.5513923168182373, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.5513923168182373, "logits_per_char": -0.7756961584091187, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 888, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.660709261894226, "incorrect_loss_raw": 0.22146587073802948, "correct_loss_per_char": 0.830354630947113, "incorrect_loss_per_char": 0.11073293536901474, "correct_loss_per_token": 1.660709261894226, "incorrect_loss_per_token": 0.22146587073802948, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22146587073802948, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": true, "logits_per_token": -0.22146587073802948, "logits_per_char": -0.11073293536901474, "num_chars": 2}, {"sum_logits": -1.660709261894226, "num_tokens": 1, "num_tokens_all": 1013, "is_greedy": false, "logits_per_token": -1.660709261894226, "logits_per_char": -0.830354630947113, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 889, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25359880924224854, "incorrect_loss_raw": 1.5411194562911987, "correct_loss_per_char": 0.12679940462112427, "incorrect_loss_per_char": 0.7705597281455994, "correct_loss_per_token": 0.25359880924224854, "incorrect_loss_per_token": 1.5411194562911987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25359880924224854, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.25359880924224854, "logits_per_char": -0.12679940462112427, "num_chars": 2}, {"sum_logits": -1.5411194562911987, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.5411194562911987, "logits_per_char": -0.7705597281455994, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 890, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24270518124103546, "incorrect_loss_raw": 1.5707309246063232, "correct_loss_per_char": 0.12135259062051773, "incorrect_loss_per_char": 0.7853654623031616, "correct_loss_per_token": 0.24270518124103546, "incorrect_loss_per_token": 1.5707309246063232, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24270518124103546, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.24270518124103546, "logits_per_char": -0.12135259062051773, "num_chars": 2}, {"sum_logits": -1.5707309246063232, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.5707309246063232, "logits_per_char": -0.7853654623031616, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 891, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4134048223495483, "incorrect_loss_raw": 0.2930477559566498, "correct_loss_per_char": 0.7067024111747742, "incorrect_loss_per_char": 0.1465238779783249, "correct_loss_per_token": 1.4134048223495483, "incorrect_loss_per_token": 0.2930477559566498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2930477559566498, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -0.2930477559566498, "logits_per_char": -0.1465238779783249, "num_chars": 2}, {"sum_logits": -1.4134048223495483, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.4134048223495483, "logits_per_char": -0.7067024111747742, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 892, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19918490946292877, "incorrect_loss_raw": 1.7564250230789185, "correct_loss_per_char": 0.09959245473146439, "incorrect_loss_per_char": 0.8782125115394592, "correct_loss_per_token": 0.19918490946292877, "incorrect_loss_per_token": 1.7564250230789185, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19918490946292877, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.19918490946292877, "logits_per_char": -0.09959245473146439, "num_chars": 2}, {"sum_logits": -1.7564250230789185, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.7564250230789185, "logits_per_char": -0.8782125115394592, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 893, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27179184556007385, "incorrect_loss_raw": 1.4905459880828857, "correct_loss_per_char": 0.13589592278003693, "incorrect_loss_per_char": 0.7452729940414429, "correct_loss_per_token": 0.27179184556007385, "incorrect_loss_per_token": 1.4905459880828857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27179184556007385, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.27179184556007385, "logits_per_char": -0.13589592278003693, "num_chars": 2}, {"sum_logits": -1.4905459880828857, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.4905459880828857, "logits_per_char": -0.7452729940414429, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 894, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8135735988616943, "incorrect_loss_raw": 0.18438318371772766, "correct_loss_per_char": 0.9067867994308472, "incorrect_loss_per_char": 0.09219159185886383, "correct_loss_per_token": 1.8135735988616943, "incorrect_loss_per_token": 0.18438318371772766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18438318371772766, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.18438318371772766, "logits_per_char": -0.09219159185886383, "num_chars": 2}, {"sum_logits": -1.8135735988616943, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.8135735988616943, "logits_per_char": -0.9067867994308472, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 895, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21639437973499298, "incorrect_loss_raw": 1.6896395683288574, "correct_loss_per_char": 0.10819718986749649, "incorrect_loss_per_char": 0.8448197841644287, "correct_loss_per_token": 0.21639437973499298, "incorrect_loss_per_token": 1.6896395683288574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21639437973499298, "num_tokens": 1, "num_tokens_all": 1166, "is_greedy": true, "logits_per_token": -0.21639437973499298, "logits_per_char": -0.10819718986749649, "num_chars": 2}, {"sum_logits": -1.6896395683288574, "num_tokens": 1, "num_tokens_all": 1166, "is_greedy": false, "logits_per_token": -1.6896395683288574, "logits_per_char": -0.8448197841644287, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 896, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6898775100708008, "incorrect_loss_raw": 0.21109704673290253, "correct_loss_per_char": 0.8449387550354004, "incorrect_loss_per_char": 0.10554852336645126, "correct_loss_per_token": 1.6898775100708008, "incorrect_loss_per_token": 0.21109704673290253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21109704673290253, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.21109704673290253, "logits_per_char": -0.10554852336645126, "num_chars": 2}, {"sum_logits": -1.6898775100708008, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.6898775100708008, "logits_per_char": -0.8449387550354004, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 897, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2763296365737915, "incorrect_loss_raw": 1.4501327276229858, "correct_loss_per_char": 0.13816481828689575, "incorrect_loss_per_char": 0.7250663638114929, "correct_loss_per_token": 0.2763296365737915, "incorrect_loss_per_token": 1.4501327276229858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2763296365737915, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.2763296365737915, "logits_per_char": -0.13816481828689575, "num_chars": 2}, {"sum_logits": -1.4501327276229858, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -1.4501327276229858, "logits_per_char": -0.7250663638114929, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 898, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7143936157226562, "incorrect_loss_raw": 0.20849932730197906, "correct_loss_per_char": 0.8571968078613281, "incorrect_loss_per_char": 0.10424966365098953, "correct_loss_per_token": 1.7143936157226562, "incorrect_loss_per_token": 0.20849932730197906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20849932730197906, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.20849932730197906, "logits_per_char": -0.10424966365098953, "num_chars": 2}, {"sum_logits": -1.7143936157226562, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.7143936157226562, "logits_per_char": -0.8571968078613281, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 899, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2574782967567444, "incorrect_loss_raw": 1.5085575580596924, "correct_loss_per_char": 0.1287391483783722, "incorrect_loss_per_char": 0.7542787790298462, "correct_loss_per_token": 0.2574782967567444, "incorrect_loss_per_token": 1.5085575580596924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2574782967567444, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.2574782967567444, "logits_per_char": -0.1287391483783722, "num_chars": 2}, {"sum_logits": -1.5085575580596924, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.5085575580596924, "logits_per_char": -0.7542787790298462, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 900, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2092079371213913, "incorrect_loss_raw": 1.7228991985321045, "correct_loss_per_char": 0.10460396856069565, "incorrect_loss_per_char": 0.8614495992660522, "correct_loss_per_token": 0.2092079371213913, "incorrect_loss_per_token": 1.7228991985321045, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2092079371213913, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": true, "logits_per_token": -0.2092079371213913, "logits_per_char": -0.10460396856069565, "num_chars": 2}, {"sum_logits": -1.7228991985321045, "num_tokens": 1, "num_tokens_all": 1095, "is_greedy": false, "logits_per_token": -1.7228991985321045, "logits_per_char": -0.8614495992660522, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 901, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21243569254875183, "incorrect_loss_raw": 1.6851205825805664, "correct_loss_per_char": 0.10621784627437592, "incorrect_loss_per_char": 0.8425602912902832, "correct_loss_per_token": 0.21243569254875183, "incorrect_loss_per_token": 1.6851205825805664, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21243569254875183, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.21243569254875183, "logits_per_char": -0.10621784627437592, "num_chars": 2}, {"sum_logits": -1.6851205825805664, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.6851205825805664, "logits_per_char": -0.8425602912902832, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 902, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18538326025009155, "incorrect_loss_raw": 1.8282573223114014, "correct_loss_per_char": 0.09269163012504578, "incorrect_loss_per_char": 0.9141286611557007, "correct_loss_per_token": 0.18538326025009155, "incorrect_loss_per_token": 1.8282573223114014, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18538326025009155, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.18538326025009155, "logits_per_char": -0.09269163012504578, "num_chars": 2}, {"sum_logits": -1.8282573223114014, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.8282573223114014, "logits_per_char": -0.9141286611557007, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 903, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2881242036819458, "incorrect_loss_raw": 1.4282387495040894, "correct_loss_per_char": 0.1440621018409729, "incorrect_loss_per_char": 0.7141193747520447, "correct_loss_per_token": 0.2881242036819458, "incorrect_loss_per_token": 1.4282387495040894, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2881242036819458, "num_tokens": 1, "num_tokens_all": 1030, "is_greedy": true, "logits_per_token": -0.2881242036819458, "logits_per_char": -0.1440621018409729, "num_chars": 2}, {"sum_logits": -1.4282387495040894, "num_tokens": 1, "num_tokens_all": 1030, "is_greedy": false, "logits_per_token": -1.4282387495040894, "logits_per_char": -0.7141193747520447, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 904, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24044187366962433, "incorrect_loss_raw": 1.6013239622116089, "correct_loss_per_char": 0.12022093683481216, "incorrect_loss_per_char": 0.8006619811058044, "correct_loss_per_token": 0.24044187366962433, "incorrect_loss_per_token": 1.6013239622116089, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24044187366962433, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": true, "logits_per_token": -0.24044187366962433, "logits_per_char": -0.12022093683481216, "num_chars": 2}, {"sum_logits": -1.6013239622116089, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -1.6013239622116089, "logits_per_char": -0.8006619811058044, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 905, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5694677829742432, "incorrect_loss_raw": 0.23868754506111145, "correct_loss_per_char": 0.7847338914871216, "incorrect_loss_per_char": 0.11934377253055573, "correct_loss_per_token": 1.5694677829742432, "incorrect_loss_per_token": 0.23868754506111145, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23868754506111145, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.23868754506111145, "logits_per_char": -0.11934377253055573, "num_chars": 2}, {"sum_logits": -1.5694677829742432, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.5694677829742432, "logits_per_char": -0.7847338914871216, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 906, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22780074179172516, "incorrect_loss_raw": 1.6451480388641357, "correct_loss_per_char": 0.11390037089586258, "incorrect_loss_per_char": 0.8225740194320679, "correct_loss_per_token": 0.22780074179172516, "incorrect_loss_per_token": 1.6451480388641357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22780074179172516, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.22780074179172516, "logits_per_char": -0.11390037089586258, "num_chars": 2}, {"sum_logits": -1.6451480388641357, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -1.6451480388641357, "logits_per_char": -0.8225740194320679, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 907, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6574344635009766, "incorrect_loss_raw": 0.22058872878551483, "correct_loss_per_char": 0.8287172317504883, "incorrect_loss_per_char": 0.11029436439275742, "correct_loss_per_token": 1.6574344635009766, "incorrect_loss_per_token": 0.22058872878551483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22058872878551483, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.22058872878551483, "logits_per_char": -0.11029436439275742, "num_chars": 2}, {"sum_logits": -1.6574344635009766, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.6574344635009766, "logits_per_char": -0.8287172317504883, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 908, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25514882802963257, "incorrect_loss_raw": 1.5375511646270752, "correct_loss_per_char": 0.12757441401481628, "incorrect_loss_per_char": 0.7687755823135376, "correct_loss_per_token": 0.25514882802963257, "incorrect_loss_per_token": 1.5375511646270752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25514882802963257, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.25514882802963257, "logits_per_char": -0.12757441401481628, "num_chars": 2}, {"sum_logits": -1.5375511646270752, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.5375511646270752, "logits_per_char": -0.7687755823135376, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 909, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18569885194301605, "incorrect_loss_raw": 1.803292155265808, "correct_loss_per_char": 0.09284942597150803, "incorrect_loss_per_char": 0.901646077632904, "correct_loss_per_token": 0.18569885194301605, "incorrect_loss_per_token": 1.803292155265808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18569885194301605, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.18569885194301605, "logits_per_char": -0.09284942597150803, "num_chars": 2}, {"sum_logits": -1.803292155265808, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.803292155265808, "logits_per_char": -0.901646077632904, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 910, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2754100263118744, "incorrect_loss_raw": 1.4651297330856323, "correct_loss_per_char": 0.1377050131559372, "incorrect_loss_per_char": 0.7325648665428162, "correct_loss_per_token": 0.2754100263118744, "incorrect_loss_per_token": 1.4651297330856323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2754100263118744, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.2754100263118744, "logits_per_char": -0.1377050131559372, "num_chars": 2}, {"sum_logits": -1.4651297330856323, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.4651297330856323, "logits_per_char": -0.7325648665428162, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 911, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9541641473770142, "incorrect_loss_raw": 0.16055524349212646, "correct_loss_per_char": 0.9770820736885071, "incorrect_loss_per_char": 0.08027762174606323, "correct_loss_per_token": 1.9541641473770142, "incorrect_loss_per_token": 0.16055524349212646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16055524349212646, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -0.16055524349212646, "logits_per_char": -0.08027762174606323, "num_chars": 2}, {"sum_logits": -1.9541641473770142, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.9541641473770142, "logits_per_char": -0.9770820736885071, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 912, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2032628208398819, "incorrect_loss_raw": 1.7316606044769287, "correct_loss_per_char": 0.10163141041994095, "incorrect_loss_per_char": 0.8658303022384644, "correct_loss_per_token": 0.2032628208398819, "incorrect_loss_per_token": 1.7316606044769287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2032628208398819, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.2032628208398819, "logits_per_char": -0.10163141041994095, "num_chars": 2}, {"sum_logits": -1.7316606044769287, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.7316606044769287, "logits_per_char": -0.8658303022384644, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 913, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2693980932235718, "incorrect_loss_raw": 1.4815346002578735, "correct_loss_per_char": 0.1346990466117859, "incorrect_loss_per_char": 0.7407673001289368, "correct_loss_per_token": 0.2693980932235718, "incorrect_loss_per_token": 1.4815346002578735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2693980932235718, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.2693980932235718, "logits_per_char": -0.1346990466117859, "num_chars": 2}, {"sum_logits": -1.4815346002578735, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.4815346002578735, "logits_per_char": -0.7407673001289368, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 914, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5111682415008545, "incorrect_loss_raw": 0.2603471875190735, "correct_loss_per_char": 0.7555841207504272, "incorrect_loss_per_char": 0.13017359375953674, "correct_loss_per_token": 1.5111682415008545, "incorrect_loss_per_token": 0.2603471875190735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2603471875190735, "num_tokens": 1, "num_tokens_all": 1028, "is_greedy": true, "logits_per_token": -0.2603471875190735, "logits_per_char": -0.13017359375953674, "num_chars": 2}, {"sum_logits": -1.5111682415008545, "num_tokens": 1, "num_tokens_all": 1028, "is_greedy": false, "logits_per_token": -1.5111682415008545, "logits_per_char": -0.7555841207504272, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 915, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6120052337646484, "incorrect_loss_raw": 0.23226198554039001, "correct_loss_per_char": 0.8060026168823242, "incorrect_loss_per_char": 0.11613099277019501, "correct_loss_per_token": 1.6120052337646484, "incorrect_loss_per_token": 0.23226198554039001, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23226198554039001, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.23226198554039001, "logits_per_char": -0.11613099277019501, "num_chars": 2}, {"sum_logits": -1.6120052337646484, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.6120052337646484, "logits_per_char": -0.8060026168823242, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 916, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22302882373332977, "incorrect_loss_raw": 1.6522581577301025, "correct_loss_per_char": 0.11151441186666489, "incorrect_loss_per_char": 0.8261290788650513, "correct_loss_per_token": 0.22302882373332977, "incorrect_loss_per_token": 1.6522581577301025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22302882373332977, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.22302882373332977, "logits_per_char": -0.11151441186666489, "num_chars": 2}, {"sum_logits": -1.6522581577301025, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.6522581577301025, "logits_per_char": -0.8261290788650513, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 917, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.467543125152588, "incorrect_loss_raw": 0.2710731327533722, "correct_loss_per_char": 0.733771562576294, "incorrect_loss_per_char": 0.1355365663766861, "correct_loss_per_token": 1.467543125152588, "incorrect_loss_per_token": 0.2710731327533722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2710731327533722, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.2710731327533722, "logits_per_char": -0.1355365663766861, "num_chars": 2}, {"sum_logits": -1.467543125152588, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.467543125152588, "logits_per_char": -0.733771562576294, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 918, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6466236114501953, "incorrect_loss_raw": 0.22430254518985748, "correct_loss_per_char": 0.8233118057250977, "incorrect_loss_per_char": 0.11215127259492874, "correct_loss_per_token": 1.6466236114501953, "incorrect_loss_per_token": 0.22430254518985748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22430254518985748, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.22430254518985748, "logits_per_char": -0.11215127259492874, "num_chars": 2}, {"sum_logits": -1.6466236114501953, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -1.6466236114501953, "logits_per_char": -0.8233118057250977, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 919, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17662309110164642, "incorrect_loss_raw": 1.869145393371582, "correct_loss_per_char": 0.08831154555082321, "incorrect_loss_per_char": 0.934572696685791, "correct_loss_per_token": 0.17662309110164642, "incorrect_loss_per_token": 1.869145393371582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17662309110164642, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.17662309110164642, "logits_per_char": -0.08831154555082321, "num_chars": 2}, {"sum_logits": -1.869145393371582, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.869145393371582, "logits_per_char": -0.934572696685791, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 920, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27659881114959717, "incorrect_loss_raw": 1.4703835248947144, "correct_loss_per_char": 0.13829940557479858, "incorrect_loss_per_char": 0.7351917624473572, "correct_loss_per_token": 0.27659881114959717, "incorrect_loss_per_token": 1.4703835248947144, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27659881114959717, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": true, "logits_per_token": -0.27659881114959717, "logits_per_char": -0.13829940557479858, "num_chars": 2}, {"sum_logits": -1.4703835248947144, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": false, "logits_per_token": -1.4703835248947144, "logits_per_char": -0.7351917624473572, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 921, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22995319962501526, "incorrect_loss_raw": 1.609865427017212, "correct_loss_per_char": 0.11497659981250763, "incorrect_loss_per_char": 0.804932713508606, "correct_loss_per_token": 0.22995319962501526, "incorrect_loss_per_token": 1.609865427017212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22995319962501526, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.22995319962501526, "logits_per_char": -0.11497659981250763, "num_chars": 2}, {"sum_logits": -1.609865427017212, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.609865427017212, "logits_per_char": -0.804932713508606, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 922, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.424607515335083, "incorrect_loss_raw": 0.28887560963630676, "correct_loss_per_char": 0.7123037576675415, "incorrect_loss_per_char": 0.14443780481815338, "correct_loss_per_token": 1.424607515335083, "incorrect_loss_per_token": 0.28887560963630676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28887560963630676, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -0.28887560963630676, "logits_per_char": -0.14443780481815338, "num_chars": 2}, {"sum_logits": -1.424607515335083, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.424607515335083, "logits_per_char": -0.7123037576675415, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 923, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2769449055194855, "incorrect_loss_raw": 1.4554011821746826, "correct_loss_per_char": 0.13847245275974274, "incorrect_loss_per_char": 0.7277005910873413, "correct_loss_per_token": 0.2769449055194855, "incorrect_loss_per_token": 1.4554011821746826, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2769449055194855, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.2769449055194855, "logits_per_char": -0.13847245275974274, "num_chars": 2}, {"sum_logits": -1.4554011821746826, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.4554011821746826, "logits_per_char": -0.7277005910873413, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 924, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2138994336128235, "incorrect_loss_raw": 1.6960601806640625, "correct_loss_per_char": 0.10694971680641174, "incorrect_loss_per_char": 0.8480300903320312, "correct_loss_per_token": 0.2138994336128235, "incorrect_loss_per_token": 1.6960601806640625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2138994336128235, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.2138994336128235, "logits_per_char": -0.10694971680641174, "num_chars": 2}, {"sum_logits": -1.6960601806640625, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.6960601806640625, "logits_per_char": -0.8480300903320312, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 925, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8233542442321777, "incorrect_loss_raw": 0.18436791002750397, "correct_loss_per_char": 0.9116771221160889, "incorrect_loss_per_char": 0.09218395501375198, "correct_loss_per_token": 1.8233542442321777, "incorrect_loss_per_token": 0.18436791002750397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18436791002750397, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.18436791002750397, "logits_per_char": -0.09218395501375198, "num_chars": 2}, {"sum_logits": -1.8233542442321777, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.8233542442321777, "logits_per_char": -0.9116771221160889, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 926, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25103047490119934, "incorrect_loss_raw": 1.5374631881713867, "correct_loss_per_char": 0.12551523745059967, "incorrect_loss_per_char": 0.7687315940856934, "correct_loss_per_token": 0.25103047490119934, "incorrect_loss_per_token": 1.5374631881713867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25103047490119934, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": true, "logits_per_token": -0.25103047490119934, "logits_per_char": -0.12551523745059967, "num_chars": 2}, {"sum_logits": -1.5374631881713867, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -1.5374631881713867, "logits_per_char": -0.7687315940856934, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 927, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.507122278213501, "incorrect_loss_raw": 0.2609425187110901, "correct_loss_per_char": 0.7535611391067505, "incorrect_loss_per_char": 0.13047125935554504, "correct_loss_per_token": 1.507122278213501, "incorrect_loss_per_token": 0.2609425187110901, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2609425187110901, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.2609425187110901, "logits_per_char": -0.13047125935554504, "num_chars": 2}, {"sum_logits": -1.507122278213501, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.507122278213501, "logits_per_char": -0.7535611391067505, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 928, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.553493857383728, "incorrect_loss_raw": 0.2468780279159546, "correct_loss_per_char": 0.776746928691864, "incorrect_loss_per_char": 0.1234390139579773, "correct_loss_per_token": 1.553493857383728, "incorrect_loss_per_token": 0.2468780279159546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2468780279159546, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.2468780279159546, "logits_per_char": -0.1234390139579773, "num_chars": 2}, {"sum_logits": -1.553493857383728, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.553493857383728, "logits_per_char": -0.776746928691864, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 929, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23610101640224457, "incorrect_loss_raw": 1.592265009880066, "correct_loss_per_char": 0.11805050820112228, "incorrect_loss_per_char": 0.796132504940033, "correct_loss_per_token": 0.23610101640224457, "incorrect_loss_per_token": 1.592265009880066, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23610101640224457, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -0.23610101640224457, "logits_per_char": -0.11805050820112228, "num_chars": 2}, {"sum_logits": -1.592265009880066, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.592265009880066, "logits_per_char": -0.796132504940033, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 930, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3136189877986908, "incorrect_loss_raw": 1.3511332273483276, "correct_loss_per_char": 0.1568094938993454, "incorrect_loss_per_char": 0.6755666136741638, "correct_loss_per_token": 0.3136189877986908, "incorrect_loss_per_token": 1.3511332273483276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3136189877986908, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": true, "logits_per_token": -0.3136189877986908, "logits_per_char": -0.1568094938993454, "num_chars": 2}, {"sum_logits": -1.3511332273483276, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.3511332273483276, "logits_per_char": -0.6755666136741638, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 931, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20868849754333496, "incorrect_loss_raw": 1.7267253398895264, "correct_loss_per_char": 0.10434424877166748, "incorrect_loss_per_char": 0.8633626699447632, "correct_loss_per_token": 0.20868849754333496, "incorrect_loss_per_token": 1.7267253398895264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20868849754333496, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.20868849754333496, "logits_per_char": -0.10434424877166748, "num_chars": 2}, {"sum_logits": -1.7267253398895264, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.7267253398895264, "logits_per_char": -0.8633626699447632, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 932, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19555342197418213, "incorrect_loss_raw": 1.7791681289672852, "correct_loss_per_char": 0.09777671098709106, "incorrect_loss_per_char": 0.8895840644836426, "correct_loss_per_token": 0.19555342197418213, "incorrect_loss_per_token": 1.7791681289672852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19555342197418213, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.19555342197418213, "logits_per_char": -0.09777671098709106, "num_chars": 2}, {"sum_logits": -1.7791681289672852, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.7791681289672852, "logits_per_char": -0.8895840644836426, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 933, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6245944499969482, "incorrect_loss_raw": 0.22866056859493256, "correct_loss_per_char": 0.8122972249984741, "incorrect_loss_per_char": 0.11433028429746628, "correct_loss_per_token": 1.6245944499969482, "incorrect_loss_per_token": 0.22866056859493256, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22866056859493256, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.22866056859493256, "logits_per_char": -0.11433028429746628, "num_chars": 2}, {"sum_logits": -1.6245944499969482, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.6245944499969482, "logits_per_char": -0.8122972249984741, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 934, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5482842922210693, "incorrect_loss_raw": 0.25229722261428833, "correct_loss_per_char": 0.7741421461105347, "incorrect_loss_per_char": 0.12614861130714417, "correct_loss_per_token": 1.5482842922210693, "incorrect_loss_per_token": 0.25229722261428833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25229722261428833, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.25229722261428833, "logits_per_char": -0.12614861130714417, "num_chars": 2}, {"sum_logits": -1.5482842922210693, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.5482842922210693, "logits_per_char": -0.7741421461105347, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 935, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3996113538742065, "incorrect_loss_raw": 0.30076172947883606, "correct_loss_per_char": 0.6998056769371033, "incorrect_loss_per_char": 0.15038086473941803, "correct_loss_per_token": 1.3996113538742065, "incorrect_loss_per_token": 0.30076172947883606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30076172947883606, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": true, "logits_per_token": -0.30076172947883606, "logits_per_char": -0.15038086473941803, "num_chars": 2}, {"sum_logits": -1.3996113538742065, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": false, "logits_per_token": -1.3996113538742065, "logits_per_char": -0.6998056769371033, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 936, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5895215272903442, "incorrect_loss_raw": 0.23778150975704193, "correct_loss_per_char": 0.7947607636451721, "incorrect_loss_per_char": 0.11889075487852097, "correct_loss_per_token": 1.5895215272903442, "incorrect_loss_per_token": 0.23778150975704193, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23778150975704193, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -0.23778150975704193, "logits_per_char": -0.11889075487852097, "num_chars": 2}, {"sum_logits": -1.5895215272903442, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.5895215272903442, "logits_per_char": -0.7947607636451721, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 937, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2482895404100418, "incorrect_loss_raw": 1.5677475929260254, "correct_loss_per_char": 0.1241447702050209, "incorrect_loss_per_char": 0.7838737964630127, "correct_loss_per_token": 0.2482895404100418, "incorrect_loss_per_token": 1.5677475929260254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2482895404100418, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -0.2482895404100418, "logits_per_char": -0.1241447702050209, "num_chars": 2}, {"sum_logits": -1.5677475929260254, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.5677475929260254, "logits_per_char": -0.7838737964630127, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 938, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2873642146587372, "incorrect_loss_raw": 1.427501916885376, "correct_loss_per_char": 0.1436821073293686, "incorrect_loss_per_char": 0.713750958442688, "correct_loss_per_token": 0.2873642146587372, "incorrect_loss_per_token": 1.427501916885376, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2873642146587372, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": true, "logits_per_token": -0.2873642146587372, "logits_per_char": -0.1436821073293686, "num_chars": 2}, {"sum_logits": -1.427501916885376, "num_tokens": 1, "num_tokens_all": 1035, "is_greedy": false, "logits_per_token": -1.427501916885376, "logits_per_char": -0.713750958442688, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 939, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23499464988708496, "incorrect_loss_raw": 1.6071107387542725, "correct_loss_per_char": 0.11749732494354248, "incorrect_loss_per_char": 0.8035553693771362, "correct_loss_per_token": 0.23499464988708496, "incorrect_loss_per_token": 1.6071107387542725, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23499464988708496, "num_tokens": 1, "num_tokens_all": 1139, "is_greedy": true, "logits_per_token": -0.23499464988708496, "logits_per_char": -0.11749732494354248, "num_chars": 2}, {"sum_logits": -1.6071107387542725, "num_tokens": 1, "num_tokens_all": 1139, "is_greedy": false, "logits_per_token": -1.6071107387542725, "logits_per_char": -0.8035553693771362, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 940, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24529299139976501, "incorrect_loss_raw": 1.5863388776779175, "correct_loss_per_char": 0.12264649569988251, "incorrect_loss_per_char": 0.7931694388389587, "correct_loss_per_token": 0.24529299139976501, "incorrect_loss_per_token": 1.5863388776779175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24529299139976501, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.24529299139976501, "logits_per_char": -0.12264649569988251, "num_chars": 2}, {"sum_logits": -1.5863388776779175, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.5863388776779175, "logits_per_char": -0.7931694388389587, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 941, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2579081654548645, "incorrect_loss_raw": 1.531498908996582, "correct_loss_per_char": 0.12895408272743225, "incorrect_loss_per_char": 0.765749454498291, "correct_loss_per_token": 0.2579081654548645, "incorrect_loss_per_token": 1.531498908996582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2579081654548645, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.2579081654548645, "logits_per_char": -0.12895408272743225, "num_chars": 2}, {"sum_logits": -1.531498908996582, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.531498908996582, "logits_per_char": -0.765749454498291, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 942, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6706622838974, "incorrect_loss_raw": 0.2175683081150055, "correct_loss_per_char": 0.8353311419487, "incorrect_loss_per_char": 0.10878415405750275, "correct_loss_per_token": 1.6706622838974, "incorrect_loss_per_token": 0.2175683081150055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2175683081150055, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.2175683081150055, "logits_per_char": -0.10878415405750275, "num_chars": 2}, {"sum_logits": -1.6706622838974, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -1.6706622838974, "logits_per_char": -0.8353311419487, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 943, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2657250463962555, "incorrect_loss_raw": 1.5100921392440796, "correct_loss_per_char": 0.13286252319812775, "incorrect_loss_per_char": 0.7550460696220398, "correct_loss_per_token": 0.2657250463962555, "incorrect_loss_per_token": 1.5100921392440796, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2657250463962555, "num_tokens": 1, "num_tokens_all": 1275, "is_greedy": true, "logits_per_token": -0.2657250463962555, "logits_per_char": -0.13286252319812775, "num_chars": 2}, {"sum_logits": -1.5100921392440796, "num_tokens": 1, "num_tokens_all": 1275, "is_greedy": false, "logits_per_token": -1.5100921392440796, "logits_per_char": -0.7550460696220398, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 944, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22159616649150848, "incorrect_loss_raw": 1.6612156629562378, "correct_loss_per_char": 0.11079808324575424, "incorrect_loss_per_char": 0.8306078314781189, "correct_loss_per_token": 0.22159616649150848, "incorrect_loss_per_token": 1.6612156629562378, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22159616649150848, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.22159616649150848, "logits_per_char": -0.11079808324575424, "num_chars": 2}, {"sum_logits": -1.6612156629562378, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.6612156629562378, "logits_per_char": -0.8306078314781189, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 945, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5766639709472656, "incorrect_loss_raw": 0.2465837001800537, "correct_loss_per_char": 0.7883319854736328, "incorrect_loss_per_char": 0.12329185009002686, "correct_loss_per_token": 1.5766639709472656, "incorrect_loss_per_token": 0.2465837001800537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2465837001800537, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.2465837001800537, "logits_per_char": -0.12329185009002686, "num_chars": 2}, {"sum_logits": -1.5766639709472656, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.5766639709472656, "logits_per_char": -0.7883319854736328, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 946, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2086312174797058, "incorrect_loss_raw": 1.7067406177520752, "correct_loss_per_char": 0.1043156087398529, "incorrect_loss_per_char": 0.8533703088760376, "correct_loss_per_token": 0.2086312174797058, "incorrect_loss_per_token": 1.7067406177520752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2086312174797058, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.2086312174797058, "logits_per_char": -0.1043156087398529, "num_chars": 2}, {"sum_logits": -1.7067406177520752, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.7067406177520752, "logits_per_char": -0.8533703088760376, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 947, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.733027696609497, "incorrect_loss_raw": 0.20846980810165405, "correct_loss_per_char": 0.8665138483047485, "incorrect_loss_per_char": 0.10423490405082703, "correct_loss_per_token": 1.733027696609497, "incorrect_loss_per_token": 0.20846980810165405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20846980810165405, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -0.20846980810165405, "logits_per_char": -0.10423490405082703, "num_chars": 2}, {"sum_logits": -1.733027696609497, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.733027696609497, "logits_per_char": -0.8665138483047485, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 948, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5923993587493896, "incorrect_loss_raw": 0.2398848533630371, "correct_loss_per_char": 0.7961996793746948, "incorrect_loss_per_char": 0.11994242668151855, "correct_loss_per_token": 1.5923993587493896, "incorrect_loss_per_token": 0.2398848533630371, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2398848533630371, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.2398848533630371, "logits_per_char": -0.11994242668151855, "num_chars": 2}, {"sum_logits": -1.5923993587493896, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.5923993587493896, "logits_per_char": -0.7961996793746948, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 949, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2584994435310364, "incorrect_loss_raw": 1.5281989574432373, "correct_loss_per_char": 0.1292497217655182, "incorrect_loss_per_char": 0.7640994787216187, "correct_loss_per_token": 0.2584994435310364, "incorrect_loss_per_token": 1.5281989574432373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2584994435310364, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.2584994435310364, "logits_per_char": -0.1292497217655182, "num_chars": 2}, {"sum_logits": -1.5281989574432373, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.5281989574432373, "logits_per_char": -0.7640994787216187, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 950, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2899400293827057, "incorrect_loss_raw": 1.429377555847168, "correct_loss_per_char": 0.14497001469135284, "incorrect_loss_per_char": 0.714688777923584, "correct_loss_per_token": 0.2899400293827057, "incorrect_loss_per_token": 1.429377555847168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2899400293827057, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": true, "logits_per_token": -0.2899400293827057, "logits_per_char": -0.14497001469135284, "num_chars": 2}, {"sum_logits": -1.429377555847168, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": false, "logits_per_token": -1.429377555847168, "logits_per_char": -0.714688777923584, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 951, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23719489574432373, "incorrect_loss_raw": 1.6080611944198608, "correct_loss_per_char": 0.11859744787216187, "incorrect_loss_per_char": 0.8040305972099304, "correct_loss_per_token": 0.23719489574432373, "incorrect_loss_per_token": 1.6080611944198608, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23719489574432373, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.23719489574432373, "logits_per_char": -0.11859744787216187, "num_chars": 2}, {"sum_logits": -1.6080611944198608, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.6080611944198608, "logits_per_char": -0.8040305972099304, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 952, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20063674449920654, "incorrect_loss_raw": 1.7544939517974854, "correct_loss_per_char": 0.10031837224960327, "incorrect_loss_per_char": 0.8772469758987427, "correct_loss_per_token": 0.20063674449920654, "incorrect_loss_per_token": 1.7544939517974854, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20063674449920654, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.20063674449920654, "logits_per_char": -0.10031837224960327, "num_chars": 2}, {"sum_logits": -1.7544939517974854, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.7544939517974854, "logits_per_char": -0.8772469758987427, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 953, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5477380752563477, "incorrect_loss_raw": 0.2514076828956604, "correct_loss_per_char": 0.7738690376281738, "incorrect_loss_per_char": 0.1257038414478302, "correct_loss_per_token": 1.5477380752563477, "incorrect_loss_per_token": 0.2514076828956604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2514076828956604, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -0.2514076828956604, "logits_per_char": -0.1257038414478302, "num_chars": 2}, {"sum_logits": -1.5477380752563477, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.5477380752563477, "logits_per_char": -0.7738690376281738, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 954, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21275518834590912, "incorrect_loss_raw": 1.7082217931747437, "correct_loss_per_char": 0.10637759417295456, "incorrect_loss_per_char": 0.8541108965873718, "correct_loss_per_token": 0.21275518834590912, "incorrect_loss_per_token": 1.7082217931747437, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21275518834590912, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.21275518834590912, "logits_per_char": -0.10637759417295456, "num_chars": 2}, {"sum_logits": -1.7082217931747437, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.7082217931747437, "logits_per_char": -0.8541108965873718, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 955, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6420594453811646, "incorrect_loss_raw": 0.2253677099943161, "correct_loss_per_char": 0.8210297226905823, "incorrect_loss_per_char": 0.11268385499715805, "correct_loss_per_token": 1.6420594453811646, "incorrect_loss_per_token": 0.2253677099943161, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2253677099943161, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.2253677099943161, "logits_per_char": -0.11268385499715805, "num_chars": 2}, {"sum_logits": -1.6420594453811646, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.6420594453811646, "logits_per_char": -0.8210297226905823, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 956, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20060290396213531, "incorrect_loss_raw": 1.747627854347229, "correct_loss_per_char": 0.10030145198106766, "incorrect_loss_per_char": 0.8738139271736145, "correct_loss_per_token": 0.20060290396213531, "incorrect_loss_per_token": 1.747627854347229, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20060290396213531, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.20060290396213531, "logits_per_char": -0.10030145198106766, "num_chars": 2}, {"sum_logits": -1.747627854347229, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.747627854347229, "logits_per_char": -0.8738139271736145, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 957, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6460882425308228, "incorrect_loss_raw": 0.2249721735715866, "correct_loss_per_char": 0.8230441212654114, "incorrect_loss_per_char": 0.1124860867857933, "correct_loss_per_token": 1.6460882425308228, "incorrect_loss_per_token": 0.2249721735715866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2249721735715866, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -0.2249721735715866, "logits_per_char": -0.1124860867857933, "num_chars": 2}, {"sum_logits": -1.6460882425308228, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.6460882425308228, "logits_per_char": -0.8230441212654114, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 958, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23735859990119934, "incorrect_loss_raw": 1.5990149974822998, "correct_loss_per_char": 0.11867929995059967, "incorrect_loss_per_char": 0.7995074987411499, "correct_loss_per_token": 0.23735859990119934, "incorrect_loss_per_token": 1.5990149974822998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23735859990119934, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.23735859990119934, "logits_per_char": -0.11867929995059967, "num_chars": 2}, {"sum_logits": -1.5990149974822998, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.5990149974822998, "logits_per_char": -0.7995074987411499, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 959, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25446105003356934, "incorrect_loss_raw": 1.523293375968933, "correct_loss_per_char": 0.12723052501678467, "incorrect_loss_per_char": 0.7616466879844666, "correct_loss_per_token": 0.25446105003356934, "incorrect_loss_per_token": 1.523293375968933, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25446105003356934, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": true, "logits_per_token": -0.25446105003356934, "logits_per_char": -0.12723052501678467, "num_chars": 2}, {"sum_logits": -1.523293375968933, "num_tokens": 1, "num_tokens_all": 1031, "is_greedy": false, "logits_per_token": -1.523293375968933, "logits_per_char": -0.7616466879844666, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 960, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6327028274536133, "incorrect_loss_raw": 0.2290569394826889, "correct_loss_per_char": 0.8163514137268066, "incorrect_loss_per_char": 0.11452846974134445, "correct_loss_per_token": 1.6327028274536133, "incorrect_loss_per_token": 0.2290569394826889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2290569394826889, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.2290569394826889, "logits_per_char": -0.11452846974134445, "num_chars": 2}, {"sum_logits": -1.6327028274536133, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.6327028274536133, "logits_per_char": -0.8163514137268066, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 961, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18319551646709442, "incorrect_loss_raw": 1.8232425451278687, "correct_loss_per_char": 0.09159775823354721, "incorrect_loss_per_char": 0.9116212725639343, "correct_loss_per_token": 0.18319551646709442, "incorrect_loss_per_token": 1.8232425451278687, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18319551646709442, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.18319551646709442, "logits_per_char": -0.09159775823354721, "num_chars": 2}, {"sum_logits": -1.8232425451278687, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.8232425451278687, "logits_per_char": -0.9116212725639343, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 962, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.509804606437683, "incorrect_loss_raw": 0.26663362979888916, "correct_loss_per_char": 0.7549023032188416, "incorrect_loss_per_char": 0.13331681489944458, "correct_loss_per_token": 1.509804606437683, "incorrect_loss_per_token": 0.26663362979888916, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26663362979888916, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.26663362979888916, "logits_per_char": -0.13331681489944458, "num_chars": 2}, {"sum_logits": -1.509804606437683, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.509804606437683, "logits_per_char": -0.7549023032188416, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 963, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24770952761173248, "incorrect_loss_raw": 1.561020851135254, "correct_loss_per_char": 0.12385476380586624, "incorrect_loss_per_char": 0.780510425567627, "correct_loss_per_token": 0.24770952761173248, "incorrect_loss_per_token": 1.561020851135254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24770952761173248, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.24770952761173248, "logits_per_char": -0.12385476380586624, "num_chars": 2}, {"sum_logits": -1.561020851135254, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.561020851135254, "logits_per_char": -0.780510425567627, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 964, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2648542523384094, "incorrect_loss_raw": 1.5133860111236572, "correct_loss_per_char": 0.1324271261692047, "incorrect_loss_per_char": 0.7566930055618286, "correct_loss_per_token": 0.2648542523384094, "incorrect_loss_per_token": 1.5133860111236572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2648542523384094, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.2648542523384094, "logits_per_char": -0.1324271261692047, "num_chars": 2}, {"sum_logits": -1.5133860111236572, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.5133860111236572, "logits_per_char": -0.7566930055618286, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 965, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2160981297492981, "incorrect_loss_raw": 1.677215576171875, "correct_loss_per_char": 0.10804906487464905, "incorrect_loss_per_char": 0.8386077880859375, "correct_loss_per_token": 0.2160981297492981, "incorrect_loss_per_token": 1.677215576171875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2160981297492981, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.2160981297492981, "logits_per_char": -0.10804906487464905, "num_chars": 2}, {"sum_logits": -1.677215576171875, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -1.677215576171875, "logits_per_char": -0.8386077880859375, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 966, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4736045598983765, "incorrect_loss_raw": 0.2735099494457245, "correct_loss_per_char": 0.7368022799491882, "incorrect_loss_per_char": 0.13675497472286224, "correct_loss_per_token": 1.4736045598983765, "incorrect_loss_per_token": 0.2735099494457245, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2735099494457245, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.2735099494457245, "logits_per_char": -0.13675497472286224, "num_chars": 2}, {"sum_logits": -1.4736045598983765, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.4736045598983765, "logits_per_char": -0.7368022799491882, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 967, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27849945425987244, "incorrect_loss_raw": 1.4749664068222046, "correct_loss_per_char": 0.13924972712993622, "incorrect_loss_per_char": 0.7374832034111023, "correct_loss_per_token": 0.27849945425987244, "incorrect_loss_per_token": 1.4749664068222046, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27849945425987244, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.27849945425987244, "logits_per_char": -0.13924972712993622, "num_chars": 2}, {"sum_logits": -1.4749664068222046, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.4749664068222046, "logits_per_char": -0.7374832034111023, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 968, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7361602783203125, "incorrect_loss_raw": 0.2020743042230606, "correct_loss_per_char": 0.8680801391601562, "incorrect_loss_per_char": 0.1010371521115303, "correct_loss_per_token": 1.7361602783203125, "incorrect_loss_per_token": 0.2020743042230606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2020743042230606, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.2020743042230606, "logits_per_char": -0.1010371521115303, "num_chars": 2}, {"sum_logits": -1.7361602783203125, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.7361602783203125, "logits_per_char": -0.8680801391601562, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 969, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6538059711456299, "incorrect_loss_raw": 0.22126571834087372, "correct_loss_per_char": 0.8269029855728149, "incorrect_loss_per_char": 0.11063285917043686, "correct_loss_per_token": 1.6538059711456299, "incorrect_loss_per_token": 0.22126571834087372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22126571834087372, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -0.22126571834087372, "logits_per_char": -0.11063285917043686, "num_chars": 2}, {"sum_logits": -1.6538059711456299, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.6538059711456299, "logits_per_char": -0.8269029855728149, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 970, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24483387172222137, "incorrect_loss_raw": 1.575182557106018, "correct_loss_per_char": 0.12241693586111069, "incorrect_loss_per_char": 0.787591278553009, "correct_loss_per_token": 0.24483387172222137, "incorrect_loss_per_token": 1.575182557106018, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24483387172222137, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.24483387172222137, "logits_per_char": -0.12241693586111069, "num_chars": 2}, {"sum_logits": -1.575182557106018, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.575182557106018, "logits_per_char": -0.787591278553009, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 971, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23416419327259064, "incorrect_loss_raw": 1.6255481243133545, "correct_loss_per_char": 0.11708209663629532, "incorrect_loss_per_char": 0.8127740621566772, "correct_loss_per_token": 0.23416419327259064, "incorrect_loss_per_token": 1.6255481243133545, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23416419327259064, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.23416419327259064, "logits_per_char": -0.11708209663629532, "num_chars": 2}, {"sum_logits": -1.6255481243133545, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.6255481243133545, "logits_per_char": -0.8127740621566772, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 972, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5952987670898438, "incorrect_loss_raw": 0.23831753432750702, "correct_loss_per_char": 0.7976493835449219, "incorrect_loss_per_char": 0.11915876716375351, "correct_loss_per_token": 1.5952987670898438, "incorrect_loss_per_token": 0.23831753432750702, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23831753432750702, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": true, "logits_per_token": -0.23831753432750702, "logits_per_char": -0.11915876716375351, "num_chars": 2}, {"sum_logits": -1.5952987670898438, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": false, "logits_per_token": -1.5952987670898438, "logits_per_char": -0.7976493835449219, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 973, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2512330114841461, "incorrect_loss_raw": 1.5376167297363281, "correct_loss_per_char": 0.12561650574207306, "incorrect_loss_per_char": 0.7688083648681641, "correct_loss_per_token": 0.2512330114841461, "incorrect_loss_per_token": 1.5376167297363281, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2512330114841461, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -0.2512330114841461, "logits_per_char": -0.12561650574207306, "num_chars": 2}, {"sum_logits": -1.5376167297363281, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.5376167297363281, "logits_per_char": -0.7688083648681641, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 974, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6500824689865112, "incorrect_loss_raw": 0.22276391088962555, "correct_loss_per_char": 0.8250412344932556, "incorrect_loss_per_char": 0.11138195544481277, "correct_loss_per_token": 1.6500824689865112, "incorrect_loss_per_token": 0.22276391088962555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22276391088962555, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.22276391088962555, "logits_per_char": -0.11138195544481277, "num_chars": 2}, {"sum_logits": -1.6500824689865112, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.6500824689865112, "logits_per_char": -0.8250412344932556, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 975, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6324563026428223, "incorrect_loss_raw": 0.22734451293945312, "correct_loss_per_char": 0.8162281513214111, "incorrect_loss_per_char": 0.11367225646972656, "correct_loss_per_token": 1.6324563026428223, "incorrect_loss_per_token": 0.22734451293945312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22734451293945312, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": true, "logits_per_token": -0.22734451293945312, "logits_per_char": -0.11367225646972656, "num_chars": 2}, {"sum_logits": -1.6324563026428223, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.6324563026428223, "logits_per_char": -0.8162281513214111, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 976, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6326040029525757, "incorrect_loss_raw": 0.2279643714427948, "correct_loss_per_char": 0.8163020014762878, "incorrect_loss_per_char": 0.1139821857213974, "correct_loss_per_token": 1.6326040029525757, "incorrect_loss_per_token": 0.2279643714427948, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2279643714427948, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.2279643714427948, "logits_per_char": -0.1139821857213974, "num_chars": 2}, {"sum_logits": -1.6326040029525757, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.6326040029525757, "logits_per_char": -0.8163020014762878, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 977, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8271433115005493, "incorrect_loss_raw": 0.1858772486448288, "correct_loss_per_char": 0.9135716557502747, "incorrect_loss_per_char": 0.0929386243224144, "correct_loss_per_token": 1.8271433115005493, "incorrect_loss_per_token": 0.1858772486448288, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1858772486448288, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.1858772486448288, "logits_per_char": -0.0929386243224144, "num_chars": 2}, {"sum_logits": -1.8271433115005493, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.8271433115005493, "logits_per_char": -0.9135716557502747, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 978, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23195500671863556, "incorrect_loss_raw": 1.6131311655044556, "correct_loss_per_char": 0.11597750335931778, "incorrect_loss_per_char": 0.8065655827522278, "correct_loss_per_token": 0.23195500671863556, "incorrect_loss_per_token": 1.6131311655044556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23195500671863556, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": true, "logits_per_token": -0.23195500671863556, "logits_per_char": -0.11597750335931778, "num_chars": 2}, {"sum_logits": -1.6131311655044556, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.6131311655044556, "logits_per_char": -0.8065655827522278, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 979, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2661978006362915, "incorrect_loss_raw": 1.4965120553970337, "correct_loss_per_char": 0.13309890031814575, "incorrect_loss_per_char": 0.7482560276985168, "correct_loss_per_token": 0.2661978006362915, "incorrect_loss_per_token": 1.4965120553970337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2661978006362915, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.2661978006362915, "logits_per_char": -0.13309890031814575, "num_chars": 2}, {"sum_logits": -1.4965120553970337, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.4965120553970337, "logits_per_char": -0.7482560276985168, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 980, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7925386428833008, "incorrect_loss_raw": 0.1905231475830078, "correct_loss_per_char": 0.8962693214416504, "incorrect_loss_per_char": 0.0952615737915039, "correct_loss_per_token": 1.7925386428833008, "incorrect_loss_per_token": 0.1905231475830078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1905231475830078, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -0.1905231475830078, "logits_per_char": -0.0952615737915039, "num_chars": 2}, {"sum_logits": -1.7925386428833008, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.7925386428833008, "logits_per_char": -0.8962693214416504, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 981, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22371207177639008, "incorrect_loss_raw": 1.643074870109558, "correct_loss_per_char": 0.11185603588819504, "incorrect_loss_per_char": 0.821537435054779, "correct_loss_per_token": 0.22371207177639008, "incorrect_loss_per_token": 1.643074870109558, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22371207177639008, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -0.22371207177639008, "logits_per_char": -0.11185603588819504, "num_chars": 2}, {"sum_logits": -1.643074870109558, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.643074870109558, "logits_per_char": -0.821537435054779, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 982, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.527182698249817, "incorrect_loss_raw": 0.25847944617271423, "correct_loss_per_char": 0.7635913491249084, "incorrect_loss_per_char": 0.12923972308635712, "correct_loss_per_token": 1.527182698249817, "incorrect_loss_per_token": 0.25847944617271423, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25847944617271423, "num_tokens": 1, "num_tokens_all": 1134, "is_greedy": true, "logits_per_token": -0.25847944617271423, "logits_per_char": -0.12923972308635712, "num_chars": 2}, {"sum_logits": -1.527182698249817, "num_tokens": 1, "num_tokens_all": 1134, "is_greedy": false, "logits_per_token": -1.527182698249817, "logits_per_char": -0.7635913491249084, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 983, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2553847134113312, "incorrect_loss_raw": 1.5333188772201538, "correct_loss_per_char": 0.1276923567056656, "incorrect_loss_per_char": 0.7666594386100769, "correct_loss_per_token": 0.2553847134113312, "incorrect_loss_per_token": 1.5333188772201538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2553847134113312, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.2553847134113312, "logits_per_char": -0.1276923567056656, "num_chars": 2}, {"sum_logits": -1.5333188772201538, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.5333188772201538, "logits_per_char": -0.7666594386100769, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 984, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2785904109477997, "incorrect_loss_raw": 1.4491146802902222, "correct_loss_per_char": 0.13929520547389984, "incorrect_loss_per_char": 0.7245573401451111, "correct_loss_per_token": 0.2785904109477997, "incorrect_loss_per_token": 1.4491146802902222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2785904109477997, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.2785904109477997, "logits_per_char": -0.13929520547389984, "num_chars": 2}, {"sum_logits": -1.4491146802902222, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.4491146802902222, "logits_per_char": -0.7245573401451111, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 985, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30233344435691833, "incorrect_loss_raw": 1.3674710988998413, "correct_loss_per_char": 0.15116672217845917, "incorrect_loss_per_char": 0.6837355494499207, "correct_loss_per_token": 0.30233344435691833, "incorrect_loss_per_token": 1.3674710988998413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30233344435691833, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.30233344435691833, "logits_per_char": -0.15116672217845917, "num_chars": 2}, {"sum_logits": -1.3674710988998413, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.3674710988998413, "logits_per_char": -0.6837355494499207, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 986, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2923991680145264, "incorrect_loss_raw": 0.3370068073272705, "correct_loss_per_char": 0.6461995840072632, "incorrect_loss_per_char": 0.16850340366363525, "correct_loss_per_token": 1.2923991680145264, "incorrect_loss_per_token": 0.3370068073272705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3370068073272705, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -0.3370068073272705, "logits_per_char": -0.16850340366363525, "num_chars": 2}, {"sum_logits": -1.2923991680145264, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.2923991680145264, "logits_per_char": -0.6461995840072632, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 987, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24027755856513977, "incorrect_loss_raw": 1.5963692665100098, "correct_loss_per_char": 0.12013877928256989, "incorrect_loss_per_char": 0.7981846332550049, "correct_loss_per_token": 0.24027755856513977, "incorrect_loss_per_token": 1.5963692665100098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24027755856513977, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.24027755856513977, "logits_per_char": -0.12013877928256989, "num_chars": 2}, {"sum_logits": -1.5963692665100098, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.5963692665100098, "logits_per_char": -0.7981846332550049, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 988, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4014060497283936, "incorrect_loss_raw": 0.30055856704711914, "correct_loss_per_char": 0.7007030248641968, "incorrect_loss_per_char": 0.15027928352355957, "correct_loss_per_token": 1.4014060497283936, "incorrect_loss_per_token": 0.30055856704711914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30055856704711914, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": true, "logits_per_token": -0.30055856704711914, "logits_per_char": -0.15027928352355957, "num_chars": 2}, {"sum_logits": -1.4014060497283936, "num_tokens": 1, "num_tokens_all": 1075, "is_greedy": false, "logits_per_token": -1.4014060497283936, "logits_per_char": -0.7007030248641968, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 989, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22863230109214783, "incorrect_loss_raw": 1.6289061307907104, "correct_loss_per_char": 0.11431615054607391, "incorrect_loss_per_char": 0.8144530653953552, "correct_loss_per_token": 0.22863230109214783, "incorrect_loss_per_token": 1.6289061307907104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22863230109214783, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.22863230109214783, "logits_per_char": -0.11431615054607391, "num_chars": 2}, {"sum_logits": -1.6289061307907104, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.6289061307907104, "logits_per_char": -0.8144530653953552, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 990, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3181835412979126, "incorrect_loss_raw": 0.3207196891307831, "correct_loss_per_char": 0.6590917706489563, "incorrect_loss_per_char": 0.16035984456539154, "correct_loss_per_token": 1.3181835412979126, "incorrect_loss_per_token": 0.3207196891307831, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3207196891307831, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": true, "logits_per_token": -0.3207196891307831, "logits_per_char": -0.16035984456539154, "num_chars": 2}, {"sum_logits": -1.3181835412979126, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": false, "logits_per_token": -1.3181835412979126, "logits_per_char": -0.6590917706489563, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 991, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21258480846881866, "incorrect_loss_raw": 1.705088496208191, "correct_loss_per_char": 0.10629240423440933, "incorrect_loss_per_char": 0.8525442481040955, "correct_loss_per_token": 0.21258480846881866, "incorrect_loss_per_token": 1.705088496208191, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21258480846881866, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.21258480846881866, "logits_per_char": -0.10629240423440933, "num_chars": 2}, {"sum_logits": -1.705088496208191, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.705088496208191, "logits_per_char": -0.8525442481040955, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 992, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1860940009355545, "incorrect_loss_raw": 1.8389976024627686, "correct_loss_per_char": 0.09304700046777725, "incorrect_loss_per_char": 0.9194988012313843, "correct_loss_per_token": 0.1860940009355545, "incorrect_loss_per_token": 1.8389976024627686, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1860940009355545, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -0.1860940009355545, "logits_per_char": -0.09304700046777725, "num_chars": 2}, {"sum_logits": -1.8389976024627686, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -1.8389976024627686, "logits_per_char": -0.9194988012313843, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 993, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28200727701187134, "incorrect_loss_raw": 1.450995922088623, "correct_loss_per_char": 0.14100363850593567, "incorrect_loss_per_char": 0.7254979610443115, "correct_loss_per_token": 0.28200727701187134, "incorrect_loss_per_token": 1.450995922088623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28200727701187134, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.28200727701187134, "logits_per_char": -0.14100363850593567, "num_chars": 2}, {"sum_logits": -1.450995922088623, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.450995922088623, "logits_per_char": -0.7254979610443115, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 994, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2444297969341278, "incorrect_loss_raw": 1.5561574697494507, "correct_loss_per_char": 0.1222148984670639, "incorrect_loss_per_char": 0.7780787348747253, "correct_loss_per_token": 0.2444297969341278, "incorrect_loss_per_token": 1.5561574697494507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2444297969341278, "num_tokens": 1, "num_tokens_all": 1019, "is_greedy": true, "logits_per_token": -0.2444297969341278, "logits_per_char": -0.1222148984670639, "num_chars": 2}, {"sum_logits": -1.5561574697494507, "num_tokens": 1, "num_tokens_all": 1019, "is_greedy": false, "logits_per_token": -1.5561574697494507, "logits_per_char": -0.7780787348747253, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 995, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2041742354631424, "incorrect_loss_raw": 1.7373995780944824, "correct_loss_per_char": 0.1020871177315712, "incorrect_loss_per_char": 0.8686997890472412, "correct_loss_per_token": 0.2041742354631424, "incorrect_loss_per_token": 1.7373995780944824, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2041742354631424, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": true, "logits_per_token": -0.2041742354631424, "logits_per_char": -0.1020871177315712, "num_chars": 2}, {"sum_logits": -1.7373995780944824, "num_tokens": 1, "num_tokens_all": 1048, "is_greedy": false, "logits_per_token": -1.7373995780944824, "logits_per_char": -0.8686997890472412, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 996, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6815720796585083, "incorrect_loss_raw": 0.21641404926776886, "correct_loss_per_char": 0.8407860398292542, "incorrect_loss_per_char": 0.10820702463388443, "correct_loss_per_token": 1.6815720796585083, "incorrect_loss_per_token": 0.21641404926776886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21641404926776886, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.21641404926776886, "logits_per_char": -0.10820702463388443, "num_chars": 2}, {"sum_logits": -1.6815720796585083, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.6815720796585083, "logits_per_char": -0.8407860398292542, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 997, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6188817024230957, "incorrect_loss_raw": 0.22996702790260315, "correct_loss_per_char": 0.8094408512115479, "incorrect_loss_per_char": 0.11498351395130157, "correct_loss_per_token": 1.6188817024230957, "incorrect_loss_per_token": 0.22996702790260315, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22996702790260315, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.22996702790260315, "logits_per_char": -0.11498351395130157, "num_chars": 2}, {"sum_logits": -1.6188817024230957, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.6188817024230957, "logits_per_char": -0.8094408512115479, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 998, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23991993069648743, "incorrect_loss_raw": 1.5908372402191162, "correct_loss_per_char": 0.11995996534824371, "incorrect_loss_per_char": 0.7954186201095581, "correct_loss_per_token": 0.23991993069648743, "incorrect_loss_per_token": 1.5908372402191162, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23991993069648743, "num_tokens": 1, "num_tokens_all": 1156, "is_greedy": true, "logits_per_token": -0.23991993069648743, "logits_per_char": -0.11995996534824371, "num_chars": 2}, {"sum_logits": -1.5908372402191162, "num_tokens": 1, "num_tokens_all": 1156, "is_greedy": false, "logits_per_token": -1.5908372402191162, "logits_per_char": -0.7954186201095581, "num_chars": 2}], "label": 0, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 999, "native_id": null, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.678717017173767, "incorrect_loss_raw": 0.22116625308990479, "correct_loss_per_char": 0.8393585085868835, "incorrect_loss_per_char": 0.11058312654495239, "correct_loss_per_token": 1.678717017173767, "incorrect_loss_per_token": 0.22116625308990479, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22116625308990479, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.22116625308990479, "logits_per_char": -0.11058312654495239, "num_chars": 2}, {"sum_logits": -1.678717017173767, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.678717017173767, "logits_per_char": -0.8393585085868835, "num_chars": 2}], "label": 1, "task_hash": "e6a86116b0573ade267bddc6598da6f4", "model_hash": "03418cf8091a9882619950ffb07429a5"}