diff --git "a/evals/core_9mcqa/task-006-csqa:mc-predictions.jsonl" "b/evals/core_9mcqa/task-006-csqa:mc-predictions.jsonl" new file mode 100644--- /dev/null +++ "b/evals/core_9mcqa/task-006-csqa:mc-predictions.jsonl" @@ -0,0 +1,1221 @@ +{"doc_id": 0, "native_id": "1afa02df02c908a558b4036e80242fac", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5826294422149658, "incorrect_loss_raw": 1.6240987181663513, "correct_loss_per_char": 0.7913147211074829, "incorrect_loss_per_char": 0.8120493590831757, "correct_loss_per_token": 1.5826294422149658, "incorrect_loss_per_token": 1.6240987181663513, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5826294422149658, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5826294422149658, "logits_per_char": -0.7913147211074829, "num_chars": 2}, {"sum_logits": -1.5907247066497803, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5907247066497803, "logits_per_char": -0.7953623533248901, "num_chars": 2}, {"sum_logits": -1.5826234817504883, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.5826234817504883, "logits_per_char": -0.7913117408752441, "num_chars": 2}, {"sum_logits": -1.6167736053466797, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6167736053466797, "logits_per_char": -0.8083868026733398, "num_chars": 2}, {"sum_logits": -1.706273078918457, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.706273078918457, "logits_per_char": -0.8531365394592285, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1, "native_id": "a7ab086045575bb497933726e4e6ad28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.125499963760376, "incorrect_loss_raw": 1.829802691936493, "correct_loss_per_char": 0.562749981880188, "incorrect_loss_per_char": 0.9149013459682465, "correct_loss_per_token": 1.125499963760376, "incorrect_loss_per_token": 1.829802691936493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.125499963760376, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.125499963760376, "logits_per_char": -0.562749981880188, "num_chars": 2}, {"sum_logits": -1.4967677593231201, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4967677593231201, "logits_per_char": -0.7483838796615601, "num_chars": 2}, {"sum_logits": -1.7075775861740112, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7075775861740112, "logits_per_char": -0.8537887930870056, "num_chars": 2}, {"sum_logits": -1.7870699167251587, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7870699167251587, "logits_per_char": -0.8935349583625793, "num_chars": 2}, {"sum_logits": -2.3277955055236816, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.3277955055236816, "logits_per_char": -1.1638977527618408, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 2, "native_id": "b8c0a4703079cf661d7261a60a1bcbff", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6123731136322021, "incorrect_loss_raw": 1.6320725083351135, "correct_loss_per_char": 0.8061865568161011, "incorrect_loss_per_char": 0.8160362541675568, "correct_loss_per_token": 1.6123731136322021, "incorrect_loss_per_token": 1.6320725083351135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4225993156433105, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4225993156433105, "logits_per_char": -0.7112996578216553, "num_chars": 2}, {"sum_logits": -1.6123731136322021, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6123731136322021, "logits_per_char": -0.8061865568161011, "num_chars": 2}, {"sum_logits": -1.6045572757720947, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6045572757720947, "logits_per_char": -0.8022786378860474, "num_chars": 2}, {"sum_logits": -1.5609703063964844, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5609703063964844, "logits_per_char": -0.7804851531982422, "num_chars": 2}, {"sum_logits": -1.9401631355285645, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9401631355285645, "logits_per_char": -0.9700815677642822, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 3, "native_id": "e68fb2448fd74e402aae9982aa76e527", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.361582636833191, "incorrect_loss_raw": 1.7040134370326996, "correct_loss_per_char": 0.6807913184165955, "incorrect_loss_per_char": 0.8520067185163498, "correct_loss_per_token": 1.361582636833191, "incorrect_loss_per_token": 1.7040134370326996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.361582636833191, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.361582636833191, "logits_per_char": -0.6807913184165955, "num_chars": 2}, {"sum_logits": -1.5227479934692383, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5227479934692383, "logits_per_char": -0.7613739967346191, "num_chars": 2}, {"sum_logits": -1.6483741998672485, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6483741998672485, "logits_per_char": -0.8241870999336243, "num_chars": 2}, {"sum_logits": -1.690671443939209, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.690671443939209, "logits_per_char": -0.8453357219696045, "num_chars": 2}, {"sum_logits": -1.9542601108551025, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9542601108551025, "logits_per_char": -0.9771300554275513, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 4, "native_id": "2435de612dd69f2012b9e40d6af4ce38", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6782035827636719, "incorrect_loss_raw": 1.6016753315925598, "correct_loss_per_char": 0.8391017913818359, "incorrect_loss_per_char": 0.8008376657962799, "correct_loss_per_token": 1.6782035827636719, "incorrect_loss_per_token": 1.6016753315925598, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6782035827636719, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6782035827636719, "logits_per_char": -0.8391017913818359, "num_chars": 2}, {"sum_logits": -1.5836812257766724, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5836812257766724, "logits_per_char": -0.7918406128883362, "num_chars": 2}, {"sum_logits": -1.5443662405014038, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.5443662405014038, "logits_per_char": -0.7721831202507019, "num_chars": 2}, {"sum_logits": -1.5565462112426758, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5565462112426758, "logits_per_char": -0.7782731056213379, "num_chars": 2}, {"sum_logits": -1.7221076488494873, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7221076488494873, "logits_per_char": -0.8610538244247437, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 5, "native_id": "a4892551cb4beb279653ae52d0de4c89", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5256510972976685, "incorrect_loss_raw": 1.676107943058014, "correct_loss_per_char": 0.7628255486488342, "incorrect_loss_per_char": 0.838053971529007, "correct_loss_per_token": 1.5256510972976685, "incorrect_loss_per_token": 1.676107943058014, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.366059422492981, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.366059422492981, "logits_per_char": -0.6830297112464905, "num_chars": 2}, {"sum_logits": -1.7435803413391113, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7435803413391113, "logits_per_char": -0.8717901706695557, "num_chars": 2}, {"sum_logits": -1.5256510972976685, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5256510972976685, "logits_per_char": -0.7628255486488342, "num_chars": 2}, {"sum_logits": -1.5099865198135376, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5099865198135376, "logits_per_char": -0.7549932599067688, "num_chars": 2}, {"sum_logits": -2.084805488586426, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.084805488586426, "logits_per_char": -1.042402744293213, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 6, "native_id": "118a9093a30695622363455e4d911866", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7053264379501343, "incorrect_loss_raw": 1.6102095246315002, "correct_loss_per_char": 0.8526632189750671, "incorrect_loss_per_char": 0.8051047623157501, "correct_loss_per_token": 1.7053264379501343, "incorrect_loss_per_token": 1.6102095246315002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.439192771911621, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.439192771911621, "logits_per_char": -0.7195963859558105, "num_chars": 2}, {"sum_logits": -1.7053264379501343, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7053264379501343, "logits_per_char": -0.8526632189750671, "num_chars": 2}, {"sum_logits": -1.6111624240875244, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6111624240875244, "logits_per_char": -0.8055812120437622, "num_chars": 2}, {"sum_logits": -1.5022188425064087, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5022188425064087, "logits_per_char": -0.7511094212532043, "num_chars": 2}, {"sum_logits": -1.8882640600204468, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.8882640600204468, "logits_per_char": -0.9441320300102234, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 7, "native_id": "05ea49b82e8ec519e82d6633936ab8bf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7434412240982056, "incorrect_loss_raw": 1.6974075436592102, "correct_loss_per_char": 0.8717206120491028, "incorrect_loss_per_char": 0.8487037718296051, "correct_loss_per_token": 1.7434412240982056, "incorrect_loss_per_token": 1.6974075436592102, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1744518280029297, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.1744518280029297, "logits_per_char": -0.5872259140014648, "num_chars": 2}, {"sum_logits": -1.431705355644226, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.431705355644226, "logits_per_char": -0.715852677822113, "num_chars": 2}, {"sum_logits": -1.6369818449020386, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6369818449020386, "logits_per_char": -0.8184909224510193, "num_chars": 2}, {"sum_logits": -1.7434412240982056, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7434412240982056, "logits_per_char": -0.8717206120491028, "num_chars": 2}, {"sum_logits": -2.5464911460876465, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.5464911460876465, "logits_per_char": -1.2732455730438232, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 8, "native_id": "c0c07ce781653b2a2c01871ba2bcba93", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2894941568374634, "incorrect_loss_raw": 1.7385434806346893, "correct_loss_per_char": 0.6447470784187317, "incorrect_loss_per_char": 0.8692717403173447, "correct_loss_per_token": 1.2894941568374634, "incorrect_loss_per_token": 1.7385434806346893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2894941568374634, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2894941568374634, "logits_per_char": -0.6447470784187317, "num_chars": 2}, {"sum_logits": -1.4821840524673462, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4821840524673462, "logits_per_char": -0.7410920262336731, "num_chars": 2}, {"sum_logits": -1.6079745292663574, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6079745292663574, "logits_per_char": -0.8039872646331787, "num_chars": 2}, {"sum_logits": -1.7913601398468018, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7913601398468018, "logits_per_char": -0.8956800699234009, "num_chars": 2}, {"sum_logits": -2.072655200958252, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.072655200958252, "logits_per_char": -1.036327600479126, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 9, "native_id": "1d24f406b6828492040b405d3f35119c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6266052722930908, "incorrect_loss_raw": 1.7015590369701385, "correct_loss_per_char": 0.8133026361465454, "incorrect_loss_per_char": 0.8507795184850693, "correct_loss_per_token": 1.6266052722930908, "incorrect_loss_per_token": 1.7015590369701385, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2241512537002563, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.2241512537002563, "logits_per_char": -0.6120756268501282, "num_chars": 2}, {"sum_logits": -1.4301421642303467, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4301421642303467, "logits_per_char": -0.7150710821151733, "num_chars": 2}, {"sum_logits": -1.6266052722930908, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6266052722930908, "logits_per_char": -0.8133026361465454, "num_chars": 2}, {"sum_logits": -1.7413454055786133, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.7413454055786133, "logits_per_char": -0.8706727027893066, "num_chars": 2}, {"sum_logits": -2.410597324371338, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.410597324371338, "logits_per_char": -1.205298662185669, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 10, "native_id": "57f92025d860e32c4e780c0d51c1c20c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.108463764190674, "incorrect_loss_raw": 1.5238769352436066, "correct_loss_per_char": 1.054231882095337, "incorrect_loss_per_char": 0.7619384676218033, "correct_loss_per_token": 2.108463764190674, "incorrect_loss_per_token": 1.5238769352436066, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4324499368667603, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4324499368667603, "logits_per_char": -0.7162249684333801, "num_chars": 2}, {"sum_logits": -1.509533405303955, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.509533405303955, "logits_per_char": -0.7547667026519775, "num_chars": 2}, {"sum_logits": -1.522755742073059, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.522755742073059, "logits_per_char": -0.7613778710365295, "num_chars": 2}, {"sum_logits": -1.6307686567306519, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6307686567306519, "logits_per_char": -0.8153843283653259, "num_chars": 2}, {"sum_logits": -2.108463764190674, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.108463764190674, "logits_per_char": -1.054231882095337, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 11, "native_id": "81eb4b2ee66edd8bc91ee944697c4e9f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.675123929977417, "incorrect_loss_raw": 1.642827719449997, "correct_loss_per_char": 0.8375619649887085, "incorrect_loss_per_char": 0.8214138597249985, "correct_loss_per_token": 1.675123929977417, "incorrect_loss_per_token": 1.642827719449997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.234598994255066, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.234598994255066, "logits_per_char": -0.617299497127533, "num_chars": 2}, {"sum_logits": -1.6152312755584717, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6152312755584717, "logits_per_char": -0.8076156377792358, "num_chars": 2}, {"sum_logits": -1.6473393440246582, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6473393440246582, "logits_per_char": -0.8236696720123291, "num_chars": 2}, {"sum_logits": -1.675123929977417, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.675123929977417, "logits_per_char": -0.8375619649887085, "num_chars": 2}, {"sum_logits": -2.074141263961792, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.074141263961792, "logits_per_char": -1.037070631980896, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 12, "native_id": "d807e7ae60976324920c8d29eb42dad6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3510067462921143, "incorrect_loss_raw": 1.712037593126297, "correct_loss_per_char": 0.6755033731460571, "incorrect_loss_per_char": 0.8560187965631485, "correct_loss_per_token": 1.3510067462921143, "incorrect_loss_per_token": 1.712037593126297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3510067462921143, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3510067462921143, "logits_per_char": -0.6755033731460571, "num_chars": 2}, {"sum_logits": -1.5236684083938599, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5236684083938599, "logits_per_char": -0.7618342041969299, "num_chars": 2}, {"sum_logits": -1.603392243385315, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.603392243385315, "logits_per_char": -0.8016961216926575, "num_chars": 2}, {"sum_logits": -1.6685503721237183, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6685503721237183, "logits_per_char": -0.8342751860618591, "num_chars": 2}, {"sum_logits": -2.052539348602295, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.052539348602295, "logits_per_char": -1.0262696743011475, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 13, "native_id": "7ea9f721ffc662918bb0c0937a487f04", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7156718969345093, "incorrect_loss_raw": 1.617751121520996, "correct_loss_per_char": 0.8578359484672546, "incorrect_loss_per_char": 0.808875560760498, "correct_loss_per_token": 1.7156718969345093, "incorrect_loss_per_token": 1.617751121520996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3207086324691772, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3207086324691772, "logits_per_char": -0.6603543162345886, "num_chars": 2}, {"sum_logits": -1.5141927003860474, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5141927003860474, "logits_per_char": -0.7570963501930237, "num_chars": 2}, {"sum_logits": -1.6785014867782593, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6785014867782593, "logits_per_char": -0.8392507433891296, "num_chars": 2}, {"sum_logits": -1.7156718969345093, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7156718969345093, "logits_per_char": -0.8578359484672546, "num_chars": 2}, {"sum_logits": -1.9576016664505005, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9576016664505005, "logits_per_char": -0.9788008332252502, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 14, "native_id": "fc1d33a2301a30214523c12573f81aba", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.608635663986206, "incorrect_loss_raw": 1.6277619898319244, "correct_loss_per_char": 0.804317831993103, "incorrect_loss_per_char": 0.8138809949159622, "correct_loss_per_token": 1.608635663986206, "incorrect_loss_per_token": 1.6277619898319244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4881120920181274, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4881120920181274, "logits_per_char": -0.7440560460090637, "num_chars": 2}, {"sum_logits": -1.4722867012023926, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4722867012023926, "logits_per_char": -0.7361433506011963, "num_chars": 2}, {"sum_logits": -1.608635663986206, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.608635663986206, "logits_per_char": -0.804317831993103, "num_chars": 2}, {"sum_logits": -1.708292007446289, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.708292007446289, "logits_per_char": -0.8541460037231445, "num_chars": 2}, {"sum_logits": -1.8423571586608887, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8423571586608887, "logits_per_char": -0.9211785793304443, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 15, "native_id": "3b8e1d236f5169b6c833a994d6d9c39a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8553919792175293, "incorrect_loss_raw": 1.6811326146125793, "correct_loss_per_char": 0.9276959896087646, "incorrect_loss_per_char": 0.8405663073062897, "correct_loss_per_token": 1.8553919792175293, "incorrect_loss_per_token": 1.6811326146125793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1109437942504883, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.1109437942504883, "logits_per_char": -0.5554718971252441, "num_chars": 2}, {"sum_logits": -1.4083256721496582, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4083256721496582, "logits_per_char": -0.7041628360748291, "num_chars": 2}, {"sum_logits": -1.696218729019165, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.696218729019165, "logits_per_char": -0.8481093645095825, "num_chars": 2}, {"sum_logits": -1.8553919792175293, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8553919792175293, "logits_per_char": -0.9276959896087646, "num_chars": 2}, {"sum_logits": -2.509042263031006, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.509042263031006, "logits_per_char": -1.254521131515503, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 16, "native_id": "c5c4166f2ed3c2b3517b79e6848e9ae2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7535029649734497, "incorrect_loss_raw": 1.6349593698978424, "correct_loss_per_char": 0.8767514824867249, "incorrect_loss_per_char": 0.8174796849489212, "correct_loss_per_token": 1.7535029649734497, "incorrect_loss_per_token": 1.6349593698978424, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2416912317276, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.2416912317276, "logits_per_char": -0.6208456158638, "num_chars": 2}, {"sum_logits": -1.4994944334030151, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4994944334030151, "logits_per_char": -0.7497472167015076, "num_chars": 2}, {"sum_logits": -1.641633152961731, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.641633152961731, "logits_per_char": -0.8208165764808655, "num_chars": 2}, {"sum_logits": -1.7535029649734497, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7535029649734497, "logits_per_char": -0.8767514824867249, "num_chars": 2}, {"sum_logits": -2.1570186614990234, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.1570186614990234, "logits_per_char": -1.0785093307495117, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 17, "native_id": "6dc5b2884737e66543ce65f8dc40c992", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.3404672145843506, "incorrect_loss_raw": 1.4963521361351013, "correct_loss_per_char": 1.1702336072921753, "incorrect_loss_per_char": 0.7481760680675507, "correct_loss_per_token": 2.3404672145843506, "incorrect_loss_per_token": 1.4963521361351013, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4499104022979736, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4499104022979736, "logits_per_char": -0.7249552011489868, "num_chars": 2}, {"sum_logits": -1.4089632034301758, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4089632034301758, "logits_per_char": -0.7044816017150879, "num_chars": 2}, {"sum_logits": -1.468557596206665, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.468557596206665, "logits_per_char": -0.7342787981033325, "num_chars": 2}, {"sum_logits": -1.6579773426055908, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6579773426055908, "logits_per_char": -0.8289886713027954, "num_chars": 2}, {"sum_logits": -2.3404672145843506, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.3404672145843506, "logits_per_char": -1.1702336072921753, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 18, "native_id": "8af63d58cc35061dec38e5448c325988", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9990074634552002, "incorrect_loss_raw": 1.5604390799999237, "correct_loss_per_char": 0.9995037317276001, "incorrect_loss_per_char": 0.7802195399999619, "correct_loss_per_token": 1.9990074634552002, "incorrect_loss_per_token": 1.5604390799999237, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3050364255905151, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3050364255905151, "logits_per_char": -0.6525182127952576, "num_chars": 2}, {"sum_logits": -1.4370888471603394, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4370888471603394, "logits_per_char": -0.7185444235801697, "num_chars": 2}, {"sum_logits": -1.6350411176681519, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6350411176681519, "logits_per_char": -0.8175205588340759, "num_chars": 2}, {"sum_logits": -1.8645899295806885, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.8645899295806885, "logits_per_char": -0.9322949647903442, "num_chars": 2}, {"sum_logits": -1.9990074634552002, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.9990074634552002, "logits_per_char": -0.9995037317276001, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 19, "native_id": "768fb09deab56046e1565b6a2556ad5c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6648223400115967, "incorrect_loss_raw": 1.6634474694728851, "correct_loss_per_char": 0.8324111700057983, "incorrect_loss_per_char": 0.8317237347364426, "correct_loss_per_token": 1.6648223400115967, "incorrect_loss_per_token": 1.6634474694728851, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.317238688468933, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.317238688468933, "logits_per_char": -0.6586193442344666, "num_chars": 2}, {"sum_logits": -1.5023748874664307, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5023748874664307, "logits_per_char": -0.7511874437332153, "num_chars": 2}, {"sum_logits": -1.5325400829315186, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5325400829315186, "logits_per_char": -0.7662700414657593, "num_chars": 2}, {"sum_logits": -1.6648223400115967, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6648223400115967, "logits_per_char": -0.8324111700057983, "num_chars": 2}, {"sum_logits": -2.301636219024658, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.301636219024658, "logits_per_char": -1.150818109512329, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 20, "native_id": "cd639cf3ff82f825ace7dd2b087562bd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.717175841331482, "incorrect_loss_raw": 1.6093584299087524, "correct_loss_per_char": 0.858587920665741, "incorrect_loss_per_char": 0.8046792149543762, "correct_loss_per_token": 1.717175841331482, "incorrect_loss_per_token": 1.6093584299087524, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.405210018157959, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.405210018157959, "logits_per_char": -0.7026050090789795, "num_chars": 2}, {"sum_logits": -1.594197154045105, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.594197154045105, "logits_per_char": -0.7970985770225525, "num_chars": 2}, {"sum_logits": -1.5103774070739746, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5103774070739746, "logits_per_char": -0.7551887035369873, "num_chars": 2}, {"sum_logits": -1.717175841331482, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.717175841331482, "logits_per_char": -0.858587920665741, "num_chars": 2}, {"sum_logits": -1.9276491403579712, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.9276491403579712, "logits_per_char": -0.9638245701789856, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 21, "native_id": "8d79cc5e4eea11f50fab18fdea20fd4f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.718327522277832, "incorrect_loss_raw": 1.6225321590900421, "correct_loss_per_char": 0.859163761138916, "incorrect_loss_per_char": 0.8112660795450211, "correct_loss_per_token": 1.718327522277832, "incorrect_loss_per_token": 1.6225321590900421, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3201628923416138, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3201628923416138, "logits_per_char": -0.6600814461708069, "num_chars": 2}, {"sum_logits": -1.5034841299057007, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5034841299057007, "logits_per_char": -0.7517420649528503, "num_chars": 2}, {"sum_logits": -1.718327522277832, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.718327522277832, "logits_per_char": -0.859163761138916, "num_chars": 2}, {"sum_logits": -1.6231852769851685, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6231852769851685, "logits_per_char": -0.8115926384925842, "num_chars": 2}, {"sum_logits": -2.0432963371276855, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.0432963371276855, "logits_per_char": -1.0216481685638428, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 22, "native_id": "e5ad2184e37ae88b2bf46bf6bc0ed2f4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6454908847808838, "incorrect_loss_raw": 1.6237654089927673, "correct_loss_per_char": 0.8227454423904419, "incorrect_loss_per_char": 0.8118827044963837, "correct_loss_per_token": 1.6454908847808838, "incorrect_loss_per_token": 1.6237654089927673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.417364478111267, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.417364478111267, "logits_per_char": -0.7086822390556335, "num_chars": 2}, {"sum_logits": -1.5475209951400757, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5475209951400757, "logits_per_char": -0.7737604975700378, "num_chars": 2}, {"sum_logits": -1.6125988960266113, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6125988960266113, "logits_per_char": -0.8062994480133057, "num_chars": 2}, {"sum_logits": -1.6454908847808838, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6454908847808838, "logits_per_char": -0.8227454423904419, "num_chars": 2}, {"sum_logits": -1.9175772666931152, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.9175772666931152, "logits_per_char": -0.9587886333465576, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 23, "native_id": "b8b287b6277fccd4b7c9c72577177328", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5883902311325073, "incorrect_loss_raw": 1.6355566084384918, "correct_loss_per_char": 0.7941951155662537, "incorrect_loss_per_char": 0.8177783042192459, "correct_loss_per_token": 1.5883902311325073, "incorrect_loss_per_token": 1.6355566084384918, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4882935285568237, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4882935285568237, "logits_per_char": -0.7441467642784119, "num_chars": 2}, {"sum_logits": -1.5265169143676758, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5265169143676758, "logits_per_char": -0.7632584571838379, "num_chars": 2}, {"sum_logits": -1.585156798362732, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.585156798362732, "logits_per_char": -0.792578399181366, "num_chars": 2}, {"sum_logits": -1.5883902311325073, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5883902311325073, "logits_per_char": -0.7941951155662537, "num_chars": 2}, {"sum_logits": -1.9422591924667358, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9422591924667358, "logits_per_char": -0.9711295962333679, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 24, "native_id": "f646f3e064f06423fc25b98500796cf0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9683494567871094, "incorrect_loss_raw": 1.5479828715324402, "correct_loss_per_char": 0.9841747283935547, "incorrect_loss_per_char": 0.7739914357662201, "correct_loss_per_token": 1.9683494567871094, "incorrect_loss_per_token": 1.5479828715324402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.355994462966919, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.355994462966919, "logits_per_char": -0.6779972314834595, "num_chars": 2}, {"sum_logits": -1.6388328075408936, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6388328075408936, "logits_per_char": -0.8194164037704468, "num_chars": 2}, {"sum_logits": -1.6184008121490479, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6184008121490479, "logits_per_char": -0.8092004060745239, "num_chars": 2}, {"sum_logits": -1.5787034034729004, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5787034034729004, "logits_per_char": -0.7893517017364502, "num_chars": 2}, {"sum_logits": -1.9683494567871094, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9683494567871094, "logits_per_char": -0.9841747283935547, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 25, "native_id": "b0f7d7978ac41c465108a92660d70e84", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6985396146774292, "incorrect_loss_raw": 1.6173216104507446, "correct_loss_per_char": 0.8492698073387146, "incorrect_loss_per_char": 0.8086608052253723, "correct_loss_per_token": 1.6985396146774292, "incorrect_loss_per_token": 1.6173216104507446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4358270168304443, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4358270168304443, "logits_per_char": -0.7179135084152222, "num_chars": 2}, {"sum_logits": -1.4428906440734863, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4428906440734863, "logits_per_char": -0.7214453220367432, "num_chars": 2}, {"sum_logits": -1.6061707735061646, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6061707735061646, "logits_per_char": -0.8030853867530823, "num_chars": 2}, {"sum_logits": -1.6985396146774292, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6985396146774292, "logits_per_char": -0.8492698073387146, "num_chars": 2}, {"sum_logits": -1.9843980073928833, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9843980073928833, "logits_per_char": -0.9921990036964417, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 26, "native_id": "54075de8b8b89ecef2e4eb4eaee2713d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.572731375694275, "incorrect_loss_raw": 1.6592231094837189, "correct_loss_per_char": 0.7863656878471375, "incorrect_loss_per_char": 0.8296115547418594, "correct_loss_per_token": 1.572731375694275, "incorrect_loss_per_token": 1.6592231094837189, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3077458143234253, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.3077458143234253, "logits_per_char": -0.6538729071617126, "num_chars": 2}, {"sum_logits": -1.572731375694275, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.572731375694275, "logits_per_char": -0.7863656878471375, "num_chars": 2}, {"sum_logits": -1.5903116464614868, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5903116464614868, "logits_per_char": -0.7951558232307434, "num_chars": 2}, {"sum_logits": -1.6998845338821411, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6998845338821411, "logits_per_char": -0.8499422669410706, "num_chars": 2}, {"sum_logits": -2.0389504432678223, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.0389504432678223, "logits_per_char": -1.0194752216339111, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 27, "native_id": "65435b996ce9d1685bebb74b49c1ba7f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2525972127914429, "incorrect_loss_raw": 1.7654172778129578, "correct_loss_per_char": 0.6262986063957214, "incorrect_loss_per_char": 0.8827086389064789, "correct_loss_per_token": 1.2525972127914429, "incorrect_loss_per_token": 1.7654172778129578, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2525972127914429, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.2525972127914429, "logits_per_char": -0.6262986063957214, "num_chars": 2}, {"sum_logits": -1.4877815246582031, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4877815246582031, "logits_per_char": -0.7438907623291016, "num_chars": 2}, {"sum_logits": -1.5836925506591797, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5836925506591797, "logits_per_char": -0.7918462753295898, "num_chars": 2}, {"sum_logits": -1.7720537185668945, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7720537185668945, "logits_per_char": -0.8860268592834473, "num_chars": 2}, {"sum_logits": -2.2181413173675537, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.2181413173675537, "logits_per_char": -1.1090706586837769, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 28, "native_id": "9889e5389917d812c09d6e5d382d333d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6248453855514526, "incorrect_loss_raw": 1.6306239366531372, "correct_loss_per_char": 0.8124226927757263, "incorrect_loss_per_char": 0.8153119683265686, "correct_loss_per_token": 1.6248453855514526, "incorrect_loss_per_token": 1.6306239366531372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.38066565990448, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.38066565990448, "logits_per_char": -0.69033282995224, "num_chars": 2}, {"sum_logits": -1.6248453855514526, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6248453855514526, "logits_per_char": -0.8124226927757263, "num_chars": 2}, {"sum_logits": -1.600488543510437, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.600488543510437, "logits_per_char": -0.8002442717552185, "num_chars": 2}, {"sum_logits": -1.6061135530471802, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6061135530471802, "logits_per_char": -0.8030567765235901, "num_chars": 2}, {"sum_logits": -1.9352279901504517, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.9352279901504517, "logits_per_char": -0.9676139950752258, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 29, "native_id": "a651ffa44ac5febf0aede6748899b981", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5757029056549072, "incorrect_loss_raw": 1.6311759054660797, "correct_loss_per_char": 0.7878514528274536, "incorrect_loss_per_char": 0.8155879527330399, "correct_loss_per_token": 1.5757029056549072, "incorrect_loss_per_token": 1.6311759054660797, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.494954228401184, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.494954228401184, "logits_per_char": -0.747477114200592, "num_chars": 2}, {"sum_logits": -1.5757029056549072, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5757029056549072, "logits_per_char": -0.7878514528274536, "num_chars": 2}, {"sum_logits": -1.542670488357544, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.542670488357544, "logits_per_char": -0.771335244178772, "num_chars": 2}, {"sum_logits": -1.6683769226074219, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6683769226074219, "logits_per_char": -0.8341884613037109, "num_chars": 2}, {"sum_logits": -1.818701982498169, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.818701982498169, "logits_per_char": -0.9093509912490845, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 30, "native_id": "bdcfbe2132295d437e4c5701085f19c0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5029150247573853, "incorrect_loss_raw": 1.6758351922035217, "correct_loss_per_char": 0.7514575123786926, "incorrect_loss_per_char": 0.8379175961017609, "correct_loss_per_token": 1.5029150247573853, "incorrect_loss_per_token": 1.6758351922035217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3395469188690186, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.3395469188690186, "logits_per_char": -0.6697734594345093, "num_chars": 2}, {"sum_logits": -1.5029150247573853, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5029150247573853, "logits_per_char": -0.7514575123786926, "num_chars": 2}, {"sum_logits": -1.6060010194778442, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6060010194778442, "logits_per_char": -0.8030005097389221, "num_chars": 2}, {"sum_logits": -1.71542489528656, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.71542489528656, "logits_per_char": -0.85771244764328, "num_chars": 2}, {"sum_logits": -2.042367935180664, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.042367935180664, "logits_per_char": -1.021183967590332, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 31, "native_id": "8d3dc21a53523850ec80771daaa5ff20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6084781885147095, "incorrect_loss_raw": 1.6162865459918976, "correct_loss_per_char": 0.8042390942573547, "incorrect_loss_per_char": 0.8081432729959488, "correct_loss_per_token": 1.6084781885147095, "incorrect_loss_per_token": 1.6162865459918976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6084781885147095, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6084781885147095, "logits_per_char": -0.8042390942573547, "num_chars": 2}, {"sum_logits": -1.5957728624343872, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5957728624343872, "logits_per_char": -0.7978864312171936, "num_chars": 2}, {"sum_logits": -1.654211163520813, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.654211163520813, "logits_per_char": -0.8271055817604065, "num_chars": 2}, {"sum_logits": -1.5454952716827393, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.5454952716827393, "logits_per_char": -0.7727476358413696, "num_chars": 2}, {"sum_logits": -1.6696668863296509, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6696668863296509, "logits_per_char": -0.8348334431648254, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 32, "native_id": "a80ee7775e934c423012fe98e20ba28b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9302103519439697, "incorrect_loss_raw": 1.5496001243591309, "correct_loss_per_char": 0.9651051759719849, "incorrect_loss_per_char": 0.7748000621795654, "correct_loss_per_token": 1.9302103519439697, "incorrect_loss_per_token": 1.5496001243591309, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5396826267242432, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5396826267242432, "logits_per_char": -0.7698413133621216, "num_chars": 2}, {"sum_logits": -1.6403679847717285, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6403679847717285, "logits_per_char": -0.8201839923858643, "num_chars": 2}, {"sum_logits": -1.527651071548462, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.527651071548462, "logits_per_char": -0.763825535774231, "num_chars": 2}, {"sum_logits": -1.4906988143920898, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.4906988143920898, "logits_per_char": -0.7453494071960449, "num_chars": 2}, {"sum_logits": -1.9302103519439697, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.9302103519439697, "logits_per_char": -0.9651051759719849, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 33, "native_id": "48a315cfa3ce11f7a9d615bc854331d5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.653439998626709, "incorrect_loss_raw": 1.6526676714420319, "correct_loss_per_char": 0.8267199993133545, "incorrect_loss_per_char": 0.8263338357210159, "correct_loss_per_token": 1.653439998626709, "incorrect_loss_per_token": 1.6526676714420319, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2741903066635132, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.2741903066635132, "logits_per_char": -0.6370951533317566, "num_chars": 2}, {"sum_logits": -1.5586344003677368, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5586344003677368, "logits_per_char": -0.7793172001838684, "num_chars": 2}, {"sum_logits": -1.6106466054916382, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6106466054916382, "logits_per_char": -0.8053233027458191, "num_chars": 2}, {"sum_logits": -1.653439998626709, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.653439998626709, "logits_per_char": -0.8267199993133545, "num_chars": 2}, {"sum_logits": -2.1671993732452393, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.1671993732452393, "logits_per_char": -1.0835996866226196, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 34, "native_id": "4acd496cc78d96c2431279a5fd87de7c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2045013904571533, "incorrect_loss_raw": 1.5127589702606201, "correct_loss_per_char": 1.1022506952285767, "incorrect_loss_per_char": 0.7563794851303101, "correct_loss_per_token": 2.2045013904571533, "incorrect_loss_per_token": 1.5127589702606201, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4088640213012695, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4088640213012695, "logits_per_char": -0.7044320106506348, "num_chars": 2}, {"sum_logits": -1.4124999046325684, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4124999046325684, "logits_per_char": -0.7062499523162842, "num_chars": 2}, {"sum_logits": -1.5921411514282227, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5921411514282227, "logits_per_char": -0.7960705757141113, "num_chars": 2}, {"sum_logits": -1.63753080368042, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.63753080368042, "logits_per_char": -0.81876540184021, "num_chars": 2}, {"sum_logits": -2.2045013904571533, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.2045013904571533, "logits_per_char": -1.1022506952285767, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 35, "native_id": "91e0f4ab62c9d2fd440d73a3f5308d96", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.635221004486084, "incorrect_loss_raw": 1.628530889749527, "correct_loss_per_char": 0.817610502243042, "incorrect_loss_per_char": 0.8142654448747635, "correct_loss_per_token": 1.635221004486084, "incorrect_loss_per_token": 1.628530889749527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4472836256027222, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.4472836256027222, "logits_per_char": -0.7236418128013611, "num_chars": 2}, {"sum_logits": -1.4799134731292725, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.4799134731292725, "logits_per_char": -0.7399567365646362, "num_chars": 2}, {"sum_logits": -1.635221004486084, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.635221004486084, "logits_per_char": -0.817610502243042, "num_chars": 2}, {"sum_logits": -1.6440274715423584, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6440274715423584, "logits_per_char": -0.8220137357711792, "num_chars": 2}, {"sum_logits": -1.9428989887237549, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.9428989887237549, "logits_per_char": -0.9714494943618774, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 36, "native_id": "b61e849e44db16a581f0b65e28ab95dc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5361731052398682, "incorrect_loss_raw": 1.6704948842525482, "correct_loss_per_char": 0.7680865526199341, "incorrect_loss_per_char": 0.8352474421262741, "correct_loss_per_token": 1.5361731052398682, "incorrect_loss_per_token": 1.6704948842525482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3572627305984497, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.3572627305984497, "logits_per_char": -0.6786313652992249, "num_chars": 2}, {"sum_logits": -1.544053554534912, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.544053554534912, "logits_per_char": -0.772026777267456, "num_chars": 2}, {"sum_logits": -1.5361731052398682, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5361731052398682, "logits_per_char": -0.7680865526199341, "num_chars": 2}, {"sum_logits": -1.7176194190979004, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.7176194190979004, "logits_per_char": -0.8588097095489502, "num_chars": 2}, {"sum_logits": -2.0630438327789307, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0630438327789307, "logits_per_char": -1.0315219163894653, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 37, "native_id": "ba6bd1bdef02d0ebfe5370f92365ae18", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5509164333343506, "incorrect_loss_raw": 1.6558107435703278, "correct_loss_per_char": 0.7754582166671753, "incorrect_loss_per_char": 0.8279053717851639, "correct_loss_per_token": 1.5509164333343506, "incorrect_loss_per_token": 1.6558107435703278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4684016704559326, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4684016704559326, "logits_per_char": -0.7342008352279663, "num_chars": 2}, {"sum_logits": -1.5509164333343506, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5509164333343506, "logits_per_char": -0.7754582166671753, "num_chars": 2}, {"sum_logits": -1.4921510219573975, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4921510219573975, "logits_per_char": -0.7460755109786987, "num_chars": 2}, {"sum_logits": -1.608164668083191, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.608164668083191, "logits_per_char": -0.8040823340415955, "num_chars": 2}, {"sum_logits": -2.05452561378479, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.05452561378479, "logits_per_char": -1.027262806892395, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 38, "native_id": "dc55d473c22b04877b11d584f9548194", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.403756856918335, "incorrect_loss_raw": 1.6823715269565582, "correct_loss_per_char": 0.7018784284591675, "incorrect_loss_per_char": 0.8411857634782791, "correct_loss_per_token": 1.403756856918335, "incorrect_loss_per_token": 1.6823715269565582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.403756856918335, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.403756856918335, "logits_per_char": -0.7018784284591675, "num_chars": 2}, {"sum_logits": -1.5529444217681885, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5529444217681885, "logits_per_char": -0.7764722108840942, "num_chars": 2}, {"sum_logits": -1.6201764345169067, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6201764345169067, "logits_per_char": -0.8100882172584534, "num_chars": 2}, {"sum_logits": -1.654225468635559, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.654225468635559, "logits_per_char": -0.8271127343177795, "num_chars": 2}, {"sum_logits": -1.9021397829055786, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9021397829055786, "logits_per_char": -0.9510698914527893, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 39, "native_id": "113aaea2b1a27a976547f54e531d99bb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5817381143569946, "incorrect_loss_raw": 1.6480657160282135, "correct_loss_per_char": 0.7908690571784973, "incorrect_loss_per_char": 0.8240328580141068, "correct_loss_per_token": 1.5817381143569946, "incorrect_loss_per_token": 1.6480657160282135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3728346824645996, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3728346824645996, "logits_per_char": -0.6864173412322998, "num_chars": 2}, {"sum_logits": -1.5817381143569946, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5817381143569946, "logits_per_char": -0.7908690571784973, "num_chars": 2}, {"sum_logits": -1.5190120935440063, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5190120935440063, "logits_per_char": -0.7595060467720032, "num_chars": 2}, {"sum_logits": -1.7274185419082642, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7274185419082642, "logits_per_char": -0.8637092709541321, "num_chars": 2}, {"sum_logits": -1.9729975461959839, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.9729975461959839, "logits_per_char": -0.9864987730979919, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 40, "native_id": "ba640b9634ad6b4ad98b17b4f152e562", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5589690208435059, "incorrect_loss_raw": 1.6450881659984589, "correct_loss_per_char": 0.7794845104217529, "incorrect_loss_per_char": 0.8225440829992294, "correct_loss_per_token": 1.5589690208435059, "incorrect_loss_per_token": 1.6450881659984589, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4511691331863403, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.4511691331863403, "logits_per_char": -0.7255845665931702, "num_chars": 2}, {"sum_logits": -1.5589690208435059, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5589690208435059, "logits_per_char": -0.7794845104217529, "num_chars": 2}, {"sum_logits": -1.538356065750122, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.538356065750122, "logits_per_char": -0.769178032875061, "num_chars": 2}, {"sum_logits": -1.6482961177825928, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6482961177825928, "logits_per_char": -0.8241480588912964, "num_chars": 2}, {"sum_logits": -1.9425313472747803, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.9425313472747803, "logits_per_char": -0.9712656736373901, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 41, "native_id": "750ebdf36a0b3b407be0fe2163e3700b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5698498487472534, "incorrect_loss_raw": 1.6370765566825867, "correct_loss_per_char": 0.7849249243736267, "incorrect_loss_per_char": 0.8185382783412933, "correct_loss_per_token": 1.5698498487472534, "incorrect_loss_per_token": 1.6370765566825867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4217393398284912, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": true, "logits_per_token": -1.4217393398284912, "logits_per_char": -0.7108696699142456, "num_chars": 2}, {"sum_logits": -1.5698498487472534, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.5698498487472534, "logits_per_char": -0.7849249243736267, "num_chars": 2}, {"sum_logits": -1.6511313915252686, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6511313915252686, "logits_per_char": -0.8255656957626343, "num_chars": 2}, {"sum_logits": -1.6699297428131104, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6699297428131104, "logits_per_char": -0.8349648714065552, "num_chars": 2}, {"sum_logits": -1.8055057525634766, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.8055057525634766, "logits_per_char": -0.9027528762817383, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 42, "native_id": "8f01273422a370a8dbda6bf473a395a0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7605884075164795, "incorrect_loss_raw": 1.5896234810352325, "correct_loss_per_char": 0.8802942037582397, "incorrect_loss_per_char": 0.7948117405176163, "correct_loss_per_token": 1.7605884075164795, "incorrect_loss_per_token": 1.5896234810352325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3978880643844604, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3978880643844604, "logits_per_char": -0.6989440321922302, "num_chars": 2}, {"sum_logits": -1.58022141456604, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.58022141456604, "logits_per_char": -0.79011070728302, "num_chars": 2}, {"sum_logits": -1.6321086883544922, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6321086883544922, "logits_per_char": -0.8160543441772461, "num_chars": 2}, {"sum_logits": -1.7482757568359375, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7482757568359375, "logits_per_char": -0.8741378784179688, "num_chars": 2}, {"sum_logits": -1.7605884075164795, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7605884075164795, "logits_per_char": -0.8802942037582397, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 43, "native_id": "e6586bba9fe96d38792e6e6d4f2703dc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9274197816848755, "incorrect_loss_raw": 1.5522469282150269, "correct_loss_per_char": 0.9637098908424377, "incorrect_loss_per_char": 0.7761234641075134, "correct_loss_per_token": 1.9274197816848755, "incorrect_loss_per_token": 1.5522469282150269, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4604989290237427, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4604989290237427, "logits_per_char": -0.7302494645118713, "num_chars": 2}, {"sum_logits": -1.5163049697875977, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5163049697875977, "logits_per_char": -0.7581524848937988, "num_chars": 2}, {"sum_logits": -1.5642600059509277, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5642600059509277, "logits_per_char": -0.7821300029754639, "num_chars": 2}, {"sum_logits": -1.6679238080978394, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6679238080978394, "logits_per_char": -0.8339619040489197, "num_chars": 2}, {"sum_logits": -1.9274197816848755, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9274197816848755, "logits_per_char": -0.9637098908424377, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 44, "native_id": "6e433471d0e2590b8c73ceef275022b1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6153676509857178, "incorrect_loss_raw": 1.6249586939811707, "correct_loss_per_char": 0.8076838254928589, "incorrect_loss_per_char": 0.8124793469905853, "correct_loss_per_token": 1.6153676509857178, "incorrect_loss_per_token": 1.6249586939811707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.443424940109253, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.443424940109253, "logits_per_char": -0.7217124700546265, "num_chars": 2}, {"sum_logits": -1.565955400466919, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.565955400466919, "logits_per_char": -0.7829777002334595, "num_chars": 2}, {"sum_logits": -1.628007173538208, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.628007173538208, "logits_per_char": -0.814003586769104, "num_chars": 2}, {"sum_logits": -1.6153676509857178, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6153676509857178, "logits_per_char": -0.8076838254928589, "num_chars": 2}, {"sum_logits": -1.8624472618103027, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.8624472618103027, "logits_per_char": -0.9312236309051514, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 45, "native_id": "1bc986f8aea88d6927d8a45367855a94", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.124295234680176, "incorrect_loss_raw": 1.5346824526786804, "correct_loss_per_char": 1.062147617340088, "incorrect_loss_per_char": 0.7673412263393402, "correct_loss_per_token": 2.124295234680176, "incorrect_loss_per_token": 1.5346824526786804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.269020676612854, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.269020676612854, "logits_per_char": -0.634510338306427, "num_chars": 2}, {"sum_logits": -1.51101815700531, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.51101815700531, "logits_per_char": -0.755509078502655, "num_chars": 2}, {"sum_logits": -1.6453955173492432, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6453955173492432, "logits_per_char": -0.8226977586746216, "num_chars": 2}, {"sum_logits": -1.7132954597473145, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7132954597473145, "logits_per_char": -0.8566477298736572, "num_chars": 2}, {"sum_logits": -2.124295234680176, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.124295234680176, "logits_per_char": -1.062147617340088, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 46, "native_id": "8d1563697d751a364d688d6701ebdb39", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2181370258331299, "incorrect_loss_raw": 1.8057796955108643, "correct_loss_per_char": 0.6090685129165649, "incorrect_loss_per_char": 0.9028898477554321, "correct_loss_per_token": 1.2181370258331299, "incorrect_loss_per_token": 1.8057796955108643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2181370258331299, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2181370258331299, "logits_per_char": -0.6090685129165649, "num_chars": 2}, {"sum_logits": -1.4285223484039307, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4285223484039307, "logits_per_char": -0.7142611742019653, "num_chars": 2}, {"sum_logits": -1.575467586517334, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.575467586517334, "logits_per_char": -0.787733793258667, "num_chars": 2}, {"sum_logits": -1.8091216087341309, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8091216087341309, "logits_per_char": -0.9045608043670654, "num_chars": 2}, {"sum_logits": -2.4100072383880615, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.4100072383880615, "logits_per_char": -1.2050036191940308, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 47, "native_id": "91f512273a2da7ae796919069b20d6cf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8192459344863892, "incorrect_loss_raw": 1.5762434303760529, "correct_loss_per_char": 0.9096229672431946, "incorrect_loss_per_char": 0.7881217151880264, "correct_loss_per_token": 1.8192459344863892, "incorrect_loss_per_token": 1.5762434303760529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4445750713348389, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": true, "logits_per_token": -1.4445750713348389, "logits_per_char": -0.7222875356674194, "num_chars": 2}, {"sum_logits": -1.5152775049209595, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.5152775049209595, "logits_per_char": -0.7576387524604797, "num_chars": 2}, {"sum_logits": -1.7423869371414185, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.7423869371414185, "logits_per_char": -0.8711934685707092, "num_chars": 2}, {"sum_logits": -1.6027342081069946, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6027342081069946, "logits_per_char": -0.8013671040534973, "num_chars": 2}, {"sum_logits": -1.8192459344863892, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.8192459344863892, "logits_per_char": -0.9096229672431946, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 48, "native_id": "49cda7eedbf63b3f38e59ba72f1ee1f9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6878257989883423, "incorrect_loss_raw": 1.6006983816623688, "correct_loss_per_char": 0.8439128994941711, "incorrect_loss_per_char": 0.8003491908311844, "correct_loss_per_token": 1.6878257989883423, "incorrect_loss_per_token": 1.6006983816623688, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5043081045150757, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.5043081045150757, "logits_per_char": -0.7521540522575378, "num_chars": 2}, {"sum_logits": -1.5878944396972656, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5878944396972656, "logits_per_char": -0.7939472198486328, "num_chars": 2}, {"sum_logits": -1.6723648309707642, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6723648309707642, "logits_per_char": -0.8361824154853821, "num_chars": 2}, {"sum_logits": -1.6382261514663696, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6382261514663696, "logits_per_char": -0.8191130757331848, "num_chars": 2}, {"sum_logits": -1.6878257989883423, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6878257989883423, "logits_per_char": -0.8439128994941711, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 49, "native_id": "a588407ecaecf0f30c2241c30b470fe2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.641658067703247, "incorrect_loss_raw": 1.6451905071735382, "correct_loss_per_char": 0.8208290338516235, "incorrect_loss_per_char": 0.8225952535867691, "correct_loss_per_token": 1.641658067703247, "incorrect_loss_per_token": 1.6451905071735382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.429787278175354, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.429787278175354, "logits_per_char": -0.714893639087677, "num_chars": 2}, {"sum_logits": -1.3618168830871582, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3618168830871582, "logits_per_char": -0.6809084415435791, "num_chars": 2}, {"sum_logits": -1.641658067703247, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.641658067703247, "logits_per_char": -0.8208290338516235, "num_chars": 2}, {"sum_logits": -1.7349803447723389, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7349803447723389, "logits_per_char": -0.8674901723861694, "num_chars": 2}, {"sum_logits": -2.0541775226593018, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.0541775226593018, "logits_per_char": -1.0270887613296509, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 50, "native_id": "011096bcfff30fd38046cf9db3a411c5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.542123556137085, "incorrect_loss_raw": 1.6575629115104675, "correct_loss_per_char": 0.7710617780685425, "incorrect_loss_per_char": 0.8287814557552338, "correct_loss_per_token": 1.542123556137085, "incorrect_loss_per_token": 1.6575629115104675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4570016860961914, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4570016860961914, "logits_per_char": -0.7285008430480957, "num_chars": 2}, {"sum_logits": -1.514486312866211, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.514486312866211, "logits_per_char": -0.7572431564331055, "num_chars": 2}, {"sum_logits": -1.542123556137085, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.542123556137085, "logits_per_char": -0.7710617780685425, "num_chars": 2}, {"sum_logits": -1.6159453392028809, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6159453392028809, "logits_per_char": -0.8079726696014404, "num_chars": 2}, {"sum_logits": -2.042818307876587, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.042818307876587, "logits_per_char": -1.0214091539382935, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 51, "native_id": "435a728f45d32faa4b3c4553c966fd6b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.485251784324646, "incorrect_loss_raw": 1.6736594140529633, "correct_loss_per_char": 0.742625892162323, "incorrect_loss_per_char": 0.8368297070264816, "correct_loss_per_token": 1.485251784324646, "incorrect_loss_per_token": 1.6736594140529633, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.382400631904602, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.382400631904602, "logits_per_char": -0.691200315952301, "num_chars": 2}, {"sum_logits": -1.485251784324646, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.485251784324646, "logits_per_char": -0.742625892162323, "num_chars": 2}, {"sum_logits": -1.6184746026992798, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6184746026992798, "logits_per_char": -0.8092373013496399, "num_chars": 2}, {"sum_logits": -1.6827532052993774, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6827532052993774, "logits_per_char": -0.8413766026496887, "num_chars": 2}, {"sum_logits": -2.0110092163085938, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.0110092163085938, "logits_per_char": -1.0055046081542969, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 52, "native_id": "e953dee48c70159ad879143a319ec607", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7608354091644287, "incorrect_loss_raw": 1.609997570514679, "correct_loss_per_char": 0.8804177045822144, "incorrect_loss_per_char": 0.8049987852573395, "correct_loss_per_token": 1.7608354091644287, "incorrect_loss_per_token": 1.609997570514679, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.314847707748413, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.314847707748413, "logits_per_char": -0.6574238538742065, "num_chars": 2}, {"sum_logits": -1.5248851776123047, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5248851776123047, "logits_per_char": -0.7624425888061523, "num_chars": 2}, {"sum_logits": -1.5870797634124756, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5870797634124756, "logits_per_char": -0.7935398817062378, "num_chars": 2}, {"sum_logits": -1.7608354091644287, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7608354091644287, "logits_per_char": -0.8804177045822144, "num_chars": 2}, {"sum_logits": -2.0131776332855225, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0131776332855225, "logits_per_char": -1.0065888166427612, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 53, "native_id": "9c784727afd7176b54764055df7a7927", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6654112339019775, "incorrect_loss_raw": 1.6481851935386658, "correct_loss_per_char": 0.8327056169509888, "incorrect_loss_per_char": 0.8240925967693329, "correct_loss_per_token": 1.6654112339019775, "incorrect_loss_per_token": 1.6481851935386658, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3518693447113037, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3518693447113037, "logits_per_char": -0.6759346723556519, "num_chars": 2}, {"sum_logits": -1.3669023513793945, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.3669023513793945, "logits_per_char": -0.6834511756896973, "num_chars": 2}, {"sum_logits": -1.6654112339019775, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6654112339019775, "logits_per_char": -0.8327056169509888, "num_chars": 2}, {"sum_logits": -1.7903389930725098, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7903389930725098, "logits_per_char": -0.8951694965362549, "num_chars": 2}, {"sum_logits": -2.083630084991455, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.083630084991455, "logits_per_char": -1.0418150424957275, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 54, "native_id": "b47d912136e3304cb5e5890b6b879551", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6653727293014526, "incorrect_loss_raw": 1.6330857574939728, "correct_loss_per_char": 0.8326863646507263, "incorrect_loss_per_char": 0.8165428787469864, "correct_loss_per_token": 1.6653727293014526, "incorrect_loss_per_token": 1.6330857574939728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4069689512252808, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.4069689512252808, "logits_per_char": -0.7034844756126404, "num_chars": 2}, {"sum_logits": -1.4447439908981323, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.4447439908981323, "logits_per_char": -0.7223719954490662, "num_chars": 2}, {"sum_logits": -1.6154512166976929, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6154512166976929, "logits_per_char": -0.8077256083488464, "num_chars": 2}, {"sum_logits": -1.6653727293014526, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6653727293014526, "logits_per_char": -0.8326863646507263, "num_chars": 2}, {"sum_logits": -2.065178871154785, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -2.065178871154785, "logits_per_char": -1.0325894355773926, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 55, "native_id": "49b4c9e1bd7946a819e173ce8fa4c7c9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9153300523757935, "incorrect_loss_raw": 1.5530781745910645, "correct_loss_per_char": 0.9576650261878967, "incorrect_loss_per_char": 0.7765390872955322, "correct_loss_per_token": 1.9153300523757935, "incorrect_loss_per_token": 1.5530781745910645, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4406590461730957, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4406590461730957, "logits_per_char": -0.7203295230865479, "num_chars": 2}, {"sum_logits": -1.5378869771957397, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5378869771957397, "logits_per_char": -0.7689434885978699, "num_chars": 2}, {"sum_logits": -1.5862667560577393, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5862667560577393, "logits_per_char": -0.7931333780288696, "num_chars": 2}, {"sum_logits": -1.647499918937683, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.647499918937683, "logits_per_char": -0.8237499594688416, "num_chars": 2}, {"sum_logits": -1.9153300523757935, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9153300523757935, "logits_per_char": -0.9576650261878967, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 56, "native_id": "950af0b765c298960ce3dada66df8db1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5831934213638306, "incorrect_loss_raw": 1.6379154324531555, "correct_loss_per_char": 0.7915967106819153, "incorrect_loss_per_char": 0.8189577162265778, "correct_loss_per_token": 1.5831934213638306, "incorrect_loss_per_token": 1.6379154324531555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.502264380455017, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.502264380455017, "logits_per_char": -0.7511321902275085, "num_chars": 2}, {"sum_logits": -1.5831934213638306, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5831934213638306, "logits_per_char": -0.7915967106819153, "num_chars": 2}, {"sum_logits": -1.5340086221694946, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5340086221694946, "logits_per_char": -0.7670043110847473, "num_chars": 2}, {"sum_logits": -1.5613404512405396, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5613404512405396, "logits_per_char": -0.7806702256202698, "num_chars": 2}, {"sum_logits": -1.9540482759475708, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9540482759475708, "logits_per_char": -0.9770241379737854, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 57, "native_id": "63cf1adb5fe302b9867ead8bc8103d0b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6488841772079468, "incorrect_loss_raw": 1.6269863545894623, "correct_loss_per_char": 0.8244420886039734, "incorrect_loss_per_char": 0.8134931772947311, "correct_loss_per_token": 1.6488841772079468, "incorrect_loss_per_token": 1.6269863545894623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.362168550491333, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.362168550491333, "logits_per_char": -0.6810842752456665, "num_chars": 2}, {"sum_logits": -1.5028866529464722, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5028866529464722, "logits_per_char": -0.7514433264732361, "num_chars": 2}, {"sum_logits": -1.7293661832809448, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7293661832809448, "logits_per_char": -0.8646830916404724, "num_chars": 2}, {"sum_logits": -1.6488841772079468, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6488841772079468, "logits_per_char": -0.8244420886039734, "num_chars": 2}, {"sum_logits": -1.9135240316390991, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9135240316390991, "logits_per_char": -0.9567620158195496, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 58, "native_id": "ede4d302fc2ffe07703158f83c1493f2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.570813775062561, "incorrect_loss_raw": 1.6289355754852295, "correct_loss_per_char": 0.7854068875312805, "incorrect_loss_per_char": 0.8144677877426147, "correct_loss_per_token": 1.570813775062561, "incorrect_loss_per_token": 1.6289355754852295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5394238233566284, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.5394238233566284, "logits_per_char": -0.7697119116783142, "num_chars": 2}, {"sum_logits": -1.570813775062561, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.570813775062561, "logits_per_char": -0.7854068875312805, "num_chars": 2}, {"sum_logits": -1.5557496547698975, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5557496547698975, "logits_per_char": -0.7778748273849487, "num_chars": 2}, {"sum_logits": -1.7365070581436157, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7365070581436157, "logits_per_char": -0.8682535290718079, "num_chars": 2}, {"sum_logits": -1.6840617656707764, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6840617656707764, "logits_per_char": -0.8420308828353882, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 59, "native_id": "74ad13a03634e79c85382f1b90969b74", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2876478433609009, "incorrect_loss_raw": 1.7274615168571472, "correct_loss_per_char": 0.6438239216804504, "incorrect_loss_per_char": 0.8637307584285736, "correct_loss_per_token": 1.2876478433609009, "incorrect_loss_per_token": 1.7274615168571472, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2876478433609009, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2876478433609009, "logits_per_char": -0.6438239216804504, "num_chars": 2}, {"sum_logits": -1.5336211919784546, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5336211919784546, "logits_per_char": -0.7668105959892273, "num_chars": 2}, {"sum_logits": -1.6643682718276978, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6643682718276978, "logits_per_char": -0.8321841359138489, "num_chars": 2}, {"sum_logits": -1.7544738054275513, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7544738054275513, "logits_per_char": -0.8772369027137756, "num_chars": 2}, {"sum_logits": -1.9573827981948853, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9573827981948853, "logits_per_char": -0.9786913990974426, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 60, "native_id": "49e466b1782aa4837dae53ff891fcdee", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6035702228546143, "incorrect_loss_raw": 1.6496747434139252, "correct_loss_per_char": 0.8017851114273071, "incorrect_loss_per_char": 0.8248373717069626, "correct_loss_per_token": 1.6035702228546143, "incorrect_loss_per_token": 1.6496747434139252, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.289241909980774, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.289241909980774, "logits_per_char": -0.644620954990387, "num_chars": 2}, {"sum_logits": -1.6035702228546143, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6035702228546143, "logits_per_char": -0.8017851114273071, "num_chars": 2}, {"sum_logits": -1.7370126247406006, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7370126247406006, "logits_per_char": -0.8685063123703003, "num_chars": 2}, {"sum_logits": -1.6001431941986084, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6001431941986084, "logits_per_char": -0.8000715970993042, "num_chars": 2}, {"sum_logits": -1.9723012447357178, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9723012447357178, "logits_per_char": -0.9861506223678589, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 61, "native_id": "a8a8ae7792901c7179ff5538c701af1f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.979506015777588, "incorrect_loss_raw": 1.5418040752410889, "correct_loss_per_char": 0.989753007888794, "incorrect_loss_per_char": 0.7709020376205444, "correct_loss_per_token": 1.979506015777588, "incorrect_loss_per_token": 1.5418040752410889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4663691520690918, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.4663691520690918, "logits_per_char": -0.7331845760345459, "num_chars": 2}, {"sum_logits": -1.5068451166152954, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5068451166152954, "logits_per_char": -0.7534225583076477, "num_chars": 2}, {"sum_logits": -1.570698857307434, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.570698857307434, "logits_per_char": -0.785349428653717, "num_chars": 2}, {"sum_logits": -1.6233031749725342, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6233031749725342, "logits_per_char": -0.8116515874862671, "num_chars": 2}, {"sum_logits": -1.979506015777588, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.979506015777588, "logits_per_char": -0.989753007888794, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 62, "native_id": "2ffa3808ce26181926990b454e429c85", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.564165472984314, "incorrect_loss_raw": 1.664016455411911, "correct_loss_per_char": 0.782082736492157, "incorrect_loss_per_char": 0.8320082277059555, "correct_loss_per_token": 1.564165472984314, "incorrect_loss_per_token": 1.664016455411911, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.375981330871582, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.375981330871582, "logits_per_char": -0.687990665435791, "num_chars": 2}, {"sum_logits": -1.5222218036651611, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5222218036651611, "logits_per_char": -0.7611109018325806, "num_chars": 2}, {"sum_logits": -1.564165472984314, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.564165472984314, "logits_per_char": -0.782082736492157, "num_chars": 2}, {"sum_logits": -1.633090615272522, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.633090615272522, "logits_per_char": -0.816545307636261, "num_chars": 2}, {"sum_logits": -2.124772071838379, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.124772071838379, "logits_per_char": -1.0623860359191895, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 63, "native_id": "4319eaa36d256a92b72445c0392f9c94", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6710025072097778, "incorrect_loss_raw": 1.6539104580879211, "correct_loss_per_char": 0.8355012536048889, "incorrect_loss_per_char": 0.8269552290439606, "correct_loss_per_token": 1.6710025072097778, "incorrect_loss_per_token": 1.6539104580879211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.373631238937378, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.373631238937378, "logits_per_char": -0.686815619468689, "num_chars": 2}, {"sum_logits": -1.4754492044448853, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4754492044448853, "logits_per_char": -0.7377246022224426, "num_chars": 2}, {"sum_logits": -1.5181130170822144, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5181130170822144, "logits_per_char": -0.7590565085411072, "num_chars": 2}, {"sum_logits": -1.6710025072097778, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6710025072097778, "logits_per_char": -0.8355012536048889, "num_chars": 2}, {"sum_logits": -2.248448371887207, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.248448371887207, "logits_per_char": -1.1242241859436035, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 64, "native_id": "ec79ef747bb89281923edb89ba26786d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3549460172653198, "incorrect_loss_raw": 1.7013202905654907, "correct_loss_per_char": 0.6774730086326599, "incorrect_loss_per_char": 0.8506601452827454, "correct_loss_per_token": 1.3549460172653198, "incorrect_loss_per_token": 1.7013202905654907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3549460172653198, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3549460172653198, "logits_per_char": -0.6774730086326599, "num_chars": 2}, {"sum_logits": -1.625084638595581, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.625084638595581, "logits_per_char": -0.8125423192977905, "num_chars": 2}, {"sum_logits": -1.5989906787872314, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5989906787872314, "logits_per_char": -0.7994953393936157, "num_chars": 2}, {"sum_logits": -1.6231958866119385, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6231958866119385, "logits_per_char": -0.8115979433059692, "num_chars": 2}, {"sum_logits": -1.958009958267212, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.958009958267212, "logits_per_char": -0.979004979133606, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 65, "native_id": "2d33cde5e3987adc8fa2bca0af4dd3dd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3159806728363037, "incorrect_loss_raw": 1.7258731424808502, "correct_loss_per_char": 0.6579903364181519, "incorrect_loss_per_char": 0.8629365712404251, "correct_loss_per_token": 1.3159806728363037, "incorrect_loss_per_token": 1.7258731424808502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3159806728363037, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3159806728363037, "logits_per_char": -0.6579903364181519, "num_chars": 2}, {"sum_logits": -1.4582809209823608, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4582809209823608, "logits_per_char": -0.7291404604911804, "num_chars": 2}, {"sum_logits": -1.6980245113372803, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6980245113372803, "logits_per_char": -0.8490122556686401, "num_chars": 2}, {"sum_logits": -1.720416784286499, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.720416784286499, "logits_per_char": -0.8602083921432495, "num_chars": 2}, {"sum_logits": -2.0267703533172607, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.0267703533172607, "logits_per_char": -1.0133851766586304, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 66, "native_id": "cc46d936bf69d69a3863b0cb85d75c17", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6735224723815918, "incorrect_loss_raw": 1.6376489400863647, "correct_loss_per_char": 0.8367612361907959, "incorrect_loss_per_char": 0.8188244700431824, "correct_loss_per_token": 1.6735224723815918, "incorrect_loss_per_token": 1.6376489400863647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.315011739730835, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.315011739730835, "logits_per_char": -0.6575058698654175, "num_chars": 2}, {"sum_logits": -1.491330862045288, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.491330862045288, "logits_per_char": -0.745665431022644, "num_chars": 2}, {"sum_logits": -1.6554243564605713, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6554243564605713, "logits_per_char": -0.8277121782302856, "num_chars": 2}, {"sum_logits": -1.6735224723815918, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6735224723815918, "logits_per_char": -0.8367612361907959, "num_chars": 2}, {"sum_logits": -2.0888288021087646, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.0888288021087646, "logits_per_char": -1.0444144010543823, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 67, "native_id": "46bc1a50eeead10509a43a048e01194e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3539984226226807, "incorrect_loss_raw": 1.717633455991745, "correct_loss_per_char": 0.6769992113113403, "incorrect_loss_per_char": 0.8588167279958725, "correct_loss_per_token": 1.3539984226226807, "incorrect_loss_per_token": 1.717633455991745, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3539984226226807, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3539984226226807, "logits_per_char": -0.6769992113113403, "num_chars": 2}, {"sum_logits": -1.4677821397781372, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4677821397781372, "logits_per_char": -0.7338910698890686, "num_chars": 2}, {"sum_logits": -1.5721427202224731, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5721427202224731, "logits_per_char": -0.7860713601112366, "num_chars": 2}, {"sum_logits": -1.732178807258606, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.732178807258606, "logits_per_char": -0.866089403629303, "num_chars": 2}, {"sum_logits": -2.0984301567077637, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0984301567077637, "logits_per_char": -1.0492150783538818, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 68, "native_id": "4336a8c55b7cb17275d1c60206cd2f18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5630375146865845, "incorrect_loss_raw": 1.6308208107948303, "correct_loss_per_char": 0.7815187573432922, "incorrect_loss_per_char": 0.8154104053974152, "correct_loss_per_token": 1.5630375146865845, "incorrect_loss_per_token": 1.6308208107948303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6589082479476929, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6589082479476929, "logits_per_char": -0.8294541239738464, "num_chars": 2}, {"sum_logits": -1.5813822746276855, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5813822746276855, "logits_per_char": -0.7906911373138428, "num_chars": 2}, {"sum_logits": -1.5630375146865845, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5630375146865845, "logits_per_char": -0.7815187573432922, "num_chars": 2}, {"sum_logits": -1.528695821762085, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.528695821762085, "logits_per_char": -0.7643479108810425, "num_chars": 2}, {"sum_logits": -1.754296898841858, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.754296898841858, "logits_per_char": -0.877148449420929, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 69, "native_id": "a287575d3ba4b9f958536fc14a1f5b5a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4902870655059814, "incorrect_loss_raw": 1.6558597683906555, "correct_loss_per_char": 0.7451435327529907, "incorrect_loss_per_char": 0.8279298841953278, "correct_loss_per_token": 1.4902870655059814, "incorrect_loss_per_token": 1.6558597683906555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.531679391860962, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.531679391860962, "logits_per_char": -0.765839695930481, "num_chars": 2}, {"sum_logits": -1.7502106428146362, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7502106428146362, "logits_per_char": -0.8751053214073181, "num_chars": 2}, {"sum_logits": -1.5662347078323364, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5662347078323364, "logits_per_char": -0.7831173539161682, "num_chars": 2}, {"sum_logits": -1.4902870655059814, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4902870655059814, "logits_per_char": -0.7451435327529907, "num_chars": 2}, {"sum_logits": -1.7753143310546875, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7753143310546875, "logits_per_char": -0.8876571655273438, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 70, "native_id": "f481dc35b0a97a20dc5cdfe1a59746e2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8595168590545654, "incorrect_loss_raw": 1.5641970038414001, "correct_loss_per_char": 0.9297584295272827, "incorrect_loss_per_char": 0.7820985019207001, "correct_loss_per_token": 1.8595168590545654, "incorrect_loss_per_token": 1.5641970038414001, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4390640258789062, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4390640258789062, "logits_per_char": -0.7195320129394531, "num_chars": 2}, {"sum_logits": -1.5374553203582764, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5374553203582764, "logits_per_char": -0.7687276601791382, "num_chars": 2}, {"sum_logits": -1.6162092685699463, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6162092685699463, "logits_per_char": -0.8081046342849731, "num_chars": 2}, {"sum_logits": -1.6640594005584717, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6640594005584717, "logits_per_char": -0.8320297002792358, "num_chars": 2}, {"sum_logits": -1.8595168590545654, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8595168590545654, "logits_per_char": -0.9297584295272827, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 71, "native_id": "c1c7a9efa379b8a7024a71cf364a144c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7652802467346191, "incorrect_loss_raw": 1.5821934342384338, "correct_loss_per_char": 0.8826401233673096, "incorrect_loss_per_char": 0.7910967171192169, "correct_loss_per_token": 1.7652802467346191, "incorrect_loss_per_token": 1.5821934342384338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5842301845550537, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.5842301845550537, "logits_per_char": -0.7921150922775269, "num_chars": 2}, {"sum_logits": -1.6028435230255127, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6028435230255127, "logits_per_char": -0.8014217615127563, "num_chars": 2}, {"sum_logits": -1.6682069301605225, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6682069301605225, "logits_per_char": -0.8341034650802612, "num_chars": 2}, {"sum_logits": -1.4734930992126465, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.4734930992126465, "logits_per_char": -0.7367465496063232, "num_chars": 2}, {"sum_logits": -1.7652802467346191, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7652802467346191, "logits_per_char": -0.8826401233673096, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 72, "native_id": "821b32d39f57396979069b948030afe9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7108407020568848, "incorrect_loss_raw": 1.6150507032871246, "correct_loss_per_char": 0.8554203510284424, "incorrect_loss_per_char": 0.8075253516435623, "correct_loss_per_token": 1.7108407020568848, "incorrect_loss_per_token": 1.6150507032871246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3513950109481812, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3513950109481812, "logits_per_char": -0.6756975054740906, "num_chars": 2}, {"sum_logits": -1.4889311790466309, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4889311790466309, "logits_per_char": -0.7444655895233154, "num_chars": 2}, {"sum_logits": -1.6690032482147217, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6690032482147217, "logits_per_char": -0.8345016241073608, "num_chars": 2}, {"sum_logits": -1.7108407020568848, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7108407020568848, "logits_per_char": -0.8554203510284424, "num_chars": 2}, {"sum_logits": -1.9508733749389648, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9508733749389648, "logits_per_char": -0.9754366874694824, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 73, "native_id": "c68b4082a6872cf8198502651d0f3352", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6070462465286255, "incorrect_loss_raw": 1.6227769553661346, "correct_loss_per_char": 0.8035231232643127, "incorrect_loss_per_char": 0.8113884776830673, "correct_loss_per_token": 1.6070462465286255, "incorrect_loss_per_token": 1.6227769553661346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6070462465286255, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6070462465286255, "logits_per_char": -0.8035231232643127, "num_chars": 2}, {"sum_logits": -1.4643410444259644, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.4643410444259644, "logits_per_char": -0.7321705222129822, "num_chars": 2}, {"sum_logits": -1.5743770599365234, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5743770599365234, "logits_per_char": -0.7871885299682617, "num_chars": 2}, {"sum_logits": -1.694230079650879, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.694230079650879, "logits_per_char": -0.8471150398254395, "num_chars": 2}, {"sum_logits": -1.7581596374511719, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.7581596374511719, "logits_per_char": -0.8790798187255859, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 74, "native_id": "dd11fea36d89aa09f9a6069545ba4c9c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.674167275428772, "incorrect_loss_raw": 1.6350052952766418, "correct_loss_per_char": 0.837083637714386, "incorrect_loss_per_char": 0.8175026476383209, "correct_loss_per_token": 1.674167275428772, "incorrect_loss_per_token": 1.6350052952766418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.277222752571106, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.277222752571106, "logits_per_char": -0.638611376285553, "num_chars": 2}, {"sum_logits": -1.6478878259658813, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6478878259658813, "logits_per_char": -0.8239439129829407, "num_chars": 2}, {"sum_logits": -1.674167275428772, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.674167275428772, "logits_per_char": -0.837083637714386, "num_chars": 2}, {"sum_logits": -1.617838740348816, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.617838740348816, "logits_per_char": -0.808919370174408, "num_chars": 2}, {"sum_logits": -1.9970718622207642, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.9970718622207642, "logits_per_char": -0.9985359311103821, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 75, "native_id": "7792b2c6518ecf9775efba6d41253312", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5452697277069092, "incorrect_loss_raw": 1.6551967859268188, "correct_loss_per_char": 0.7726348638534546, "incorrect_loss_per_char": 0.8275983929634094, "correct_loss_per_token": 1.5452697277069092, "incorrect_loss_per_token": 1.6551967859268188, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3466825485229492, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.3466825485229492, "logits_per_char": -0.6733412742614746, "num_chars": 2}, {"sum_logits": -1.5452697277069092, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5452697277069092, "logits_per_char": -0.7726348638534546, "num_chars": 2}, {"sum_logits": -1.701106071472168, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.701106071472168, "logits_per_char": -0.850553035736084, "num_chars": 2}, {"sum_logits": -1.6194603443145752, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6194603443145752, "logits_per_char": -0.8097301721572876, "num_chars": 2}, {"sum_logits": -1.953538179397583, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.953538179397583, "logits_per_char": -0.9767690896987915, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 76, "native_id": "1feb4c2a0e8ed638259f5d27b16eae9a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5068258047103882, "incorrect_loss_raw": 1.6800293922424316, "correct_loss_per_char": 0.7534129023551941, "incorrect_loss_per_char": 0.8400146961212158, "correct_loss_per_token": 1.5068258047103882, "incorrect_loss_per_token": 1.6800293922424316, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4151500463485718, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.4151500463485718, "logits_per_char": -0.7075750231742859, "num_chars": 2}, {"sum_logits": -1.52812659740448, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.52812659740448, "logits_per_char": -0.76406329870224, "num_chars": 2}, {"sum_logits": -1.5068258047103882, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5068258047103882, "logits_per_char": -0.7534129023551941, "num_chars": 2}, {"sum_logits": -1.6166760921478271, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.6166760921478271, "logits_per_char": -0.8083380460739136, "num_chars": 2}, {"sum_logits": -2.1601648330688477, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -2.1601648330688477, "logits_per_char": -1.0800824165344238, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 77, "native_id": "2de08c7a518b7c226e19bdc8fc10ef1d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2457118034362793, "incorrect_loss_raw": 1.5063312947750092, "correct_loss_per_char": 1.1228559017181396, "incorrect_loss_per_char": 0.7531656473875046, "correct_loss_per_token": 2.2457118034362793, "incorrect_loss_per_token": 1.5063312947750092, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.385895848274231, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.385895848274231, "logits_per_char": -0.6929479241371155, "num_chars": 2}, {"sum_logits": -1.5907715559005737, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5907715559005737, "logits_per_char": -0.7953857779502869, "num_chars": 2}, {"sum_logits": -1.5397242307662964, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5397242307662964, "logits_per_char": -0.7698621153831482, "num_chars": 2}, {"sum_logits": -1.5089335441589355, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5089335441589355, "logits_per_char": -0.7544667720794678, "num_chars": 2}, {"sum_logits": -2.2457118034362793, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.2457118034362793, "logits_per_char": -1.1228559017181396, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 78, "native_id": "ea8664e77205224154f8519f922220e1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5843191146850586, "incorrect_loss_raw": 1.6424104273319244, "correct_loss_per_char": 0.7921595573425293, "incorrect_loss_per_char": 0.8212052136659622, "correct_loss_per_token": 1.5843191146850586, "incorrect_loss_per_token": 1.6424104273319244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3622084856033325, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3622084856033325, "logits_per_char": -0.6811042428016663, "num_chars": 2}, {"sum_logits": -1.5843191146850586, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5843191146850586, "logits_per_char": -0.7921595573425293, "num_chars": 2}, {"sum_logits": -1.5951781272888184, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5951781272888184, "logits_per_char": -0.7975890636444092, "num_chars": 2}, {"sum_logits": -1.6759834289550781, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6759834289550781, "logits_per_char": -0.8379917144775391, "num_chars": 2}, {"sum_logits": -1.9362716674804688, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9362716674804688, "logits_per_char": -0.9681358337402344, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 79, "native_id": "a64d45cecde84fdcf5f0a79805a0c6fe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6029633283615112, "incorrect_loss_raw": 1.6620019674301147, "correct_loss_per_char": 0.8014816641807556, "incorrect_loss_per_char": 0.8310009837150574, "correct_loss_per_token": 1.6029633283615112, "incorrect_loss_per_token": 1.6620019674301147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.304396152496338, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.304396152496338, "logits_per_char": -0.652198076248169, "num_chars": 2}, {"sum_logits": -1.495390772819519, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.495390772819519, "logits_per_char": -0.7476953864097595, "num_chars": 2}, {"sum_logits": -1.6029633283615112, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6029633283615112, "logits_per_char": -0.8014816641807556, "num_chars": 2}, {"sum_logits": -1.7374979257583618, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7374979257583618, "logits_per_char": -0.8687489628791809, "num_chars": 2}, {"sum_logits": -2.1107230186462402, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.1107230186462402, "logits_per_char": -1.0553615093231201, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 80, "native_id": "60e92cd2f35c345872d1a898e1718d55", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6369094848632812, "incorrect_loss_raw": 1.618859201669693, "correct_loss_per_char": 0.8184547424316406, "incorrect_loss_per_char": 0.8094296008348465, "correct_loss_per_token": 1.6369094848632812, "incorrect_loss_per_token": 1.618859201669693, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4389315843582153, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.4389315843582153, "logits_per_char": -0.7194657921791077, "num_chars": 2}, {"sum_logits": -1.6369094848632812, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.6369094848632812, "logits_per_char": -0.8184547424316406, "num_chars": 2}, {"sum_logits": -1.6437816619873047, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.6437816619873047, "logits_per_char": -0.8218908309936523, "num_chars": 2}, {"sum_logits": -1.5743770599365234, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.5743770599365234, "logits_per_char": -0.7871885299682617, "num_chars": 2}, {"sum_logits": -1.8183465003967285, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.8183465003967285, "logits_per_char": -0.9091732501983643, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 81, "native_id": "08f3c187908646997b9080c7e9ea7da4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6438851356506348, "incorrect_loss_raw": 1.6259726285934448, "correct_loss_per_char": 0.8219425678253174, "incorrect_loss_per_char": 0.8129863142967224, "correct_loss_per_token": 1.6438851356506348, "incorrect_loss_per_token": 1.6259726285934448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5035068988800049, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5035068988800049, "logits_per_char": -0.7517534494400024, "num_chars": 2}, {"sum_logits": -1.518479824066162, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.518479824066162, "logits_per_char": -0.759239912033081, "num_chars": 2}, {"sum_logits": -1.4950687885284424, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4950687885284424, "logits_per_char": -0.7475343942642212, "num_chars": 2}, {"sum_logits": -1.6438851356506348, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6438851356506348, "logits_per_char": -0.8219425678253174, "num_chars": 2}, {"sum_logits": -1.98683500289917, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.98683500289917, "logits_per_char": -0.993417501449585, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 82, "native_id": "9aff72f0c480c2b4edde45bd2e7e4870", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.192783832550049, "incorrect_loss_raw": 1.5216713547706604, "correct_loss_per_char": 1.0963919162750244, "incorrect_loss_per_char": 0.7608356773853302, "correct_loss_per_token": 2.192783832550049, "incorrect_loss_per_token": 1.5216713547706604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3157061338424683, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3157061338424683, "logits_per_char": -0.6578530669212341, "num_chars": 2}, {"sum_logits": -1.4455739259719849, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4455739259719849, "logits_per_char": -0.7227869629859924, "num_chars": 2}, {"sum_logits": -1.5961097478866577, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5961097478866577, "logits_per_char": -0.7980548739433289, "num_chars": 2}, {"sum_logits": -1.7292956113815308, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7292956113815308, "logits_per_char": -0.8646478056907654, "num_chars": 2}, {"sum_logits": -2.192783832550049, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.192783832550049, "logits_per_char": -1.0963919162750244, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 83, "native_id": "fd243c96edec5b1b8520d5bfeddc6622", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4140706062316895, "incorrect_loss_raw": 1.6908185482025146, "correct_loss_per_char": 0.7070353031158447, "incorrect_loss_per_char": 0.8454092741012573, "correct_loss_per_token": 1.4140706062316895, "incorrect_loss_per_token": 1.6908185482025146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4140706062316895, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4140706062316895, "logits_per_char": -0.7070353031158447, "num_chars": 2}, {"sum_logits": -1.6067726612091064, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6067726612091064, "logits_per_char": -0.8033863306045532, "num_chars": 2}, {"sum_logits": -1.4838829040527344, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4838829040527344, "logits_per_char": -0.7419414520263672, "num_chars": 2}, {"sum_logits": -1.6465072631835938, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6465072631835938, "logits_per_char": -0.8232536315917969, "num_chars": 2}, {"sum_logits": -2.026111364364624, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.026111364364624, "logits_per_char": -1.013055682182312, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 84, "native_id": "f5ec4fdfd0e37e733bfc1606b986f1e2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9740886688232422, "incorrect_loss_raw": 1.5457560420036316, "correct_loss_per_char": 0.9870443344116211, "incorrect_loss_per_char": 0.7728780210018158, "correct_loss_per_token": 1.9740886688232422, "incorrect_loss_per_token": 1.5457560420036316, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.387376308441162, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.387376308441162, "logits_per_char": -0.693688154220581, "num_chars": 2}, {"sum_logits": -1.600975513458252, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.600975513458252, "logits_per_char": -0.800487756729126, "num_chars": 2}, {"sum_logits": -1.5495281219482422, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5495281219482422, "logits_per_char": -0.7747640609741211, "num_chars": 2}, {"sum_logits": -1.6451442241668701, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6451442241668701, "logits_per_char": -0.8225721120834351, "num_chars": 2}, {"sum_logits": -1.9740886688232422, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9740886688232422, "logits_per_char": -0.9870443344116211, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 85, "native_id": "e3c6d147f8a727d314046e70e9579ba0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.414933204650879, "incorrect_loss_raw": 1.6921541094779968, "correct_loss_per_char": 0.7074666023254395, "incorrect_loss_per_char": 0.8460770547389984, "correct_loss_per_token": 1.414933204650879, "incorrect_loss_per_token": 1.6921541094779968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.414933204650879, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.414933204650879, "logits_per_char": -0.7074666023254395, "num_chars": 2}, {"sum_logits": -1.425574541091919, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.425574541091919, "logits_per_char": -0.7127872705459595, "num_chars": 2}, {"sum_logits": -1.6250149011611938, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6250149011611938, "logits_per_char": -0.8125074505805969, "num_chars": 2}, {"sum_logits": -1.7132619619369507, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7132619619369507, "logits_per_char": -0.8566309809684753, "num_chars": 2}, {"sum_logits": -2.004765033721924, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.004765033721924, "logits_per_char": -1.002382516860962, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 86, "native_id": "8ce13c6e08bf38d4cd4af756b661e47c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6765285730361938, "incorrect_loss_raw": 1.6604618728160858, "correct_loss_per_char": 0.8382642865180969, "incorrect_loss_per_char": 0.8302309364080429, "correct_loss_per_token": 1.6765285730361938, "incorrect_loss_per_token": 1.6604618728160858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3601760864257812, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3601760864257812, "logits_per_char": -0.6800880432128906, "num_chars": 2}, {"sum_logits": -1.453776478767395, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.453776478767395, "logits_per_char": -0.7268882393836975, "num_chars": 2}, {"sum_logits": -1.5134012699127197, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5134012699127197, "logits_per_char": -0.7567006349563599, "num_chars": 2}, {"sum_logits": -1.6765285730361938, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6765285730361938, "logits_per_char": -0.8382642865180969, "num_chars": 2}, {"sum_logits": -2.3144936561584473, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.3144936561584473, "logits_per_char": -1.1572468280792236, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 87, "native_id": "0f4159e80f8dbf682819215bbf0f5b5a_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5224659442901611, "incorrect_loss_raw": 1.6884353160858154, "correct_loss_per_char": 0.7612329721450806, "incorrect_loss_per_char": 0.8442176580429077, "correct_loss_per_token": 1.5224659442901611, "incorrect_loss_per_token": 1.6884353160858154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1868343353271484, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.1868343353271484, "logits_per_char": -0.5934171676635742, "num_chars": 2}, {"sum_logits": -1.5224659442901611, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5224659442901611, "logits_per_char": -0.7612329721450806, "num_chars": 2}, {"sum_logits": -1.6911060810089111, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6911060810089111, "logits_per_char": -0.8455530405044556, "num_chars": 2}, {"sum_logits": -1.866696834564209, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.866696834564209, "logits_per_char": -0.9333484172821045, "num_chars": 2}, {"sum_logits": -2.009104013442993, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.009104013442993, "logits_per_char": -1.0045520067214966, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 88, "native_id": "1a8b3c2a46efabcbd506f9cf70886ed0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0263442993164062, "incorrect_loss_raw": 1.5408341884613037, "correct_loss_per_char": 1.0131721496582031, "incorrect_loss_per_char": 0.7704170942306519, "correct_loss_per_token": 2.0263442993164062, "incorrect_loss_per_token": 1.5408341884613037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4516582489013672, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4516582489013672, "logits_per_char": -0.7258291244506836, "num_chars": 2}, {"sum_logits": -1.6190927028656006, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6190927028656006, "logits_per_char": -0.8095463514328003, "num_chars": 2}, {"sum_logits": -1.4276314973831177, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4276314973831177, "logits_per_char": -0.7138157486915588, "num_chars": 2}, {"sum_logits": -1.6649543046951294, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6649543046951294, "logits_per_char": -0.8324771523475647, "num_chars": 2}, {"sum_logits": -2.0263442993164062, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.0263442993164062, "logits_per_char": -1.0131721496582031, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 89, "native_id": "db0cfd52ca6b2bbfcf26d1a898fd929b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7165132761001587, "incorrect_loss_raw": 1.601294994354248, "correct_loss_per_char": 0.8582566380500793, "incorrect_loss_per_char": 0.800647497177124, "correct_loss_per_token": 1.7165132761001587, "incorrect_loss_per_token": 1.601294994354248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4230862855911255, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.4230862855911255, "logits_per_char": -0.7115431427955627, "num_chars": 2}, {"sum_logits": -1.4949251413345337, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.4949251413345337, "logits_per_char": -0.7474625706672668, "num_chars": 2}, {"sum_logits": -1.7165132761001587, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7165132761001587, "logits_per_char": -0.8582566380500793, "num_chars": 2}, {"sum_logits": -1.729887843132019, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.729887843132019, "logits_per_char": -0.8649439215660095, "num_chars": 2}, {"sum_logits": -1.757280707359314, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.757280707359314, "logits_per_char": -0.878640353679657, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 90, "native_id": "400fb2e196e71abb70e5b3f9aab4b9ee", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6348730325698853, "incorrect_loss_raw": 1.6202345490455627, "correct_loss_per_char": 0.8174365162849426, "incorrect_loss_per_char": 0.8101172745227814, "correct_loss_per_token": 1.6348730325698853, "incorrect_loss_per_token": 1.6202345490455627, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3914579153060913, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.3914579153060913, "logits_per_char": -0.6957289576530457, "num_chars": 2}, {"sum_logits": -1.6058284044265747, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6058284044265747, "logits_per_char": -0.8029142022132874, "num_chars": 2}, {"sum_logits": -1.6348730325698853, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6348730325698853, "logits_per_char": -0.8174365162849426, "num_chars": 2}, {"sum_logits": -1.7072480916976929, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7072480916976929, "logits_per_char": -0.8536240458488464, "num_chars": 2}, {"sum_logits": -1.776403784751892, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.776403784751892, "logits_per_char": -0.888201892375946, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 91, "native_id": "3fb36127a61903029a363911a1d2b1e9_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5194735527038574, "incorrect_loss_raw": 1.659992903470993, "correct_loss_per_char": 0.7597367763519287, "incorrect_loss_per_char": 0.8299964517354965, "correct_loss_per_token": 1.5194735527038574, "incorrect_loss_per_token": 1.659992903470993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4437047243118286, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4437047243118286, "logits_per_char": -0.7218523621559143, "num_chars": 2}, {"sum_logits": -1.5703787803649902, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5703787803649902, "logits_per_char": -0.7851893901824951, "num_chars": 2}, {"sum_logits": -1.5194735527038574, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5194735527038574, "logits_per_char": -0.7597367763519287, "num_chars": 2}, {"sum_logits": -1.6130731105804443, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6130731105804443, "logits_per_char": -0.8065365552902222, "num_chars": 2}, {"sum_logits": -2.012814998626709, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.012814998626709, "logits_per_char": -1.0064074993133545, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 92, "native_id": "8494b0b95533dcedbd76ae2916c481d4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5974364280700684, "incorrect_loss_raw": 1.6444626450538635, "correct_loss_per_char": 0.7987182140350342, "incorrect_loss_per_char": 0.8222313225269318, "correct_loss_per_token": 1.5974364280700684, "incorrect_loss_per_token": 1.6444626450538635, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.382374882698059, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.382374882698059, "logits_per_char": -0.6911874413490295, "num_chars": 2}, {"sum_logits": -1.5751100778579712, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5751100778579712, "logits_per_char": -0.7875550389289856, "num_chars": 2}, {"sum_logits": -1.6594741344451904, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6594741344451904, "logits_per_char": -0.8297370672225952, "num_chars": 2}, {"sum_logits": -1.5974364280700684, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5974364280700684, "logits_per_char": -0.7987182140350342, "num_chars": 2}, {"sum_logits": -1.9608914852142334, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.9608914852142334, "logits_per_char": -0.9804457426071167, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 93, "native_id": "1531f1523f5fd24bbdb42c311dbf90e8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8416111469268799, "incorrect_loss_raw": 1.565507024526596, "correct_loss_per_char": 0.9208055734634399, "incorrect_loss_per_char": 0.782753512263298, "correct_loss_per_token": 1.8416111469268799, "incorrect_loss_per_token": 1.565507024526596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4928796291351318, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4928796291351318, "logits_per_char": -0.7464398145675659, "num_chars": 2}, {"sum_logits": -1.5638481378555298, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5638481378555298, "logits_per_char": -0.7819240689277649, "num_chars": 2}, {"sum_logits": -1.5669950246810913, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5669950246810913, "logits_per_char": -0.7834975123405457, "num_chars": 2}, {"sum_logits": -1.6383053064346313, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6383053064346313, "logits_per_char": -0.8191526532173157, "num_chars": 2}, {"sum_logits": -1.8416111469268799, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.8416111469268799, "logits_per_char": -0.9208055734634399, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 94, "native_id": "716ce4404a84b42dd64e561390c4b53b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8148380517959595, "incorrect_loss_raw": 1.5726095736026764, "correct_loss_per_char": 0.9074190258979797, "incorrect_loss_per_char": 0.7863047868013382, "correct_loss_per_token": 1.8148380517959595, "incorrect_loss_per_token": 1.5726095736026764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.462817907333374, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.462817907333374, "logits_per_char": -0.731408953666687, "num_chars": 2}, {"sum_logits": -1.6335276365280151, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6335276365280151, "logits_per_char": -0.8167638182640076, "num_chars": 2}, {"sum_logits": -1.5929538011550903, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5929538011550903, "logits_per_char": -0.7964769005775452, "num_chars": 2}, {"sum_logits": -1.601138949394226, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.601138949394226, "logits_per_char": -0.800569474697113, "num_chars": 2}, {"sum_logits": -1.8148380517959595, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.8148380517959595, "logits_per_char": -0.9074190258979797, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 95, "native_id": "5169f7ae0781b15161551de3a189ebef", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.567394733428955, "incorrect_loss_raw": 1.5255328118801117, "correct_loss_per_char": 1.2836973667144775, "incorrect_loss_per_char": 0.7627664059400558, "correct_loss_per_token": 2.567394733428955, "incorrect_loss_per_token": 1.5255328118801117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0176466703414917, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.0176466703414917, "logits_per_char": -0.5088233351707458, "num_chars": 2}, {"sum_logits": -1.4994025230407715, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4994025230407715, "logits_per_char": -0.7497012615203857, "num_chars": 2}, {"sum_logits": -1.7643280029296875, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7643280029296875, "logits_per_char": -0.8821640014648438, "num_chars": 2}, {"sum_logits": -1.820754051208496, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.820754051208496, "logits_per_char": -0.910377025604248, "num_chars": 2}, {"sum_logits": -2.567394733428955, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.567394733428955, "logits_per_char": -1.2836973667144775, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 96, "native_id": "ef22ef7aeec70aaa688720f805c1cf38", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.494061827659607, "incorrect_loss_raw": 1.6599400639533997, "correct_loss_per_char": 0.7470309138298035, "incorrect_loss_per_char": 0.8299700319766998, "correct_loss_per_token": 1.494061827659607, "incorrect_loss_per_token": 1.6599400639533997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5031250715255737, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5031250715255737, "logits_per_char": -0.7515625357627869, "num_chars": 2}, {"sum_logits": -1.494061827659607, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.494061827659607, "logits_per_char": -0.7470309138298035, "num_chars": 2}, {"sum_logits": -1.558587670326233, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.558587670326233, "logits_per_char": -0.7792938351631165, "num_chars": 2}, {"sum_logits": -1.6343830823898315, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6343830823898315, "logits_per_char": -0.8171915411949158, "num_chars": 2}, {"sum_logits": -1.9436644315719604, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9436644315719604, "logits_per_char": -0.9718322157859802, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 97, "native_id": "514310637fb43a252bfadc8cbf79b277", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6610344648361206, "incorrect_loss_raw": 1.6199957728385925, "correct_loss_per_char": 0.8305172324180603, "incorrect_loss_per_char": 0.8099978864192963, "correct_loss_per_token": 1.6610344648361206, "incorrect_loss_per_token": 1.6199957728385925, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3260987997055054, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.3260987997055054, "logits_per_char": -0.6630493998527527, "num_chars": 2}, {"sum_logits": -1.6673227548599243, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.6673227548599243, "logits_per_char": -0.8336613774299622, "num_chars": 2}, {"sum_logits": -1.754116177558899, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.754116177558899, "logits_per_char": -0.8770580887794495, "num_chars": 2}, {"sum_logits": -1.6610344648361206, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.6610344648361206, "logits_per_char": -0.8305172324180603, "num_chars": 2}, {"sum_logits": -1.7324453592300415, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.7324453592300415, "logits_per_char": -0.8662226796150208, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 98, "native_id": "9370b2b0897b796dec4a40f107854c8d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5525346994400024, "incorrect_loss_raw": 1.6682928800582886, "correct_loss_per_char": 0.7762673497200012, "incorrect_loss_per_char": 0.8341464400291443, "correct_loss_per_token": 1.5525346994400024, "incorrect_loss_per_token": 1.6682928800582886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2451492547988892, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.2451492547988892, "logits_per_char": -0.6225746273994446, "num_chars": 2}, {"sum_logits": -1.5525346994400024, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5525346994400024, "logits_per_char": -0.7762673497200012, "num_chars": 2}, {"sum_logits": -1.6767328977584839, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6767328977584839, "logits_per_char": -0.8383664488792419, "num_chars": 2}, {"sum_logits": -1.779298186302185, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.779298186302185, "logits_per_char": -0.8896490931510925, "num_chars": 2}, {"sum_logits": -1.9719911813735962, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9719911813735962, "logits_per_char": -0.9859955906867981, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 99, "native_id": "49902e768c45aa41a0f9f95be81114e5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1177759170532227, "incorrect_loss_raw": 1.527267575263977, "correct_loss_per_char": 1.0588879585266113, "incorrect_loss_per_char": 0.7636337876319885, "correct_loss_per_token": 2.1177759170532227, "incorrect_loss_per_token": 1.527267575263977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3613322973251343, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.3613322973251343, "logits_per_char": -0.6806661486625671, "num_chars": 2}, {"sum_logits": -1.4582589864730835, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.4582589864730835, "logits_per_char": -0.7291294932365417, "num_chars": 2}, {"sum_logits": -1.612735390663147, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.612735390663147, "logits_per_char": -0.8063676953315735, "num_chars": 2}, {"sum_logits": -1.6767436265945435, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6767436265945435, "logits_per_char": -0.8383718132972717, "num_chars": 2}, {"sum_logits": -2.1177759170532227, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -2.1177759170532227, "logits_per_char": -1.0588879585266113, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 100, "native_id": "e1f90cd664a6b150291e6d8444d85c54", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.516413688659668, "incorrect_loss_raw": 1.6681921482086182, "correct_loss_per_char": 0.758206844329834, "incorrect_loss_per_char": 0.8340960741043091, "correct_loss_per_token": 1.516413688659668, "incorrect_loss_per_token": 1.6681921482086182, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3068604469299316, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3068604469299316, "logits_per_char": -0.6534302234649658, "num_chars": 2}, {"sum_logits": -1.516413688659668, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.516413688659668, "logits_per_char": -0.758206844329834, "num_chars": 2}, {"sum_logits": -1.7176923751831055, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7176923751831055, "logits_per_char": -0.8588461875915527, "num_chars": 2}, {"sum_logits": -1.6810276508331299, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6810276508331299, "logits_per_char": -0.8405138254165649, "num_chars": 2}, {"sum_logits": -1.9671881198883057, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.9671881198883057, "logits_per_char": -0.9835940599441528, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 101, "native_id": "320ec9b68fdefe13d59cc8b628083790", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5660033226013184, "incorrect_loss_raw": 1.6523192524909973, "correct_loss_per_char": 0.7830016613006592, "incorrect_loss_per_char": 0.8261596262454987, "correct_loss_per_token": 1.5660033226013184, "incorrect_loss_per_token": 1.6523192524909973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4083197116851807, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4083197116851807, "logits_per_char": -0.7041598558425903, "num_chars": 2}, {"sum_logits": -1.6016203165054321, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6016203165054321, "logits_per_char": -0.8008101582527161, "num_chars": 2}, {"sum_logits": -1.5943554639816284, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5943554639816284, "logits_per_char": -0.7971777319908142, "num_chars": 2}, {"sum_logits": -1.5660033226013184, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5660033226013184, "logits_per_char": -0.7830016613006592, "num_chars": 2}, {"sum_logits": -2.004981517791748, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.004981517791748, "logits_per_char": -1.002490758895874, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 102, "native_id": "964185aed0e381853332bca1a4d91f46", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.630494236946106, "incorrect_loss_raw": 1.622978687286377, "correct_loss_per_char": 0.815247118473053, "incorrect_loss_per_char": 0.8114893436431885, "correct_loss_per_token": 1.630494236946106, "incorrect_loss_per_token": 1.622978687286377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3903688192367554, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.3903688192367554, "logits_per_char": -0.6951844096183777, "num_chars": 2}, {"sum_logits": -1.6246803998947144, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6246803998947144, "logits_per_char": -0.8123401999473572, "num_chars": 2}, {"sum_logits": -1.6957699060440063, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6957699060440063, "logits_per_char": -0.8478849530220032, "num_chars": 2}, {"sum_logits": -1.630494236946106, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.630494236946106, "logits_per_char": -0.815247118473053, "num_chars": 2}, {"sum_logits": -1.7810956239700317, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7810956239700317, "logits_per_char": -0.8905478119850159, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 103, "native_id": "db8e010754c532d78635e5b7cf81a147", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.813686490058899, "incorrect_loss_raw": 1.5781443119049072, "correct_loss_per_char": 0.9068432450294495, "incorrect_loss_per_char": 0.7890721559524536, "correct_loss_per_token": 1.813686490058899, "incorrect_loss_per_token": 1.5781443119049072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.47517991065979, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.47517991065979, "logits_per_char": -0.737589955329895, "num_chars": 2}, {"sum_logits": -1.4882586002349854, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.4882586002349854, "logits_per_char": -0.7441293001174927, "num_chars": 2}, {"sum_logits": -1.6304420232772827, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6304420232772827, "logits_per_char": -0.8152210116386414, "num_chars": 2}, {"sum_logits": -1.7186967134475708, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.7186967134475708, "logits_per_char": -0.8593483567237854, "num_chars": 2}, {"sum_logits": -1.813686490058899, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.813686490058899, "logits_per_char": -0.9068432450294495, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 104, "native_id": "998381f854f51da2a6ccde45909e5168", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7116166353225708, "incorrect_loss_raw": 1.614499807357788, "correct_loss_per_char": 0.8558083176612854, "incorrect_loss_per_char": 0.807249903678894, "correct_loss_per_token": 1.7116166353225708, "incorrect_loss_per_token": 1.614499807357788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4267562627792358, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4267562627792358, "logits_per_char": -0.7133781313896179, "num_chars": 2}, {"sum_logits": -1.4755645990371704, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4755645990371704, "logits_per_char": -0.7377822995185852, "num_chars": 2}, {"sum_logits": -1.5634781122207642, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5634781122207642, "logits_per_char": -0.7817390561103821, "num_chars": 2}, {"sum_logits": -1.7116166353225708, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7116166353225708, "logits_per_char": -0.8558083176612854, "num_chars": 2}, {"sum_logits": -1.992200255393982, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.992200255393982, "logits_per_char": -0.996100127696991, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 105, "native_id": "bc38ad28e99cff7a65771233f734a007", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6462185382843018, "incorrect_loss_raw": 1.6617875695228577, "correct_loss_per_char": 0.8231092691421509, "incorrect_loss_per_char": 0.8308937847614288, "correct_loss_per_token": 1.6462185382843018, "incorrect_loss_per_token": 1.6617875695228577, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2829222679138184, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2829222679138184, "logits_per_char": -0.6414611339569092, "num_chars": 2}, {"sum_logits": -1.545773983001709, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.545773983001709, "logits_per_char": -0.7728869915008545, "num_chars": 2}, {"sum_logits": -1.5786633491516113, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5786633491516113, "logits_per_char": -0.7893316745758057, "num_chars": 2}, {"sum_logits": -1.6462185382843018, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6462185382843018, "logits_per_char": -0.8231092691421509, "num_chars": 2}, {"sum_logits": -2.239790678024292, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.239790678024292, "logits_per_char": -1.119895339012146, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 106, "native_id": "e3949997bf9d02048cfa5d8dd0f287aa", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1340770721435547, "incorrect_loss_raw": 1.5290243029594421, "correct_loss_per_char": 1.0670385360717773, "incorrect_loss_per_char": 0.7645121514797211, "correct_loss_per_token": 2.1340770721435547, "incorrect_loss_per_token": 1.5290243029594421, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3012194633483887, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3012194633483887, "logits_per_char": -0.6506097316741943, "num_chars": 2}, {"sum_logits": -1.5413252115249634, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5413252115249634, "logits_per_char": -0.7706626057624817, "num_chars": 2}, {"sum_logits": -1.5813167095184326, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5813167095184326, "logits_per_char": -0.7906583547592163, "num_chars": 2}, {"sum_logits": -1.6922358274459839, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6922358274459839, "logits_per_char": -0.8461179137229919, "num_chars": 2}, {"sum_logits": -2.1340770721435547, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.1340770721435547, "logits_per_char": -1.0670385360717773, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 107, "native_id": "a7d51b753c2113d8b2dbd0ebb5375855", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4695789813995361, "incorrect_loss_raw": 1.6607982814311981, "correct_loss_per_char": 0.7347894906997681, "incorrect_loss_per_char": 0.8303991407155991, "correct_loss_per_token": 1.4695789813995361, "incorrect_loss_per_token": 1.6607982814311981, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4695789813995361, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.4695789813995361, "logits_per_char": -0.7347894906997681, "num_chars": 2}, {"sum_logits": -1.501157283782959, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.501157283782959, "logits_per_char": -0.7505786418914795, "num_chars": 2}, {"sum_logits": -1.64008629322052, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.64008629322052, "logits_per_char": -0.82004314661026, "num_chars": 2}, {"sum_logits": -1.6860064268112183, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6860064268112183, "logits_per_char": -0.8430032134056091, "num_chars": 2}, {"sum_logits": -1.8159431219100952, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.8159431219100952, "logits_per_char": -0.9079715609550476, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 108, "native_id": "3e4b326aff96e9adbb52ba18cfa877b2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4398337602615356, "incorrect_loss_raw": 1.6652123928070068, "correct_loss_per_char": 0.7199168801307678, "incorrect_loss_per_char": 0.8326061964035034, "correct_loss_per_token": 1.4398337602615356, "incorrect_loss_per_token": 1.6652123928070068, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4398337602615356, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.4398337602615356, "logits_per_char": -0.7199168801307678, "num_chars": 2}, {"sum_logits": -1.6382304430007935, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6382304430007935, "logits_per_char": -0.8191152215003967, "num_chars": 2}, {"sum_logits": -1.6742080450057983, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6742080450057983, "logits_per_char": -0.8371040225028992, "num_chars": 2}, {"sum_logits": -1.5786129236221313, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.5786129236221313, "logits_per_char": -0.7893064618110657, "num_chars": 2}, {"sum_logits": -1.7697981595993042, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.7697981595993042, "logits_per_char": -0.8848990797996521, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 109, "native_id": "5ac83e9e6fa9851ad3cccb0d57c1d88f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0258426666259766, "incorrect_loss_raw": 1.5377634167671204, "correct_loss_per_char": 1.0129213333129883, "incorrect_loss_per_char": 0.7688817083835602, "correct_loss_per_token": 2.0258426666259766, "incorrect_loss_per_token": 1.5377634167671204, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.370979905128479, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.370979905128479, "logits_per_char": -0.6854899525642395, "num_chars": 2}, {"sum_logits": -1.5786534547805786, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5786534547805786, "logits_per_char": -0.7893267273902893, "num_chars": 2}, {"sum_logits": -1.567015290260315, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.567015290260315, "logits_per_char": -0.7835076451301575, "num_chars": 2}, {"sum_logits": -1.6344050168991089, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6344050168991089, "logits_per_char": -0.8172025084495544, "num_chars": 2}, {"sum_logits": -2.0258426666259766, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0258426666259766, "logits_per_char": -1.0129213333129883, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 110, "native_id": "2c0030cc14a27be2401dcfdaa501f0fc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5774238109588623, "incorrect_loss_raw": 1.6497285962104797, "correct_loss_per_char": 0.7887119054794312, "incorrect_loss_per_char": 0.8248642981052399, "correct_loss_per_token": 1.5774238109588623, "incorrect_loss_per_token": 1.6497285962104797, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4250056743621826, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.4250056743621826, "logits_per_char": -0.7125028371810913, "num_chars": 2}, {"sum_logits": -1.5358424186706543, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5358424186706543, "logits_per_char": -0.7679212093353271, "num_chars": 2}, {"sum_logits": -1.5774238109588623, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5774238109588623, "logits_per_char": -0.7887119054794312, "num_chars": 2}, {"sum_logits": -1.6106483936309814, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6106483936309814, "logits_per_char": -0.8053241968154907, "num_chars": 2}, {"sum_logits": -2.0274178981781006, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.0274178981781006, "logits_per_char": -1.0137089490890503, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 111, "native_id": "feb83263e6be392351db0794004efc3f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6661020517349243, "incorrect_loss_raw": 1.691992163658142, "correct_loss_per_char": 0.8330510258674622, "incorrect_loss_per_char": 0.845996081829071, "correct_loss_per_token": 1.6661020517349243, "incorrect_loss_per_token": 1.691992163658142, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1816730499267578, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.1816730499267578, "logits_per_char": -0.5908365249633789, "num_chars": 2}, {"sum_logits": -1.5180479288101196, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5180479288101196, "logits_per_char": -0.7590239644050598, "num_chars": 2}, {"sum_logits": -1.6661020517349243, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6661020517349243, "logits_per_char": -0.8330510258674622, "num_chars": 2}, {"sum_logits": -1.6629575490951538, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6629575490951538, "logits_per_char": -0.8314787745475769, "num_chars": 2}, {"sum_logits": -2.405290126800537, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.405290126800537, "logits_per_char": -1.2026450634002686, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 112, "native_id": "80697d599280d994d8a584c95824ef1f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.968178153038025, "incorrect_loss_raw": 1.5449653565883636, "correct_loss_per_char": 0.9840890765190125, "incorrect_loss_per_char": 0.7724826782941818, "correct_loss_per_token": 1.968178153038025, "incorrect_loss_per_token": 1.5449653565883636, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4492316246032715, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4492316246032715, "logits_per_char": -0.7246158123016357, "num_chars": 2}, {"sum_logits": -1.6213101148605347, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6213101148605347, "logits_per_char": -0.8106550574302673, "num_chars": 2}, {"sum_logits": -1.592093825340271, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.592093825340271, "logits_per_char": -0.7960469126701355, "num_chars": 2}, {"sum_logits": -1.5172258615493774, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5172258615493774, "logits_per_char": -0.7586129307746887, "num_chars": 2}, {"sum_logits": -1.968178153038025, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.968178153038025, "logits_per_char": -0.9840890765190125, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 113, "native_id": "3c1800e7dd96d37fdd3c51b9fe502342", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5921573638916016, "incorrect_loss_raw": 1.62715482711792, "correct_loss_per_char": 0.7960786819458008, "incorrect_loss_per_char": 0.81357741355896, "correct_loss_per_token": 1.5921573638916016, "incorrect_loss_per_token": 1.62715482711792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.523850679397583, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.523850679397583, "logits_per_char": -0.7619253396987915, "num_chars": 2}, {"sum_logits": -1.545318365097046, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.545318365097046, "logits_per_char": -0.772659182548523, "num_chars": 2}, {"sum_logits": -1.616873025894165, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.616873025894165, "logits_per_char": -0.8084365129470825, "num_chars": 2}, {"sum_logits": -1.5921573638916016, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.5921573638916016, "logits_per_char": -0.7960786819458008, "num_chars": 2}, {"sum_logits": -1.8225772380828857, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.8225772380828857, "logits_per_char": -0.9112886190414429, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 114, "native_id": "4da33e6f4b789776acb1bc10195baa83", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4444750547409058, "incorrect_loss_raw": 1.664629191160202, "correct_loss_per_char": 0.7222375273704529, "incorrect_loss_per_char": 0.832314595580101, "correct_loss_per_token": 1.4444750547409058, "incorrect_loss_per_token": 1.664629191160202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.665339708328247, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.665339708328247, "logits_per_char": -0.8326698541641235, "num_chars": 2}, {"sum_logits": -1.4444750547409058, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4444750547409058, "logits_per_char": -0.7222375273704529, "num_chars": 2}, {"sum_logits": -1.6213618516921997, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6213618516921997, "logits_per_char": -0.8106809258460999, "num_chars": 2}, {"sum_logits": -1.5957348346710205, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5957348346710205, "logits_per_char": -0.7978674173355103, "num_chars": 2}, {"sum_logits": -1.7760803699493408, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7760803699493408, "logits_per_char": -0.8880401849746704, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 115, "native_id": "ae038e9af9d5a511ada7456b5e73b15e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4817918539047241, "incorrect_loss_raw": 1.6915223300457, "correct_loss_per_char": 0.7408959269523621, "incorrect_loss_per_char": 0.84576116502285, "correct_loss_per_token": 1.4817918539047241, "incorrect_loss_per_token": 1.6915223300457, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3617087602615356, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3617087602615356, "logits_per_char": -0.6808543801307678, "num_chars": 2}, {"sum_logits": -1.4817918539047241, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4817918539047241, "logits_per_char": -0.7408959269523621, "num_chars": 2}, {"sum_logits": -1.574400544166565, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.574400544166565, "logits_per_char": -0.7872002720832825, "num_chars": 2}, {"sum_logits": -1.6767059564590454, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6767059564590454, "logits_per_char": -0.8383529782295227, "num_chars": 2}, {"sum_logits": -2.1532740592956543, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.1532740592956543, "logits_per_char": -1.0766370296478271, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 116, "native_id": "a400b9fd1e319f901471c4b42d401c52", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9066146612167358, "incorrect_loss_raw": 1.557438611984253, "correct_loss_per_char": 0.9533073306083679, "incorrect_loss_per_char": 0.7787193059921265, "correct_loss_per_token": 1.9066146612167358, "incorrect_loss_per_token": 1.557438611984253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.423923134803772, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.423923134803772, "logits_per_char": -0.711961567401886, "num_chars": 2}, {"sum_logits": -1.643687129020691, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.643687129020691, "logits_per_char": -0.8218435645103455, "num_chars": 2}, {"sum_logits": -1.5504978895187378, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5504978895187378, "logits_per_char": -0.7752489447593689, "num_chars": 2}, {"sum_logits": -1.611646294593811, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.611646294593811, "logits_per_char": -0.8058231472969055, "num_chars": 2}, {"sum_logits": -1.9066146612167358, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.9066146612167358, "logits_per_char": -0.9533073306083679, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 117, "native_id": "9dffd2021771e0ecddb19031acf3701b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5299415588378906, "incorrect_loss_raw": 1.6507458984851837, "correct_loss_per_char": 0.7649707794189453, "incorrect_loss_per_char": 0.8253729492425919, "correct_loss_per_token": 1.5299415588378906, "incorrect_loss_per_token": 1.6507458984851837, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.511371374130249, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.511371374130249, "logits_per_char": -0.7556856870651245, "num_chars": 2}, {"sum_logits": -1.636717677116394, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.636717677116394, "logits_per_char": -0.818358838558197, "num_chars": 2}, {"sum_logits": -1.5299415588378906, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5299415588378906, "logits_per_char": -0.7649707794189453, "num_chars": 2}, {"sum_logits": -1.5194684267044067, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5194684267044067, "logits_per_char": -0.7597342133522034, "num_chars": 2}, {"sum_logits": -1.935426115989685, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.935426115989685, "logits_per_char": -0.9677130579948425, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 118, "native_id": "3730c646fdf54472ab873aac9ff7852e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0423524379730225, "incorrect_loss_raw": 1.5494927763938904, "correct_loss_per_char": 1.0211762189865112, "incorrect_loss_per_char": 0.7747463881969452, "correct_loss_per_token": 2.0423524379730225, "incorrect_loss_per_token": 1.5494927763938904, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2747735977172852, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2747735977172852, "logits_per_char": -0.6373867988586426, "num_chars": 2}, {"sum_logits": -1.512526273727417, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.512526273727417, "logits_per_char": -0.7562631368637085, "num_chars": 2}, {"sum_logits": -1.638273000717163, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.638273000717163, "logits_per_char": -0.8191365003585815, "num_chars": 2}, {"sum_logits": -1.7723982334136963, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7723982334136963, "logits_per_char": -0.8861991167068481, "num_chars": 2}, {"sum_logits": -2.0423524379730225, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0423524379730225, "logits_per_char": -1.0211762189865112, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 119, "native_id": "175e7dcdded13d5adafaebf2264c3abd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5405412912368774, "incorrect_loss_raw": 1.6594798266887665, "correct_loss_per_char": 0.7702706456184387, "incorrect_loss_per_char": 0.8297399133443832, "correct_loss_per_token": 1.5405412912368774, "incorrect_loss_per_token": 1.6594798266887665, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.465997576713562, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.465997576713562, "logits_per_char": -0.732998788356781, "num_chars": 2}, {"sum_logits": -1.5244585275650024, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5244585275650024, "logits_per_char": -0.7622292637825012, "num_chars": 2}, {"sum_logits": -1.5405412912368774, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5405412912368774, "logits_per_char": -0.7702706456184387, "num_chars": 2}, {"sum_logits": -1.5797418355941772, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5797418355941772, "logits_per_char": -0.7898709177970886, "num_chars": 2}, {"sum_logits": -2.067721366882324, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.067721366882324, "logits_per_char": -1.033860683441162, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 120, "native_id": "11d7db1d8e1cff2f40d4184f15cf7ae7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.60023033618927, "incorrect_loss_raw": 1.6213008761405945, "correct_loss_per_char": 0.800115168094635, "incorrect_loss_per_char": 0.8106504380702972, "correct_loss_per_token": 1.60023033618927, "incorrect_loss_per_token": 1.6213008761405945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5853379964828491, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.5853379964828491, "logits_per_char": -0.7926689982414246, "num_chars": 2}, {"sum_logits": -1.60023033618927, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.60023033618927, "logits_per_char": -0.800115168094635, "num_chars": 2}, {"sum_logits": -1.6114581823349, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6114581823349, "logits_per_char": -0.80572909116745, "num_chars": 2}, {"sum_logits": -1.5654810667037964, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": true, "logits_per_token": -1.5654810667037964, "logits_per_char": -0.7827405333518982, "num_chars": 2}, {"sum_logits": -1.7229262590408325, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.7229262590408325, "logits_per_char": -0.8614631295204163, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 121, "native_id": "08db69edf0ec5848c1a53dca8fc1601a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5377323627471924, "incorrect_loss_raw": 1.6597944498062134, "correct_loss_per_char": 0.7688661813735962, "incorrect_loss_per_char": 0.8298972249031067, "correct_loss_per_token": 1.5377323627471924, "incorrect_loss_per_token": 1.6597944498062134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3875422477722168, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3875422477722168, "logits_per_char": -0.6937711238861084, "num_chars": 2}, {"sum_logits": -1.5551972389221191, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5551972389221191, "logits_per_char": -0.7775986194610596, "num_chars": 2}, {"sum_logits": -1.5377323627471924, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5377323627471924, "logits_per_char": -0.7688661813735962, "num_chars": 2}, {"sum_logits": -1.7018184661865234, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7018184661865234, "logits_per_char": -0.8509092330932617, "num_chars": 2}, {"sum_logits": -1.9946198463439941, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9946198463439941, "logits_per_char": -0.9973099231719971, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 122, "native_id": "855ab6ba47f6311104c4d29e24ef0234", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7264853715896606, "incorrect_loss_raw": 1.67608642578125, "correct_loss_per_char": 0.8632426857948303, "incorrect_loss_per_char": 0.838043212890625, "correct_loss_per_token": 1.7264853715896606, "incorrect_loss_per_token": 1.67608642578125, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2689554691314697, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.2689554691314697, "logits_per_char": -0.6344777345657349, "num_chars": 2}, {"sum_logits": -1.411807656288147, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.411807656288147, "logits_per_char": -0.7059038281440735, "num_chars": 2}, {"sum_logits": -1.5908910036087036, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5908910036087036, "logits_per_char": -0.7954455018043518, "num_chars": 2}, {"sum_logits": -1.7264853715896606, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7264853715896606, "logits_per_char": -0.8632426857948303, "num_chars": 2}, {"sum_logits": -2.4326915740966797, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.4326915740966797, "logits_per_char": -1.2163457870483398, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 123, "native_id": "7ec11eeca4221795c117943ca2639e86", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.737549066543579, "incorrect_loss_raw": 1.6116988956928253, "correct_loss_per_char": 0.8687745332717896, "incorrect_loss_per_char": 0.8058494478464127, "correct_loss_per_token": 1.737549066543579, "incorrect_loss_per_token": 1.6116988956928253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4215707778930664, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.4215707778930664, "logits_per_char": -0.7107853889465332, "num_chars": 2}, {"sum_logits": -1.4684498310089111, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.4684498310089111, "logits_per_char": -0.7342249155044556, "num_chars": 2}, {"sum_logits": -1.5611425638198853, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5611425638198853, "logits_per_char": -0.7805712819099426, "num_chars": 2}, {"sum_logits": -1.737549066543579, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.737549066543579, "logits_per_char": -0.8687745332717896, "num_chars": 2}, {"sum_logits": -1.9956324100494385, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.9956324100494385, "logits_per_char": -0.9978162050247192, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 124, "native_id": "e9389b08fdd17f14b148d498d6ff4dfe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4400426149368286, "incorrect_loss_raw": 1.6737733483314514, "correct_loss_per_char": 0.7200213074684143, "incorrect_loss_per_char": 0.8368866741657257, "correct_loss_per_token": 1.4400426149368286, "incorrect_loss_per_token": 1.6737733483314514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4400426149368286, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4400426149368286, "logits_per_char": -0.7200213074684143, "num_chars": 2}, {"sum_logits": -1.572224736213684, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.572224736213684, "logits_per_char": -0.786112368106842, "num_chars": 2}, {"sum_logits": -1.5923073291778564, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5923073291778564, "logits_per_char": -0.7961536645889282, "num_chars": 2}, {"sum_logits": -1.6099451780319214, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6099451780319214, "logits_per_char": -0.8049725890159607, "num_chars": 2}, {"sum_logits": -1.9206161499023438, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.9206161499023438, "logits_per_char": -0.9603080749511719, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 125, "native_id": "afa2899cc21e204fa64e63e7839e8c1e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5153898000717163, "incorrect_loss_raw": 1.6987337470054626, "correct_loss_per_char": 0.7576949000358582, "incorrect_loss_per_char": 0.8493668735027313, "correct_loss_per_token": 1.5153898000717163, "incorrect_loss_per_token": 1.6987337470054626, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1952917575836182, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.1952917575836182, "logits_per_char": -0.5976458787918091, "num_chars": 2}, {"sum_logits": -1.5153898000717163, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5153898000717163, "logits_per_char": -0.7576949000358582, "num_chars": 2}, {"sum_logits": -1.6773887872695923, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6773887872695923, "logits_per_char": -0.8386943936347961, "num_chars": 2}, {"sum_logits": -1.7556079626083374, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7556079626083374, "logits_per_char": -0.8778039813041687, "num_chars": 2}, {"sum_logits": -2.1666464805603027, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.1666464805603027, "logits_per_char": -1.0833232402801514, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 126, "native_id": "f898eb5b789d2dc6804edba269f051f0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7528280019760132, "incorrect_loss_raw": 1.60581374168396, "correct_loss_per_char": 0.8764140009880066, "incorrect_loss_per_char": 0.80290687084198, "correct_loss_per_token": 1.7528280019760132, "incorrect_loss_per_token": 1.60581374168396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3288605213165283, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3288605213165283, "logits_per_char": -0.6644302606582642, "num_chars": 2}, {"sum_logits": -1.4897947311401367, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4897947311401367, "logits_per_char": -0.7448973655700684, "num_chars": 2}, {"sum_logits": -1.7094982862472534, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7094982862472534, "logits_per_char": -0.8547491431236267, "num_chars": 2}, {"sum_logits": -1.7528280019760132, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7528280019760132, "logits_per_char": -0.8764140009880066, "num_chars": 2}, {"sum_logits": -1.8951014280319214, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8951014280319214, "logits_per_char": -0.9475507140159607, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 127, "native_id": "7ed7379fc51fd35a47be022f6c56ce51", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5493779182434082, "incorrect_loss_raw": 1.652539312839508, "correct_loss_per_char": 0.7746889591217041, "incorrect_loss_per_char": 0.826269656419754, "correct_loss_per_token": 1.5493779182434082, "incorrect_loss_per_token": 1.652539312839508, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3633918762207031, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.3633918762207031, "logits_per_char": -0.6816959381103516, "num_chars": 2}, {"sum_logits": -1.5493779182434082, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5493779182434082, "logits_per_char": -0.7746889591217041, "num_chars": 2}, {"sum_logits": -1.6361587047576904, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6361587047576904, "logits_per_char": -0.8180793523788452, "num_chars": 2}, {"sum_logits": -1.6627671718597412, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6627671718597412, "logits_per_char": -0.8313835859298706, "num_chars": 2}, {"sum_logits": -1.9478394985198975, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.9478394985198975, "logits_per_char": -0.9739197492599487, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 128, "native_id": "15798a23ee6952fedd6d202064069126", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1405959129333496, "incorrect_loss_raw": 1.5273503363132477, "correct_loss_per_char": 1.0702979564666748, "incorrect_loss_per_char": 0.7636751681566238, "correct_loss_per_token": 2.1405959129333496, "incorrect_loss_per_token": 1.5273503363132477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3431847095489502, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3431847095489502, "logits_per_char": -0.6715923547744751, "num_chars": 2}, {"sum_logits": -1.450210690498352, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.450210690498352, "logits_per_char": -0.725105345249176, "num_chars": 2}, {"sum_logits": -1.5876773595809937, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5876773595809937, "logits_per_char": -0.7938386797904968, "num_chars": 2}, {"sum_logits": -1.7283285856246948, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7283285856246948, "logits_per_char": -0.8641642928123474, "num_chars": 2}, {"sum_logits": -2.1405959129333496, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.1405959129333496, "logits_per_char": -1.0702979564666748, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 129, "native_id": "273d0134e8ce53d4ebcf41ca7fde02af", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6674695014953613, "incorrect_loss_raw": 1.6347918510437012, "correct_loss_per_char": 0.8337347507476807, "incorrect_loss_per_char": 0.8173959255218506, "correct_loss_per_token": 1.6674695014953613, "incorrect_loss_per_token": 1.6347918510437012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.339285135269165, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.339285135269165, "logits_per_char": -0.6696425676345825, "num_chars": 2}, {"sum_logits": -1.6142728328704834, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6142728328704834, "logits_per_char": -0.8071364164352417, "num_chars": 2}, {"sum_logits": -1.5256054401397705, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5256054401397705, "logits_per_char": -0.7628027200698853, "num_chars": 2}, {"sum_logits": -1.6674695014953613, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6674695014953613, "logits_per_char": -0.8337347507476807, "num_chars": 2}, {"sum_logits": -2.0600039958953857, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0600039958953857, "logits_per_char": -1.0300019979476929, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 130, "native_id": "2f0931adc3d0d422d9ab6264395e89d8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6123433113098145, "incorrect_loss_raw": 1.633171558380127, "correct_loss_per_char": 0.8061716556549072, "incorrect_loss_per_char": 0.8165857791900635, "correct_loss_per_token": 1.6123433113098145, "incorrect_loss_per_token": 1.633171558380127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4847017526626587, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4847017526626587, "logits_per_char": -0.7423508763313293, "num_chars": 2}, {"sum_logits": -1.5098859071731567, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5098859071731567, "logits_per_char": -0.7549429535865784, "num_chars": 2}, {"sum_logits": -1.5520703792572021, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5520703792572021, "logits_per_char": -0.7760351896286011, "num_chars": 2}, {"sum_logits": -1.6123433113098145, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6123433113098145, "logits_per_char": -0.8061716556549072, "num_chars": 2}, {"sum_logits": -1.9860281944274902, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9860281944274902, "logits_per_char": -0.9930140972137451, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 131, "native_id": "d00d3ba777cb3889a45799d72fca0a50", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5560954809188843, "incorrect_loss_raw": 1.6324149072170258, "correct_loss_per_char": 0.7780477404594421, "incorrect_loss_per_char": 0.8162074536085129, "correct_loss_per_token": 1.5560954809188843, "incorrect_loss_per_token": 1.6324149072170258, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5505497455596924, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.5505497455596924, "logits_per_char": -0.7752748727798462, "num_chars": 2}, {"sum_logits": -1.573297142982483, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.573297142982483, "logits_per_char": -0.7866485714912415, "num_chars": 2}, {"sum_logits": -1.5560954809188843, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5560954809188843, "logits_per_char": -0.7780477404594421, "num_chars": 2}, {"sum_logits": -1.769237756729126, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.769237756729126, "logits_per_char": -0.884618878364563, "num_chars": 2}, {"sum_logits": -1.6365749835968018, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6365749835968018, "logits_per_char": -0.8182874917984009, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 132, "native_id": "b1f36d1c8ab7e5a28783cb38e8709c27", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.381711721420288, "incorrect_loss_raw": 1.691058337688446, "correct_loss_per_char": 0.690855860710144, "incorrect_loss_per_char": 0.845529168844223, "correct_loss_per_token": 1.381711721420288, "incorrect_loss_per_token": 1.691058337688446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.381711721420288, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.381711721420288, "logits_per_char": -0.690855860710144, "num_chars": 2}, {"sum_logits": -1.5017296075820923, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5017296075820923, "logits_per_char": -0.7508648037910461, "num_chars": 2}, {"sum_logits": -1.6720157861709595, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6720157861709595, "logits_per_char": -0.8360078930854797, "num_chars": 2}, {"sum_logits": -1.690423607826233, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.690423607826233, "logits_per_char": -0.8452118039131165, "num_chars": 2}, {"sum_logits": -1.9000643491744995, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9000643491744995, "logits_per_char": -0.9500321745872498, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 133, "native_id": "a5e76dd088aab4f89e2fe93f6de6e46d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6001449823379517, "incorrect_loss_raw": 1.6433899104595184, "correct_loss_per_char": 0.8000724911689758, "incorrect_loss_per_char": 0.8216949552297592, "correct_loss_per_token": 1.6001449823379517, "incorrect_loss_per_token": 1.6433899104595184, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.471290111541748, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.471290111541748, "logits_per_char": -0.735645055770874, "num_chars": 2}, {"sum_logits": -1.434475302696228, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.434475302696228, "logits_per_char": -0.717237651348114, "num_chars": 2}, {"sum_logits": -1.6623375415802002, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6623375415802002, "logits_per_char": -0.8311687707901001, "num_chars": 2}, {"sum_logits": -1.6001449823379517, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6001449823379517, "logits_per_char": -0.8000724911689758, "num_chars": 2}, {"sum_logits": -2.0054566860198975, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0054566860198975, "logits_per_char": -1.0027283430099487, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 134, "native_id": "ac6f0e24dd6203cda43e1089dcf081d6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7014646530151367, "incorrect_loss_raw": 1.6199721097946167, "correct_loss_per_char": 0.8507323265075684, "incorrect_loss_per_char": 0.8099860548973083, "correct_loss_per_token": 1.7014646530151367, "incorrect_loss_per_token": 1.6199721097946167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4179024696350098, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.4179024696350098, "logits_per_char": -0.7089512348175049, "num_chars": 2}, {"sum_logits": -1.4574189186096191, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.4574189186096191, "logits_per_char": -0.7287094593048096, "num_chars": 2}, {"sum_logits": -1.5870060920715332, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.5870060920715332, "logits_per_char": -0.7935030460357666, "num_chars": 2}, {"sum_logits": -1.7014646530151367, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.7014646530151367, "logits_per_char": -0.8507323265075684, "num_chars": 2}, {"sum_logits": -2.0175609588623047, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -2.0175609588623047, "logits_per_char": -1.0087804794311523, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 135, "native_id": "1ab746bcd100ccf513055fe93c61010b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6233079433441162, "incorrect_loss_raw": 1.6297983527183533, "correct_loss_per_char": 0.8116539716720581, "incorrect_loss_per_char": 0.8148991763591766, "correct_loss_per_token": 1.6233079433441162, "incorrect_loss_per_token": 1.6297983527183533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4277478456497192, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4277478456497192, "logits_per_char": -0.7138739228248596, "num_chars": 2}, {"sum_logits": -1.6233079433441162, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6233079433441162, "logits_per_char": -0.8116539716720581, "num_chars": 2}, {"sum_logits": -1.5800738334655762, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5800738334655762, "logits_per_char": -0.7900369167327881, "num_chars": 2}, {"sum_logits": -1.564988374710083, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.564988374710083, "logits_per_char": -0.7824941873550415, "num_chars": 2}, {"sum_logits": -1.9463833570480347, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9463833570480347, "logits_per_char": -0.9731916785240173, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 136, "native_id": "af836abc58e0daf36df1d8d6830b70c5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4813694953918457, "incorrect_loss_raw": 1.7036252617835999, "correct_loss_per_char": 0.7406847476959229, "incorrect_loss_per_char": 0.8518126308917999, "correct_loss_per_token": 1.4813694953918457, "incorrect_loss_per_token": 1.7036252617835999, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2551383972167969, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2551383972167969, "logits_per_char": -0.6275691986083984, "num_chars": 2}, {"sum_logits": -1.4813694953918457, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4813694953918457, "logits_per_char": -0.7406847476959229, "num_chars": 2}, {"sum_logits": -1.68978750705719, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.68978750705719, "logits_per_char": -0.844893753528595, "num_chars": 2}, {"sum_logits": -1.6791807413101196, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6791807413101196, "logits_per_char": -0.8395903706550598, "num_chars": 2}, {"sum_logits": -2.190394401550293, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.190394401550293, "logits_per_char": -1.0951972007751465, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 137, "native_id": "2ed66cfd206723a006b37599b516ad6e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.752898097038269, "incorrect_loss_raw": 1.5867182910442352, "correct_loss_per_char": 0.8764490485191345, "incorrect_loss_per_char": 0.7933591455221176, "correct_loss_per_token": 1.752898097038269, "incorrect_loss_per_token": 1.5867182910442352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4713473320007324, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.4713473320007324, "logits_per_char": -0.7356736660003662, "num_chars": 2}, {"sum_logits": -1.5891770124435425, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5891770124435425, "logits_per_char": -0.7945885062217712, "num_chars": 2}, {"sum_logits": -1.6487432718276978, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6487432718276978, "logits_per_char": -0.8243716359138489, "num_chars": 2}, {"sum_logits": -1.6376055479049683, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6376055479049683, "logits_per_char": -0.8188027739524841, "num_chars": 2}, {"sum_logits": -1.752898097038269, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.752898097038269, "logits_per_char": -0.8764490485191345, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 138, "native_id": "e89a2762d578cb7bc2cc0a5b2a16d933", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.063856601715088, "incorrect_loss_raw": 1.5421157777309418, "correct_loss_per_char": 1.031928300857544, "incorrect_loss_per_char": 0.7710578888654709, "correct_loss_per_token": 2.063856601715088, "incorrect_loss_per_token": 1.5421157777309418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2623738050460815, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2623738050460815, "logits_per_char": -0.6311869025230408, "num_chars": 2}, {"sum_logits": -1.5708515644073486, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5708515644073486, "logits_per_char": -0.7854257822036743, "num_chars": 2}, {"sum_logits": -1.6224548816680908, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6224548816680908, "logits_per_char": -0.8112274408340454, "num_chars": 2}, {"sum_logits": -1.712782859802246, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.712782859802246, "logits_per_char": -0.856391429901123, "num_chars": 2}, {"sum_logits": -2.063856601715088, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.063856601715088, "logits_per_char": -1.031928300857544, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 139, "native_id": "43cec0fff43a976fade9112d02b66021", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6092559099197388, "incorrect_loss_raw": 1.6311662793159485, "correct_loss_per_char": 0.8046279549598694, "incorrect_loss_per_char": 0.8155831396579742, "correct_loss_per_token": 1.6092559099197388, "incorrect_loss_per_token": 1.6311662793159485, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6092559099197388, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6092559099197388, "logits_per_char": -0.8046279549598694, "num_chars": 2}, {"sum_logits": -1.5615184307098389, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5615184307098389, "logits_per_char": -0.7807592153549194, "num_chars": 2}, {"sum_logits": -1.4611018896102905, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4611018896102905, "logits_per_char": -0.7305509448051453, "num_chars": 2}, {"sum_logits": -1.581101894378662, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.581101894378662, "logits_per_char": -0.790550947189331, "num_chars": 2}, {"sum_logits": -1.9209429025650024, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9209429025650024, "logits_per_char": -0.9604714512825012, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 140, "native_id": "30e66db11e0257a14a17108b90cd69fb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.679686188697815, "incorrect_loss_raw": 1.6086790561676025, "correct_loss_per_char": 0.8398430943489075, "incorrect_loss_per_char": 0.8043395280838013, "correct_loss_per_token": 1.679686188697815, "incorrect_loss_per_token": 1.6086790561676025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.460572600364685, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.460572600364685, "logits_per_char": -0.7302863001823425, "num_chars": 2}, {"sum_logits": -1.533868432044983, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.533868432044983, "logits_per_char": -0.7669342160224915, "num_chars": 2}, {"sum_logits": -1.578623652458191, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.578623652458191, "logits_per_char": -0.7893118262290955, "num_chars": 2}, {"sum_logits": -1.679686188697815, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.679686188697815, "logits_per_char": -0.8398430943489075, "num_chars": 2}, {"sum_logits": -1.8616515398025513, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.8616515398025513, "logits_per_char": -0.9308257699012756, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 141, "native_id": "f21ef67b31bd36a3174b6b4c7b4bbc7b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2385408878326416, "incorrect_loss_raw": 1.513323962688446, "correct_loss_per_char": 1.1192704439163208, "incorrect_loss_per_char": 0.756661981344223, "correct_loss_per_token": 2.2385408878326416, "incorrect_loss_per_token": 1.513323962688446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.317055583000183, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.317055583000183, "logits_per_char": -0.6585277915000916, "num_chars": 2}, {"sum_logits": -1.48574697971344, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.48574697971344, "logits_per_char": -0.74287348985672, "num_chars": 2}, {"sum_logits": -1.5843970775604248, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5843970775604248, "logits_per_char": -0.7921985387802124, "num_chars": 2}, {"sum_logits": -1.6660962104797363, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6660962104797363, "logits_per_char": -0.8330481052398682, "num_chars": 2}, {"sum_logits": -2.2385408878326416, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.2385408878326416, "logits_per_char": -1.1192704439163208, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 142, "native_id": "e476e2c8c278eaecfe1a8b884b6aeb8e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4012296199798584, "incorrect_loss_raw": 1.7317076921463013, "correct_loss_per_char": 0.7006148099899292, "incorrect_loss_per_char": 0.8658538460731506, "correct_loss_per_token": 1.4012296199798584, "incorrect_loss_per_token": 1.7317076921463013, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3088297843933105, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3088297843933105, "logits_per_char": -0.6544148921966553, "num_chars": 2}, {"sum_logits": -1.4012296199798584, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4012296199798584, "logits_per_char": -0.7006148099899292, "num_chars": 2}, {"sum_logits": -1.6722371578216553, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6722371578216553, "logits_per_char": -0.8361185789108276, "num_chars": 2}, {"sum_logits": -1.6672446727752686, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6672446727752686, "logits_per_char": -0.8336223363876343, "num_chars": 2}, {"sum_logits": -2.2785191535949707, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.2785191535949707, "logits_per_char": -1.1392595767974854, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 143, "native_id": "191e3c676f05a11d6b2565d8c27d2001", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.429473876953125, "incorrect_loss_raw": 1.6678830087184906, "correct_loss_per_char": 0.7147369384765625, "incorrect_loss_per_char": 0.8339415043592453, "correct_loss_per_token": 1.429473876953125, "incorrect_loss_per_token": 1.6678830087184906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.429473876953125, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.429473876953125, "logits_per_char": -0.7147369384765625, "num_chars": 2}, {"sum_logits": -1.6020427942276, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.6020427942276, "logits_per_char": -0.8010213971138, "num_chars": 2}, {"sum_logits": -1.751465082168579, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.751465082168579, "logits_per_char": -0.8757325410842896, "num_chars": 2}, {"sum_logits": -1.608963966369629, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.608963966369629, "logits_per_char": -0.8044819831848145, "num_chars": 2}, {"sum_logits": -1.7090601921081543, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.7090601921081543, "logits_per_char": -0.8545300960540771, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 144, "native_id": "99098375c7b651d524eebac72e358238", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7543854713439941, "incorrect_loss_raw": 1.6080051958560944, "correct_loss_per_char": 0.8771927356719971, "incorrect_loss_per_char": 0.8040025979280472, "correct_loss_per_token": 1.7543854713439941, "incorrect_loss_per_token": 1.6080051958560944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2709031105041504, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.2709031105041504, "logits_per_char": -0.6354515552520752, "num_chars": 2}, {"sum_logits": -1.6898101568222046, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6898101568222046, "logits_per_char": -0.8449050784111023, "num_chars": 2}, {"sum_logits": -1.6661481857299805, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6661481857299805, "logits_per_char": -0.8330740928649902, "num_chars": 2}, {"sum_logits": -1.7543854713439941, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.7543854713439941, "logits_per_char": -0.8771927356719971, "num_chars": 2}, {"sum_logits": -1.805159330368042, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.805159330368042, "logits_per_char": -0.902579665184021, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 145, "native_id": "290fac9f881a83d8bfb34355f8e71044", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9786403179168701, "incorrect_loss_raw": 1.5453139543533325, "correct_loss_per_char": 0.9893201589584351, "incorrect_loss_per_char": 0.7726569771766663, "correct_loss_per_token": 1.9786403179168701, "incorrect_loss_per_token": 1.5453139543533325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4378831386566162, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4378831386566162, "logits_per_char": -0.7189415693283081, "num_chars": 2}, {"sum_logits": -1.4740347862243652, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.4740347862243652, "logits_per_char": -0.7370173931121826, "num_chars": 2}, {"sum_logits": -1.6332511901855469, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6332511901855469, "logits_per_char": -0.8166255950927734, "num_chars": 2}, {"sum_logits": -1.6360867023468018, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6360867023468018, "logits_per_char": -0.8180433511734009, "num_chars": 2}, {"sum_logits": -1.9786403179168701, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9786403179168701, "logits_per_char": -0.9893201589584351, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 146, "native_id": "6c36226b23377a0dd0188bf56840e22a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6182963848114014, "incorrect_loss_raw": 1.6373433470726013, "correct_loss_per_char": 0.8091481924057007, "incorrect_loss_per_char": 0.8186716735363007, "correct_loss_per_token": 1.6182963848114014, "incorrect_loss_per_token": 1.6373433470726013, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3687491416931152, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3687491416931152, "logits_per_char": -0.6843745708465576, "num_chars": 2}, {"sum_logits": -1.5271613597869873, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5271613597869873, "logits_per_char": -0.7635806798934937, "num_chars": 2}, {"sum_logits": -1.6182963848114014, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6182963848114014, "logits_per_char": -0.8091481924057007, "num_chars": 2}, {"sum_logits": -1.6773838996887207, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6773838996887207, "logits_per_char": -0.8386919498443604, "num_chars": 2}, {"sum_logits": -1.976078987121582, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.976078987121582, "logits_per_char": -0.988039493560791, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 147, "native_id": "aa5aa36557a5fbb93391506182f1025c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5310688018798828, "incorrect_loss_raw": 1.6475574374198914, "correct_loss_per_char": 0.7655344009399414, "incorrect_loss_per_char": 0.8237787187099457, "correct_loss_per_token": 1.5310688018798828, "incorrect_loss_per_token": 1.6475574374198914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4416148662567139, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4416148662567139, "logits_per_char": -0.7208074331283569, "num_chars": 2}, {"sum_logits": -1.6013703346252441, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6013703346252441, "logits_per_char": -0.8006851673126221, "num_chars": 2}, {"sum_logits": -1.5310688018798828, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5310688018798828, "logits_per_char": -0.7655344009399414, "num_chars": 2}, {"sum_logits": -1.681917428970337, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.681917428970337, "logits_per_char": -0.8409587144851685, "num_chars": 2}, {"sum_logits": -1.8653271198272705, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.8653271198272705, "logits_per_char": -0.9326635599136353, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 148, "native_id": "a38df3e750b1edd30f905e17af803c61", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5813243389129639, "incorrect_loss_raw": 1.6817338466644287, "correct_loss_per_char": 0.7906621694564819, "incorrect_loss_per_char": 0.8408669233322144, "correct_loss_per_token": 1.5813243389129639, "incorrect_loss_per_token": 1.6817338466644287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2938792705535889, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2938792705535889, "logits_per_char": -0.6469396352767944, "num_chars": 2}, {"sum_logits": -1.4417147636413574, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4417147636413574, "logits_per_char": -0.7208573818206787, "num_chars": 2}, {"sum_logits": -1.5813243389129639, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5813243389129639, "logits_per_char": -0.7906621694564819, "num_chars": 2}, {"sum_logits": -1.7812504768371582, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7812504768371582, "logits_per_char": -0.8906252384185791, "num_chars": 2}, {"sum_logits": -2.2100908756256104, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.2100908756256104, "logits_per_char": -1.1050454378128052, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 149, "native_id": "dba51270f789c75a2e38a5201b124d99", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.14638090133667, "incorrect_loss_raw": 1.524492233991623, "correct_loss_per_char": 1.073190450668335, "incorrect_loss_per_char": 0.7622461169958115, "correct_loss_per_token": 2.14638090133667, "incorrect_loss_per_token": 1.524492233991623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3478926420211792, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3478926420211792, "logits_per_char": -0.6739463210105896, "num_chars": 2}, {"sum_logits": -1.4672120809555054, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4672120809555054, "logits_per_char": -0.7336060404777527, "num_chars": 2}, {"sum_logits": -1.5863840579986572, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5863840579986572, "logits_per_char": -0.7931920289993286, "num_chars": 2}, {"sum_logits": -1.69648015499115, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.69648015499115, "logits_per_char": -0.848240077495575, "num_chars": 2}, {"sum_logits": -2.14638090133667, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.14638090133667, "logits_per_char": -1.073190450668335, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 150, "native_id": "1be8ec824eb0c7218b6bc160fd191428", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.371907353401184, "incorrect_loss_raw": 1.711407333612442, "correct_loss_per_char": 0.685953676700592, "incorrect_loss_per_char": 0.855703666806221, "correct_loss_per_token": 1.371907353401184, "incorrect_loss_per_token": 1.711407333612442, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.371907353401184, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.371907353401184, "logits_per_char": -0.685953676700592, "num_chars": 2}, {"sum_logits": -1.4842066764831543, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4842066764831543, "logits_per_char": -0.7421033382415771, "num_chars": 2}, {"sum_logits": -1.5362950563430786, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5362950563430786, "logits_per_char": -0.7681475281715393, "num_chars": 2}, {"sum_logits": -1.7319815158843994, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7319815158843994, "logits_per_char": -0.8659907579421997, "num_chars": 2}, {"sum_logits": -2.0931460857391357, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.0931460857391357, "logits_per_char": -1.0465730428695679, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 151, "native_id": "0e80f2afe5c4f652e8720b52d7c06c87", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5329816341400146, "incorrect_loss_raw": 1.6727449297904968, "correct_loss_per_char": 0.7664908170700073, "incorrect_loss_per_char": 0.8363724648952484, "correct_loss_per_token": 1.5329816341400146, "incorrect_loss_per_token": 1.6727449297904968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3320635557174683, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3320635557174683, "logits_per_char": -0.6660317778587341, "num_chars": 2}, {"sum_logits": -1.5329816341400146, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5329816341400146, "logits_per_char": -0.7664908170700073, "num_chars": 2}, {"sum_logits": -1.574949026107788, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.574949026107788, "logits_per_char": -0.787474513053894, "num_chars": 2}, {"sum_logits": -1.711844563484192, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.711844563484192, "logits_per_char": -0.855922281742096, "num_chars": 2}, {"sum_logits": -2.072122573852539, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.072122573852539, "logits_per_char": -1.0360612869262695, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 152, "native_id": "b67971747e95ba425a5b81e0ba8d0b28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.674452543258667, "incorrect_loss_raw": 1.6411950588226318, "correct_loss_per_char": 0.8372262716293335, "incorrect_loss_per_char": 0.8205975294113159, "correct_loss_per_token": 1.674452543258667, "incorrect_loss_per_token": 1.6411950588226318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3386609554290771, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3386609554290771, "logits_per_char": -0.6693304777145386, "num_chars": 2}, {"sum_logits": -1.4813683032989502, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4813683032989502, "logits_per_char": -0.7406841516494751, "num_chars": 2}, {"sum_logits": -1.6042850017547607, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6042850017547607, "logits_per_char": -0.8021425008773804, "num_chars": 2}, {"sum_logits": -1.674452543258667, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.674452543258667, "logits_per_char": -0.8372262716293335, "num_chars": 2}, {"sum_logits": -2.1404659748077393, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.1404659748077393, "logits_per_char": -1.0702329874038696, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 153, "native_id": "fcd39cfa321728fea069a6ae4285b06f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6830899715423584, "incorrect_loss_raw": 1.6161224246025085, "correct_loss_per_char": 0.8415449857711792, "incorrect_loss_per_char": 0.8080612123012543, "correct_loss_per_token": 1.6830899715423584, "incorrect_loss_per_token": 1.6161224246025085, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4075642824172974, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4075642824172974, "logits_per_char": -0.7037821412086487, "num_chars": 2}, {"sum_logits": -1.5235565900802612, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5235565900802612, "logits_per_char": -0.7617782950401306, "num_chars": 2}, {"sum_logits": -1.593052625656128, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.593052625656128, "logits_per_char": -0.796526312828064, "num_chars": 2}, {"sum_logits": -1.6830899715423584, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6830899715423584, "logits_per_char": -0.8415449857711792, "num_chars": 2}, {"sum_logits": -1.9403162002563477, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9403162002563477, "logits_per_char": -0.9701581001281738, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 154, "native_id": "cb6766fb25daee911fc8e9816b98938c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.00012469291687, "incorrect_loss_raw": 1.5585832595825195, "correct_loss_per_char": 1.000062346458435, "incorrect_loss_per_char": 0.7792916297912598, "correct_loss_per_token": 2.00012469291687, "incorrect_loss_per_token": 1.5585832595825195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2347261905670166, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.2347261905670166, "logits_per_char": -0.6173630952835083, "num_chars": 2}, {"sum_logits": -1.5410656929016113, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5410656929016113, "logits_per_char": -0.7705328464508057, "num_chars": 2}, {"sum_logits": -1.7249855995178223, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.7249855995178223, "logits_per_char": -0.8624927997589111, "num_chars": 2}, {"sum_logits": -1.733555555343628, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.733555555343628, "logits_per_char": -0.866777777671814, "num_chars": 2}, {"sum_logits": -2.00012469291687, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -2.00012469291687, "logits_per_char": -1.000062346458435, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 155, "native_id": "54231f875bb7fe4d3e4afb6eae64387c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5982098579406738, "incorrect_loss_raw": 1.6457623839378357, "correct_loss_per_char": 0.7991049289703369, "incorrect_loss_per_char": 0.8228811919689178, "correct_loss_per_token": 1.5982098579406738, "incorrect_loss_per_token": 1.6457623839378357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3964577913284302, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3964577913284302, "logits_per_char": -0.6982288956642151, "num_chars": 2}, {"sum_logits": -1.6011073589324951, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6011073589324951, "logits_per_char": -0.8005536794662476, "num_chars": 2}, {"sum_logits": -1.562020182609558, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.562020182609558, "logits_per_char": -0.781010091304779, "num_chars": 2}, {"sum_logits": -1.5982098579406738, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5982098579406738, "logits_per_char": -0.7991049289703369, "num_chars": 2}, {"sum_logits": -2.0234642028808594, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.0234642028808594, "logits_per_char": -1.0117321014404297, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 156, "native_id": "7d7f7d7a8ae3b20ca9fc0da6efe467b4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4787037372589111, "incorrect_loss_raw": 1.6930918991565704, "correct_loss_per_char": 0.7393518686294556, "incorrect_loss_per_char": 0.8465459495782852, "correct_loss_per_token": 1.4787037372589111, "incorrect_loss_per_token": 1.6930918991565704, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.28766930103302, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.28766930103302, "logits_per_char": -0.64383465051651, "num_chars": 2}, {"sum_logits": -1.4787037372589111, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4787037372589111, "logits_per_char": -0.7393518686294556, "num_chars": 2}, {"sum_logits": -1.6004612445831299, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6004612445831299, "logits_per_char": -0.8002306222915649, "num_chars": 2}, {"sum_logits": -1.7928051948547363, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7928051948547363, "logits_per_char": -0.8964025974273682, "num_chars": 2}, {"sum_logits": -2.0914318561553955, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.0914318561553955, "logits_per_char": -1.0457159280776978, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 157, "native_id": "31b72d4e4ae7c672c20e27e42499ec79", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.017838716506958, "incorrect_loss_raw": 1.54331436753273, "correct_loss_per_char": 1.008919358253479, "incorrect_loss_per_char": 0.771657183766365, "correct_loss_per_token": 2.017838716506958, "incorrect_loss_per_token": 1.54331436753273, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3625444173812866, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.3625444173812866, "logits_per_char": -0.6812722086906433, "num_chars": 2}, {"sum_logits": -1.5789138078689575, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5789138078689575, "logits_per_char": -0.7894569039344788, "num_chars": 2}, {"sum_logits": -1.522215723991394, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.522215723991394, "logits_per_char": -0.761107861995697, "num_chars": 2}, {"sum_logits": -1.7095835208892822, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.7095835208892822, "logits_per_char": -0.8547917604446411, "num_chars": 2}, {"sum_logits": -2.017838716506958, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.017838716506958, "logits_per_char": -1.008919358253479, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 158, "native_id": "26ce83b8e9a263079aa8cdbd5258d667", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4152288436889648, "incorrect_loss_raw": 1.6887166202068329, "correct_loss_per_char": 0.7076144218444824, "incorrect_loss_per_char": 0.8443583101034164, "correct_loss_per_token": 1.4152288436889648, "incorrect_loss_per_token": 1.6887166202068329, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4152288436889648, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.4152288436889648, "logits_per_char": -0.7076144218444824, "num_chars": 2}, {"sum_logits": -1.547972559928894, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.547972559928894, "logits_per_char": -0.773986279964447, "num_chars": 2}, {"sum_logits": -1.5849980115890503, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5849980115890503, "logits_per_char": -0.7924990057945251, "num_chars": 2}, {"sum_logits": -1.599960446357727, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.599960446357727, "logits_per_char": -0.7999802231788635, "num_chars": 2}, {"sum_logits": -2.02193546295166, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.02193546295166, "logits_per_char": -1.01096773147583, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 159, "native_id": "30138608d4934a75cf0911a06b021374", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9544777870178223, "incorrect_loss_raw": 1.550560176372528, "correct_loss_per_char": 0.9772388935089111, "incorrect_loss_per_char": 0.775280088186264, "correct_loss_per_token": 1.9544777870178223, "incorrect_loss_per_token": 1.550560176372528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3908330202102661, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3908330202102661, "logits_per_char": -0.6954165101051331, "num_chars": 2}, {"sum_logits": -1.5264800786972046, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5264800786972046, "logits_per_char": -0.7632400393486023, "num_chars": 2}, {"sum_logits": -1.6147468090057373, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6147468090057373, "logits_per_char": -0.8073734045028687, "num_chars": 2}, {"sum_logits": -1.6701807975769043, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6701807975769043, "logits_per_char": -0.8350903987884521, "num_chars": 2}, {"sum_logits": -1.9544777870178223, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9544777870178223, "logits_per_char": -0.9772388935089111, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 160, "native_id": "01abce8c4964371d85a5be2019f75827", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3468585014343262, "incorrect_loss_raw": 1.7259568572044373, "correct_loss_per_char": 0.6734292507171631, "incorrect_loss_per_char": 0.8629784286022186, "correct_loss_per_token": 1.3468585014343262, "incorrect_loss_per_token": 1.7259568572044373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3468585014343262, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3468585014343262, "logits_per_char": -0.6734292507171631, "num_chars": 2}, {"sum_logits": -1.4863591194152832, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4863591194152832, "logits_per_char": -0.7431795597076416, "num_chars": 2}, {"sum_logits": -1.5770111083984375, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5770111083984375, "logits_per_char": -0.7885055541992188, "num_chars": 2}, {"sum_logits": -1.6645309925079346, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6645309925079346, "logits_per_char": -0.8322654962539673, "num_chars": 2}, {"sum_logits": -2.1759262084960938, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.1759262084960938, "logits_per_char": -1.0879631042480469, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 161, "native_id": "3e2222c99e11fca2ad4af2d470eb8ea2_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8010255098342896, "incorrect_loss_raw": 1.5805276036262512, "correct_loss_per_char": 0.9005127549171448, "incorrect_loss_per_char": 0.7902638018131256, "correct_loss_per_token": 1.8010255098342896, "incorrect_loss_per_token": 1.5805276036262512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.421087384223938, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.421087384223938, "logits_per_char": -0.710543692111969, "num_chars": 2}, {"sum_logits": -1.5283626317977905, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5283626317977905, "logits_per_char": -0.7641813158988953, "num_chars": 2}, {"sum_logits": -1.6173993349075317, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6173993349075317, "logits_per_char": -0.8086996674537659, "num_chars": 2}, {"sum_logits": -1.8010255098342896, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.8010255098342896, "logits_per_char": -0.9005127549171448, "num_chars": 2}, {"sum_logits": -1.7552610635757446, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.7552610635757446, "logits_per_char": -0.8776305317878723, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 162, "native_id": "847dbf5b73c3e8d49bb9a36491d95e79", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6675001382827759, "incorrect_loss_raw": 1.6156095564365387, "correct_loss_per_char": 0.8337500691413879, "incorrect_loss_per_char": 0.8078047782182693, "correct_loss_per_token": 1.6675001382827759, "incorrect_loss_per_token": 1.6156095564365387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4743846654891968, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4743846654891968, "logits_per_char": -0.7371923327445984, "num_chars": 2}, {"sum_logits": -1.4925377368927002, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.4925377368927002, "logits_per_char": -0.7462688684463501, "num_chars": 2}, {"sum_logits": -1.6675001382827759, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6675001382827759, "logits_per_char": -0.8337500691413879, "num_chars": 2}, {"sum_logits": -1.58169424533844, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.58169424533844, "logits_per_char": -0.79084712266922, "num_chars": 2}, {"sum_logits": -1.9138215780258179, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9138215780258179, "logits_per_char": -0.9569107890129089, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 163, "native_id": "fa031cff8e11e75c68d6a99ef0e5ca3a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6549303531646729, "incorrect_loss_raw": 1.6414577066898346, "correct_loss_per_char": 0.8274651765823364, "incorrect_loss_per_char": 0.8207288533449173, "correct_loss_per_token": 1.6549303531646729, "incorrect_loss_per_token": 1.6414577066898346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.393332600593567, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.393332600593567, "logits_per_char": -0.6966663002967834, "num_chars": 2}, {"sum_logits": -1.6549303531646729, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6549303531646729, "logits_per_char": -0.8274651765823364, "num_chars": 2}, {"sum_logits": -1.4403488636016846, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4403488636016846, "logits_per_char": -0.7201744318008423, "num_chars": 2}, {"sum_logits": -1.6429443359375, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6429443359375, "logits_per_char": -0.82147216796875, "num_chars": 2}, {"sum_logits": -2.089205026626587, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.089205026626587, "logits_per_char": -1.0446025133132935, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 164, "native_id": "c592258c88295756833e9796e881057b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5125491619110107, "incorrect_loss_raw": 1.6865994036197662, "correct_loss_per_char": 0.7562745809555054, "incorrect_loss_per_char": 0.8432997018098831, "correct_loss_per_token": 1.5125491619110107, "incorrect_loss_per_token": 1.6865994036197662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.483444333076477, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.483444333076477, "logits_per_char": -0.7417221665382385, "num_chars": 2}, {"sum_logits": -1.3911091089248657, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3911091089248657, "logits_per_char": -0.6955545544624329, "num_chars": 2}, {"sum_logits": -1.5125491619110107, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5125491619110107, "logits_per_char": -0.7562745809555054, "num_chars": 2}, {"sum_logits": -1.6551064252853394, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6551064252853394, "logits_per_char": -0.8275532126426697, "num_chars": 2}, {"sum_logits": -2.216737747192383, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.216737747192383, "logits_per_char": -1.1083688735961914, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 165, "native_id": "e1403a7c581bc263aea2ed8d179826d1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4105615615844727, "incorrect_loss_raw": 1.6883483529090881, "correct_loss_per_char": 0.7052807807922363, "incorrect_loss_per_char": 0.8441741764545441, "correct_loss_per_token": 1.4105615615844727, "incorrect_loss_per_token": 1.6883483529090881, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4105615615844727, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4105615615844727, "logits_per_char": -0.7052807807922363, "num_chars": 2}, {"sum_logits": -1.5345792770385742, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5345792770385742, "logits_per_char": -0.7672896385192871, "num_chars": 2}, {"sum_logits": -1.5344939231872559, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5344939231872559, "logits_per_char": -0.7672469615936279, "num_chars": 2}, {"sum_logits": -1.6782007217407227, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6782007217407227, "logits_per_char": -0.8391003608703613, "num_chars": 2}, {"sum_logits": -2.0061194896698, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0061194896698, "logits_per_char": -1.0030597448349, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 166, "native_id": "15c38f66e811d6ed68cde931bc31d93c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9094996452331543, "incorrect_loss_raw": 1.5545682311058044, "correct_loss_per_char": 0.9547498226165771, "incorrect_loss_per_char": 0.7772841155529022, "correct_loss_per_token": 1.9094996452331543, "incorrect_loss_per_token": 1.5545682311058044, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5430889129638672, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5430889129638672, "logits_per_char": -0.7715444564819336, "num_chars": 2}, {"sum_logits": -1.5756001472473145, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5756001472473145, "logits_per_char": -0.7878000736236572, "num_chars": 2}, {"sum_logits": -1.483168601989746, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.483168601989746, "logits_per_char": -0.741584300994873, "num_chars": 2}, {"sum_logits": -1.61641526222229, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.61641526222229, "logits_per_char": -0.808207631111145, "num_chars": 2}, {"sum_logits": -1.9094996452331543, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9094996452331543, "logits_per_char": -0.9547498226165771, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 167, "native_id": "1ac54dbf6b67f27daa3d456416047584", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6045023202896118, "incorrect_loss_raw": 1.629115641117096, "correct_loss_per_char": 0.8022511601448059, "incorrect_loss_per_char": 0.814557820558548, "correct_loss_per_token": 1.6045023202896118, "incorrect_loss_per_token": 1.629115641117096, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.484132170677185, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.484132170677185, "logits_per_char": -0.7420660853385925, "num_chars": 2}, {"sum_logits": -1.4943910837173462, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4943910837173462, "logits_per_char": -0.7471955418586731, "num_chars": 2}, {"sum_logits": -1.6556439399719238, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6556439399719238, "logits_per_char": -0.8278219699859619, "num_chars": 2}, {"sum_logits": -1.6045023202896118, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6045023202896118, "logits_per_char": -0.8022511601448059, "num_chars": 2}, {"sum_logits": -1.8822953701019287, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.8822953701019287, "logits_per_char": -0.9411476850509644, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 168, "native_id": "21763a65765b5405c9a54484c2e54a72", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3754370212554932, "incorrect_loss_raw": 1.707494467496872, "correct_loss_per_char": 0.6877185106277466, "incorrect_loss_per_char": 0.853747233748436, "correct_loss_per_token": 1.3754370212554932, "incorrect_loss_per_token": 1.707494467496872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3754370212554932, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3754370212554932, "logits_per_char": -0.6877185106277466, "num_chars": 2}, {"sum_logits": -1.5069178342819214, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5069178342819214, "logits_per_char": -0.7534589171409607, "num_chars": 2}, {"sum_logits": -1.5314531326293945, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5314531326293945, "logits_per_char": -0.7657265663146973, "num_chars": 2}, {"sum_logits": -1.7257826328277588, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7257826328277588, "logits_per_char": -0.8628913164138794, "num_chars": 2}, {"sum_logits": -2.065824270248413, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.065824270248413, "logits_per_char": -1.0329121351242065, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 169, "native_id": "c492b8b9754a181c924c1df19998cbc7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6502019166946411, "incorrect_loss_raw": 1.6897996962070465, "correct_loss_per_char": 0.8251009583473206, "incorrect_loss_per_char": 0.8448998481035233, "correct_loss_per_token": 1.6502019166946411, "incorrect_loss_per_token": 1.6897996962070465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2295254468917847, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2295254468917847, "logits_per_char": -0.6147627234458923, "num_chars": 2}, {"sum_logits": -1.409777045249939, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.409777045249939, "logits_per_char": -0.7048885226249695, "num_chars": 2}, {"sum_logits": -1.6502019166946411, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6502019166946411, "logits_per_char": -0.8251009583473206, "num_chars": 2}, {"sum_logits": -1.7596110105514526, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7596110105514526, "logits_per_char": -0.8798055052757263, "num_chars": 2}, {"sum_logits": -2.3602852821350098, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.3602852821350098, "logits_per_char": -1.1801426410675049, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 170, "native_id": "fff554fffa1a0adc64b8d1e21d55534b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7262251377105713, "incorrect_loss_raw": 1.6083475947380066, "correct_loss_per_char": 0.8631125688552856, "incorrect_loss_per_char": 0.8041737973690033, "correct_loss_per_token": 1.7262251377105713, "incorrect_loss_per_token": 1.6083475947380066, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3353962898254395, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3353962898254395, "logits_per_char": -0.6676981449127197, "num_chars": 2}, {"sum_logits": -1.5391252040863037, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5391252040863037, "logits_per_char": -0.7695626020431519, "num_chars": 2}, {"sum_logits": -1.7262251377105713, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7262251377105713, "logits_per_char": -0.8631125688552856, "num_chars": 2}, {"sum_logits": -1.6512339115142822, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6512339115142822, "logits_per_char": -0.8256169557571411, "num_chars": 2}, {"sum_logits": -1.907634973526001, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.907634973526001, "logits_per_char": -0.9538174867630005, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 171, "native_id": "8ea5720718c0e122efa6277edb511569", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6355828046798706, "incorrect_loss_raw": 1.6429913341999054, "correct_loss_per_char": 0.8177914023399353, "incorrect_loss_per_char": 0.8214956670999527, "correct_loss_per_token": 1.6355828046798706, "incorrect_loss_per_token": 1.6429913341999054, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3364908695220947, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3364908695220947, "logits_per_char": -0.6682454347610474, "num_chars": 2}, {"sum_logits": -1.453235149383545, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.453235149383545, "logits_per_char": -0.7266175746917725, "num_chars": 2}, {"sum_logits": -1.767030119895935, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.767030119895935, "logits_per_char": -0.8835150599479675, "num_chars": 2}, {"sum_logits": -1.6355828046798706, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6355828046798706, "logits_per_char": -0.8177914023399353, "num_chars": 2}, {"sum_logits": -2.015209197998047, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.015209197998047, "logits_per_char": -1.0076045989990234, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 172, "native_id": "23e4257a49972efd8a97672f060be1c1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.460955262184143, "incorrect_loss_raw": 1.6652492880821228, "correct_loss_per_char": 0.7304776310920715, "incorrect_loss_per_char": 0.8326246440410614, "correct_loss_per_token": 1.460955262184143, "incorrect_loss_per_token": 1.6652492880821228, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.460955262184143, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.460955262184143, "logits_per_char": -0.7304776310920715, "num_chars": 2}, {"sum_logits": -1.5817853212356567, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5817853212356567, "logits_per_char": -0.7908926606178284, "num_chars": 2}, {"sum_logits": -1.5902906656265259, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5902906656265259, "logits_per_char": -0.7951453328132629, "num_chars": 2}, {"sum_logits": -1.5982028245925903, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5982028245925903, "logits_per_char": -0.7991014122962952, "num_chars": 2}, {"sum_logits": -1.8907183408737183, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8907183408737183, "logits_per_char": -0.9453591704368591, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 173, "native_id": "a018d65a74b9e77d81014fd8f6d78f77", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6426347494125366, "incorrect_loss_raw": 1.632657378911972, "correct_loss_per_char": 0.8213173747062683, "incorrect_loss_per_char": 0.816328689455986, "correct_loss_per_token": 1.6426347494125366, "incorrect_loss_per_token": 1.632657378911972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3694322109222412, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3694322109222412, "logits_per_char": -0.6847161054611206, "num_chars": 2}, {"sum_logits": -1.5930159091949463, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5930159091949463, "logits_per_char": -0.7965079545974731, "num_chars": 2}, {"sum_logits": -1.5646384954452515, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5646384954452515, "logits_per_char": -0.7823192477226257, "num_chars": 2}, {"sum_logits": -1.6426347494125366, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6426347494125366, "logits_per_char": -0.8213173747062683, "num_chars": 2}, {"sum_logits": -2.003542900085449, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.003542900085449, "logits_per_char": -1.0017714500427246, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 174, "native_id": "24ceaf5c10863e73919b5f1b0f2db38e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.789489507675171, "incorrect_loss_raw": 1.5760151743888855, "correct_loss_per_char": 0.8947447538375854, "incorrect_loss_per_char": 0.7880075871944427, "correct_loss_per_token": 1.789489507675171, "incorrect_loss_per_token": 1.5760151743888855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5059163570404053, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.5059163570404053, "logits_per_char": -0.7529581785202026, "num_chars": 2}, {"sum_logits": -1.619354486465454, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.619354486465454, "logits_per_char": -0.809677243232727, "num_chars": 2}, {"sum_logits": -1.6409990787506104, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6409990787506104, "logits_per_char": -0.8204995393753052, "num_chars": 2}, {"sum_logits": -1.5377907752990723, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5377907752990723, "logits_per_char": -0.7688953876495361, "num_chars": 2}, {"sum_logits": -1.789489507675171, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.789489507675171, "logits_per_char": -0.8947447538375854, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 175, "native_id": "900492bd731f8f615ed7c08155737d44", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.3475499153137207, "incorrect_loss_raw": 1.5083579123020172, "correct_loss_per_char": 1.1737749576568604, "incorrect_loss_per_char": 0.7541789561510086, "correct_loss_per_token": 2.3475499153137207, "incorrect_loss_per_token": 1.5083579123020172, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2483928203582764, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.2483928203582764, "logits_per_char": -0.6241964101791382, "num_chars": 2}, {"sum_logits": -1.426165223121643, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.426165223121643, "logits_per_char": -0.7130826115608215, "num_chars": 2}, {"sum_logits": -1.6491681337356567, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6491681337356567, "logits_per_char": -0.8245840668678284, "num_chars": 2}, {"sum_logits": -1.7097054719924927, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7097054719924927, "logits_per_char": -0.8548527359962463, "num_chars": 2}, {"sum_logits": -2.3475499153137207, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.3475499153137207, "logits_per_char": -1.1737749576568604, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 176, "native_id": "4e3f85dc92eaad4ae6bc6529d62e382c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5054826736450195, "incorrect_loss_raw": 1.7146964967250824, "correct_loss_per_char": 0.7527413368225098, "incorrect_loss_per_char": 0.8573482483625412, "correct_loss_per_token": 1.5054826736450195, "incorrect_loss_per_token": 1.7146964967250824, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2665477991104126, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2665477991104126, "logits_per_char": -0.6332738995552063, "num_chars": 2}, {"sum_logits": -1.5054826736450195, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5054826736450195, "logits_per_char": -0.7527413368225098, "num_chars": 2}, {"sum_logits": -1.5587873458862305, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5587873458862305, "logits_per_char": -0.7793936729431152, "num_chars": 2}, {"sum_logits": -1.686626672744751, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.686626672744751, "logits_per_char": -0.8433133363723755, "num_chars": 2}, {"sum_logits": -2.3468241691589355, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.3468241691589355, "logits_per_char": -1.1734120845794678, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 177, "native_id": "fa1f17ca535c7e875f4f58510dc2f430", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9363561868667603, "incorrect_loss_raw": 1.550428867340088, "correct_loss_per_char": 0.9681780934333801, "incorrect_loss_per_char": 0.775214433670044, "correct_loss_per_token": 1.9363561868667603, "incorrect_loss_per_token": 1.550428867340088, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.401384711265564, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.401384711265564, "logits_per_char": -0.700692355632782, "num_chars": 2}, {"sum_logits": -1.6274007558822632, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6274007558822632, "logits_per_char": -0.8137003779411316, "num_chars": 2}, {"sum_logits": -1.5910512208938599, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5910512208938599, "logits_per_char": -0.7955256104469299, "num_chars": 2}, {"sum_logits": -1.5818787813186646, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5818787813186646, "logits_per_char": -0.7909393906593323, "num_chars": 2}, {"sum_logits": -1.9363561868667603, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.9363561868667603, "logits_per_char": -0.9681780934333801, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 178, "native_id": "76b6f0765a3b2fba71021f902142edc0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7042052745819092, "incorrect_loss_raw": 1.6427915692329407, "correct_loss_per_char": 0.8521026372909546, "incorrect_loss_per_char": 0.8213957846164703, "correct_loss_per_token": 1.7042052745819092, "incorrect_loss_per_token": 1.6427915692329407, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2100610733032227, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2100610733032227, "logits_per_char": -0.6050305366516113, "num_chars": 2}, {"sum_logits": -1.5085103511810303, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5085103511810303, "logits_per_char": -0.7542551755905151, "num_chars": 2}, {"sum_logits": -1.7042052745819092, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7042052745819092, "logits_per_char": -0.8521026372909546, "num_chars": 2}, {"sum_logits": -1.8510379791259766, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.8510379791259766, "logits_per_char": -0.9255189895629883, "num_chars": 2}, {"sum_logits": -2.001556873321533, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.001556873321533, "logits_per_char": -1.0007784366607666, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 179, "native_id": "f1368ab1d4ee05d72d555474fcd737d7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5230016708374023, "incorrect_loss_raw": 1.6819167733192444, "correct_loss_per_char": 0.7615008354187012, "incorrect_loss_per_char": 0.8409583866596222, "correct_loss_per_token": 1.5230016708374023, "incorrect_loss_per_token": 1.6819167733192444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2351374626159668, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.2351374626159668, "logits_per_char": -0.6175687313079834, "num_chars": 2}, {"sum_logits": -1.5230016708374023, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5230016708374023, "logits_per_char": -0.7615008354187012, "num_chars": 2}, {"sum_logits": -1.6514997482299805, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6514997482299805, "logits_per_char": -0.8257498741149902, "num_chars": 2}, {"sum_logits": -1.8076162338256836, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8076162338256836, "logits_per_char": -0.9038081169128418, "num_chars": 2}, {"sum_logits": -2.0334136486053467, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.0334136486053467, "logits_per_char": -1.0167068243026733, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 180, "native_id": "3dee8fc7f0a3fbf4de111b6686fca157", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8051012754440308, "incorrect_loss_raw": 1.577357530593872, "correct_loss_per_char": 0.9025506377220154, "incorrect_loss_per_char": 0.788678765296936, "correct_loss_per_token": 1.8051012754440308, "incorrect_loss_per_token": 1.577357530593872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4438047409057617, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.4438047409057617, "logits_per_char": -0.7219023704528809, "num_chars": 2}, {"sum_logits": -1.5205152034759521, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5205152034759521, "logits_per_char": -0.7602576017379761, "num_chars": 2}, {"sum_logits": -1.6034053564071655, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6034053564071655, "logits_per_char": -0.8017026782035828, "num_chars": 2}, {"sum_logits": -1.8051012754440308, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.8051012754440308, "logits_per_char": -0.9025506377220154, "num_chars": 2}, {"sum_logits": -1.7417048215866089, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.7417048215866089, "logits_per_char": -0.8708524107933044, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 181, "native_id": "ea0e7771afd86a59fd9f7764b77e3fa4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6333844661712646, "incorrect_loss_raw": 1.656102031469345, "correct_loss_per_char": 0.8166922330856323, "incorrect_loss_per_char": 0.8280510157346725, "correct_loss_per_token": 1.6333844661712646, "incorrect_loss_per_token": 1.656102031469345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3230352401733398, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3230352401733398, "logits_per_char": -0.6615176200866699, "num_chars": 2}, {"sum_logits": -1.4774335622787476, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4774335622787476, "logits_per_char": -0.7387167811393738, "num_chars": 2}, {"sum_logits": -1.6333844661712646, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6333844661712646, "logits_per_char": -0.8166922330856323, "num_chars": 2}, {"sum_logits": -1.6618156433105469, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6618156433105469, "logits_per_char": -0.8309078216552734, "num_chars": 2}, {"sum_logits": -2.162123680114746, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.162123680114746, "logits_per_char": -1.081061840057373, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 182, "native_id": "2c845646032bbf27fb3904330d59d324", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0359041690826416, "incorrect_loss_raw": 1.5340154767036438, "correct_loss_per_char": 1.0179520845413208, "incorrect_loss_per_char": 0.7670077383518219, "correct_loss_per_token": 2.0359041690826416, "incorrect_loss_per_token": 1.5340154767036438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4153361320495605, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4153361320495605, "logits_per_char": -0.7076680660247803, "num_chars": 2}, {"sum_logits": -1.5609712600708008, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5609712600708008, "logits_per_char": -0.7804856300354004, "num_chars": 2}, {"sum_logits": -1.558058738708496, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.558058738708496, "logits_per_char": -0.779029369354248, "num_chars": 2}, {"sum_logits": -1.6016957759857178, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6016957759857178, "logits_per_char": -0.8008478879928589, "num_chars": 2}, {"sum_logits": -2.0359041690826416, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0359041690826416, "logits_per_char": -1.0179520845413208, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 183, "native_id": "bc08c354e5bead6863ea4a29cb8fa359", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9195586442947388, "incorrect_loss_raw": 1.5537797808647156, "correct_loss_per_char": 0.9597793221473694, "incorrect_loss_per_char": 0.7768898904323578, "correct_loss_per_token": 1.9195586442947388, "incorrect_loss_per_token": 1.5537797808647156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4746477603912354, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4746477603912354, "logits_per_char": -0.7373238801956177, "num_chars": 2}, {"sum_logits": -1.4676175117492676, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4676175117492676, "logits_per_char": -0.7338087558746338, "num_chars": 2}, {"sum_logits": -1.5965441465377808, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5965441465377808, "logits_per_char": -0.7982720732688904, "num_chars": 2}, {"sum_logits": -1.6763097047805786, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6763097047805786, "logits_per_char": -0.8381548523902893, "num_chars": 2}, {"sum_logits": -1.9195586442947388, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9195586442947388, "logits_per_char": -0.9597793221473694, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 184, "native_id": "fb35c7aa5694bab2cde4b7257bfae003", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.519195556640625, "incorrect_loss_raw": 1.653337150812149, "correct_loss_per_char": 0.7595977783203125, "incorrect_loss_per_char": 0.8266685754060745, "correct_loss_per_token": 1.519195556640625, "incorrect_loss_per_token": 1.653337150812149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5189037322998047, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5189037322998047, "logits_per_char": -0.7594518661499023, "num_chars": 2}, {"sum_logits": -1.643190860748291, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.643190860748291, "logits_per_char": -0.8215954303741455, "num_chars": 2}, {"sum_logits": -1.519195556640625, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.519195556640625, "logits_per_char": -0.7595977783203125, "num_chars": 2}, {"sum_logits": -1.5188995599746704, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.5188995599746704, "logits_per_char": -0.7594497799873352, "num_chars": 2}, {"sum_logits": -1.93235445022583, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.93235445022583, "logits_per_char": -0.966177225112915, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 185, "native_id": "e2a9f0041d17a9944377a91bef5e0d0d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5958954095840454, "incorrect_loss_raw": 1.680785357952118, "correct_loss_per_char": 0.7979477047920227, "incorrect_loss_per_char": 0.840392678976059, "correct_loss_per_token": 1.5958954095840454, "incorrect_loss_per_token": 1.680785357952118, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2150678634643555, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2150678634643555, "logits_per_char": -0.6075339317321777, "num_chars": 2}, {"sum_logits": -1.5974599123001099, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5974599123001099, "logits_per_char": -0.7987299561500549, "num_chars": 2}, {"sum_logits": -1.5958954095840454, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5958954095840454, "logits_per_char": -0.7979477047920227, "num_chars": 2}, {"sum_logits": -1.6914840936660767, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6914840936660767, "logits_per_char": -0.8457420468330383, "num_chars": 2}, {"sum_logits": -2.2191295623779297, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.2191295623779297, "logits_per_char": -1.1095647811889648, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 186, "native_id": "ae56eff01d05422ddbcb26be7181356a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.565245270729065, "incorrect_loss_raw": 1.6597289443016052, "correct_loss_per_char": 0.7826226353645325, "incorrect_loss_per_char": 0.8298644721508026, "correct_loss_per_token": 1.565245270729065, "incorrect_loss_per_token": 1.6597289443016052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2966907024383545, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2966907024383545, "logits_per_char": -0.6483453512191772, "num_chars": 2}, {"sum_logits": -1.565245270729065, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.565245270729065, "logits_per_char": -0.7826226353645325, "num_chars": 2}, {"sum_logits": -1.567097544670105, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.567097544670105, "logits_per_char": -0.7835487723350525, "num_chars": 2}, {"sum_logits": -1.7733856439590454, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7733856439590454, "logits_per_char": -0.8866928219795227, "num_chars": 2}, {"sum_logits": -2.001741886138916, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.001741886138916, "logits_per_char": -1.000870943069458, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 187, "native_id": "895aa97bb84d874d71b2aed572cebfdd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0141637325286865, "incorrect_loss_raw": 1.536091297864914, "correct_loss_per_char": 1.0070818662643433, "incorrect_loss_per_char": 0.768045648932457, "correct_loss_per_token": 2.0141637325286865, "incorrect_loss_per_token": 1.536091297864914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4723659753799438, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4723659753799438, "logits_per_char": -0.7361829876899719, "num_chars": 2}, {"sum_logits": -1.5311081409454346, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5311081409454346, "logits_per_char": -0.7655540704727173, "num_chars": 2}, {"sum_logits": -1.5999822616577148, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5999822616577148, "logits_per_char": -0.7999911308288574, "num_chars": 2}, {"sum_logits": -1.5409088134765625, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5409088134765625, "logits_per_char": -0.7704544067382812, "num_chars": 2}, {"sum_logits": -2.0141637325286865, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0141637325286865, "logits_per_char": -1.0070818662643433, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 188, "native_id": "9d625e948e9c3777e7cc54ed8ffea135", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5196857452392578, "incorrect_loss_raw": 1.656050831079483, "correct_loss_per_char": 0.7598428726196289, "incorrect_loss_per_char": 0.8280254155397415, "correct_loss_per_token": 1.5196857452392578, "incorrect_loss_per_token": 1.656050831079483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5196857452392578, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5196857452392578, "logits_per_char": -0.7598428726196289, "num_chars": 2}, {"sum_logits": -1.5480684041976929, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5480684041976929, "logits_per_char": -0.7740342020988464, "num_chars": 2}, {"sum_logits": -1.5097531080245972, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.5097531080245972, "logits_per_char": -0.7548765540122986, "num_chars": 2}, {"sum_logits": -1.5912885665893555, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5912885665893555, "logits_per_char": -0.7956442832946777, "num_chars": 2}, {"sum_logits": -1.9750932455062866, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.9750932455062866, "logits_per_char": -0.9875466227531433, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 189, "native_id": "d107d67d525a686fbd8282314d2ea33c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3862642049789429, "incorrect_loss_raw": 1.6944605112075806, "correct_loss_per_char": 0.6931321024894714, "incorrect_loss_per_char": 0.8472302556037903, "correct_loss_per_token": 1.3862642049789429, "incorrect_loss_per_token": 1.6944605112075806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3862642049789429, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3862642049789429, "logits_per_char": -0.6931321024894714, "num_chars": 2}, {"sum_logits": -1.5479509830474854, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5479509830474854, "logits_per_char": -0.7739754915237427, "num_chars": 2}, {"sum_logits": -1.5729331970214844, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5729331970214844, "logits_per_char": -0.7864665985107422, "num_chars": 2}, {"sum_logits": -1.6661758422851562, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6661758422851562, "logits_per_char": -0.8330879211425781, "num_chars": 2}, {"sum_logits": -1.9907820224761963, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.9907820224761963, "logits_per_char": -0.9953910112380981, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 190, "native_id": "fee5ff19811750ad019665af7b36b3c4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4636309146881104, "incorrect_loss_raw": 1.6628034710884094, "correct_loss_per_char": 0.7318154573440552, "incorrect_loss_per_char": 0.8314017355442047, "correct_loss_per_token": 1.4636309146881104, "incorrect_loss_per_token": 1.6628034710884094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4636309146881104, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.4636309146881104, "logits_per_char": -0.7318154573440552, "num_chars": 2}, {"sum_logits": -1.5266051292419434, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5266051292419434, "logits_per_char": -0.7633025646209717, "num_chars": 2}, {"sum_logits": -1.6576898097991943, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6576898097991943, "logits_per_char": -0.8288449048995972, "num_chars": 2}, {"sum_logits": -1.650230050086975, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.650230050086975, "logits_per_char": -0.8251150250434875, "num_chars": 2}, {"sum_logits": -1.816688895225525, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.816688895225525, "logits_per_char": -0.9083444476127625, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 191, "native_id": "e69da59cbcf2a302e4523571eba8186b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8296579122543335, "incorrect_loss_raw": 1.5670686662197113, "correct_loss_per_char": 0.9148289561271667, "incorrect_loss_per_char": 0.7835343331098557, "correct_loss_per_token": 1.8296579122543335, "incorrect_loss_per_token": 1.5670686662197113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5557810068130493, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5557810068130493, "logits_per_char": -0.7778905034065247, "num_chars": 2}, {"sum_logits": -1.6029328107833862, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6029328107833862, "logits_per_char": -0.8014664053916931, "num_chars": 2}, {"sum_logits": -1.5363373756408691, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.5363373756408691, "logits_per_char": -0.7681686878204346, "num_chars": 2}, {"sum_logits": -1.5732234716415405, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5732234716415405, "logits_per_char": -0.7866117358207703, "num_chars": 2}, {"sum_logits": -1.8296579122543335, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.8296579122543335, "logits_per_char": -0.9148289561271667, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 192, "native_id": "2dd138a63b5895cf737ced793cc668e7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.243856430053711, "incorrect_loss_raw": 1.5195015668869019, "correct_loss_per_char": 1.1219282150268555, "incorrect_loss_per_char": 0.7597507834434509, "correct_loss_per_token": 2.243856430053711, "incorrect_loss_per_token": 1.5195015668869019, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.27326500415802, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.27326500415802, "logits_per_char": -0.63663250207901, "num_chars": 2}, {"sum_logits": -1.441506028175354, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.441506028175354, "logits_per_char": -0.720753014087677, "num_chars": 2}, {"sum_logits": -1.6143401861190796, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6143401861190796, "logits_per_char": -0.8071700930595398, "num_chars": 2}, {"sum_logits": -1.7488950490951538, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7488950490951538, "logits_per_char": -0.8744475245475769, "num_chars": 2}, {"sum_logits": -2.243856430053711, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.243856430053711, "logits_per_char": -1.1219282150268555, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 193, "native_id": "b33047f46db680a9b630c13e8ca115cc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7537826299667358, "incorrect_loss_raw": 1.5857791304588318, "correct_loss_per_char": 0.8768913149833679, "incorrect_loss_per_char": 0.7928895652294159, "correct_loss_per_token": 1.7537826299667358, "incorrect_loss_per_token": 1.5857791304588318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4930723905563354, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.4930723905563354, "logits_per_char": -0.7465361952781677, "num_chars": 2}, {"sum_logits": -1.5331507921218872, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.5331507921218872, "logits_per_char": -0.7665753960609436, "num_chars": 2}, {"sum_logits": -1.6174367666244507, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.6174367666244507, "logits_per_char": -0.8087183833122253, "num_chars": 2}, {"sum_logits": -1.6994565725326538, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.6994565725326538, "logits_per_char": -0.8497282862663269, "num_chars": 2}, {"sum_logits": -1.7537826299667358, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.7537826299667358, "logits_per_char": -0.8768913149833679, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 194, "native_id": "f20d40bc4af588223e880e0bb58b27b8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6801996231079102, "incorrect_loss_raw": 1.6391730606555939, "correct_loss_per_char": 0.8400998115539551, "incorrect_loss_per_char": 0.8195865303277969, "correct_loss_per_token": 1.6801996231079102, "incorrect_loss_per_token": 1.6391730606555939, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3200620412826538, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3200620412826538, "logits_per_char": -0.6600310206413269, "num_chars": 2}, {"sum_logits": -1.4485652446746826, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4485652446746826, "logits_per_char": -0.7242826223373413, "num_chars": 2}, {"sum_logits": -1.6801996231079102, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6801996231079102, "logits_per_char": -0.8400998115539551, "num_chars": 2}, {"sum_logits": -1.7053265571594238, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7053265571594238, "logits_per_char": -0.8526632785797119, "num_chars": 2}, {"sum_logits": -2.0827383995056152, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0827383995056152, "logits_per_char": -1.0413691997528076, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 195, "native_id": "b6b66d4519a84b8331ea55f84767e9df", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3177422285079956, "incorrect_loss_raw": 1.7196578681468964, "correct_loss_per_char": 0.6588711142539978, "incorrect_loss_per_char": 0.8598289340734482, "correct_loss_per_token": 1.3177422285079956, "incorrect_loss_per_token": 1.7196578681468964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3177422285079956, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3177422285079956, "logits_per_char": -0.6588711142539978, "num_chars": 2}, {"sum_logits": -1.6278685331344604, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6278685331344604, "logits_per_char": -0.8139342665672302, "num_chars": 2}, {"sum_logits": -1.601267695426941, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.601267695426941, "logits_per_char": -0.8006338477134705, "num_chars": 2}, {"sum_logits": -1.6835538148880005, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6835538148880005, "logits_per_char": -0.8417769074440002, "num_chars": 2}, {"sum_logits": -1.9659414291381836, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9659414291381836, "logits_per_char": -0.9829707145690918, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 196, "native_id": "952cf4b2f7a434b2eeae9f4c7ed89c0a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9224735498428345, "incorrect_loss_raw": 1.554980993270874, "correct_loss_per_char": 0.9612367749214172, "incorrect_loss_per_char": 0.777490496635437, "correct_loss_per_token": 1.9224735498428345, "incorrect_loss_per_token": 1.554980993270874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5219813585281372, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5219813585281372, "logits_per_char": -0.7609906792640686, "num_chars": 2}, {"sum_logits": -1.4271188974380493, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4271188974380493, "logits_per_char": -0.7135594487190247, "num_chars": 2}, {"sum_logits": -1.676843523979187, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.676843523979187, "logits_per_char": -0.8384217619895935, "num_chars": 2}, {"sum_logits": -1.5939801931381226, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5939801931381226, "logits_per_char": -0.7969900965690613, "num_chars": 2}, {"sum_logits": -1.9224735498428345, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9224735498428345, "logits_per_char": -0.9612367749214172, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 197, "native_id": "b63e5cd88bfe75d29ff9fdc6dd97fed6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2577744722366333, "incorrect_loss_raw": 1.7820929288864136, "correct_loss_per_char": 0.6288872361183167, "incorrect_loss_per_char": 0.8910464644432068, "correct_loss_per_token": 1.2577744722366333, "incorrect_loss_per_token": 1.7820929288864136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2577744722366333, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2577744722366333, "logits_per_char": -0.6288872361183167, "num_chars": 2}, {"sum_logits": -1.5181102752685547, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5181102752685547, "logits_per_char": -0.7590551376342773, "num_chars": 2}, {"sum_logits": -1.5593345165252686, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5593345165252686, "logits_per_char": -0.7796672582626343, "num_chars": 2}, {"sum_logits": -1.6582708358764648, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6582708358764648, "logits_per_char": -0.8291354179382324, "num_chars": 2}, {"sum_logits": -2.392656087875366, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.392656087875366, "logits_per_char": -1.196328043937683, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 198, "native_id": "ec5a336080e37fbe95d72ad5f9c65ba7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6413904428482056, "incorrect_loss_raw": 1.6444964408874512, "correct_loss_per_char": 0.8206952214241028, "incorrect_loss_per_char": 0.8222482204437256, "correct_loss_per_token": 1.6413904428482056, "incorrect_loss_per_token": 1.6444964408874512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.352416753768921, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.352416753768921, "logits_per_char": -0.6762083768844604, "num_chars": 2}, {"sum_logits": -1.499721884727478, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.499721884727478, "logits_per_char": -0.749860942363739, "num_chars": 2}, {"sum_logits": -1.6413904428482056, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6413904428482056, "logits_per_char": -0.8206952214241028, "num_chars": 2}, {"sum_logits": -1.6429399251937866, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6429399251937866, "logits_per_char": -0.8214699625968933, "num_chars": 2}, {"sum_logits": -2.082907199859619, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.082907199859619, "logits_per_char": -1.0414535999298096, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 199, "native_id": "6386bcf080633bc3eeb3317a5435b7b7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0217113494873047, "incorrect_loss_raw": 1.5388102531433105, "correct_loss_per_char": 1.0108556747436523, "incorrect_loss_per_char": 0.7694051265716553, "correct_loss_per_token": 2.0217113494873047, "incorrect_loss_per_token": 1.5388102531433105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4602406024932861, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4602406024932861, "logits_per_char": -0.7301203012466431, "num_chars": 2}, {"sum_logits": -1.4435489177703857, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4435489177703857, "logits_per_char": -0.7217744588851929, "num_chars": 2}, {"sum_logits": -1.5487496852874756, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5487496852874756, "logits_per_char": -0.7743748426437378, "num_chars": 2}, {"sum_logits": -1.7027018070220947, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7027018070220947, "logits_per_char": -0.8513509035110474, "num_chars": 2}, {"sum_logits": -2.0217113494873047, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0217113494873047, "logits_per_char": -1.0108556747436523, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 200, "native_id": "43ab0ff711e60d51f943bbd2cdd6515a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0742809772491455, "incorrect_loss_raw": 1.5363601744174957, "correct_loss_per_char": 1.0371404886245728, "incorrect_loss_per_char": 0.7681800872087479, "correct_loss_per_token": 2.0742809772491455, "incorrect_loss_per_token": 1.5363601744174957, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3117823600769043, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3117823600769043, "logits_per_char": -0.6558911800384521, "num_chars": 2}, {"sum_logits": -1.514121174812317, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.514121174812317, "logits_per_char": -0.7570605874061584, "num_chars": 2}, {"sum_logits": -1.6585845947265625, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6585845947265625, "logits_per_char": -0.8292922973632812, "num_chars": 2}, {"sum_logits": -1.6609525680541992, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6609525680541992, "logits_per_char": -0.8304762840270996, "num_chars": 2}, {"sum_logits": -2.0742809772491455, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0742809772491455, "logits_per_char": -1.0371404886245728, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 201, "native_id": "11c4c78d61e8212f0984fd07eb22b669", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3001607656478882, "incorrect_loss_raw": 1.7704322934150696, "correct_loss_per_char": 0.6500803828239441, "incorrect_loss_per_char": 0.8852161467075348, "correct_loss_per_token": 1.3001607656478882, "incorrect_loss_per_token": 1.7704322934150696, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3001607656478882, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3001607656478882, "logits_per_char": -0.6500803828239441, "num_chars": 2}, {"sum_logits": -1.5136640071868896, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5136640071868896, "logits_per_char": -0.7568320035934448, "num_chars": 2}, {"sum_logits": -1.5288054943084717, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5288054943084717, "logits_per_char": -0.7644027471542358, "num_chars": 2}, {"sum_logits": -1.6279494762420654, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6279494762420654, "logits_per_char": -0.8139747381210327, "num_chars": 2}, {"sum_logits": -2.4113101959228516, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.4113101959228516, "logits_per_char": -1.2056550979614258, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 202, "native_id": "e61891746aa94ab57aaa754614034aef", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5775474309921265, "incorrect_loss_raw": 1.6863654255867004, "correct_loss_per_char": 0.7887737154960632, "incorrect_loss_per_char": 0.8431827127933502, "correct_loss_per_token": 1.5775474309921265, "incorrect_loss_per_token": 1.6863654255867004, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2966549396514893, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2966549396514893, "logits_per_char": -0.6483274698257446, "num_chars": 2}, {"sum_logits": -1.5496712923049927, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5496712923049927, "logits_per_char": -0.7748356461524963, "num_chars": 2}, {"sum_logits": -1.5775474309921265, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5775474309921265, "logits_per_char": -0.7887737154960632, "num_chars": 2}, {"sum_logits": -1.5951107740402222, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5951107740402222, "logits_per_char": -0.7975553870201111, "num_chars": 2}, {"sum_logits": -2.3040246963500977, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.3040246963500977, "logits_per_char": -1.1520123481750488, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 203, "native_id": "97da9aa4ea4b22744ec51cba49f35bfc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5817956924438477, "incorrect_loss_raw": 1.6282170414924622, "correct_loss_per_char": 0.7908978462219238, "incorrect_loss_per_char": 0.8141085207462311, "correct_loss_per_token": 1.5817956924438477, "incorrect_loss_per_token": 1.6282170414924622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5105762481689453, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.5105762481689453, "logits_per_char": -0.7552881240844727, "num_chars": 2}, {"sum_logits": -1.6333951950073242, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6333951950073242, "logits_per_char": -0.8166975975036621, "num_chars": 2}, {"sum_logits": -1.5817956924438477, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5817956924438477, "logits_per_char": -0.7908978462219238, "num_chars": 2}, {"sum_logits": -1.559065341949463, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.559065341949463, "logits_per_char": -0.7795326709747314, "num_chars": 2}, {"sum_logits": -1.8098313808441162, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8098313808441162, "logits_per_char": -0.9049156904220581, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 204, "native_id": "46241bc83e8d81196ae5783b2b9854a4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.319821834564209, "incorrect_loss_raw": 1.5140281021595001, "correct_loss_per_char": 1.1599109172821045, "incorrect_loss_per_char": 0.7570140510797501, "correct_loss_per_token": 2.319821834564209, "incorrect_loss_per_token": 1.5140281021595001, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2253764867782593, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.2253764867782593, "logits_per_char": -0.6126882433891296, "num_chars": 2}, {"sum_logits": -1.4665539264678955, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4665539264678955, "logits_per_char": -0.7332769632339478, "num_chars": 2}, {"sum_logits": -1.7079026699066162, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7079026699066162, "logits_per_char": -0.8539513349533081, "num_chars": 2}, {"sum_logits": -1.6562793254852295, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6562793254852295, "logits_per_char": -0.8281396627426147, "num_chars": 2}, {"sum_logits": -2.319821834564209, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.319821834564209, "logits_per_char": -1.1599109172821045, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 205, "native_id": "18844d3aa4e52b331b5382c8244cf4db", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.496649980545044, "incorrect_loss_raw": 1.6900522112846375, "correct_loss_per_char": 0.748324990272522, "incorrect_loss_per_char": 0.8450261056423187, "correct_loss_per_token": 1.496649980545044, "incorrect_loss_per_token": 1.6900522112846375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.496649980545044, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.496649980545044, "logits_per_char": -0.748324990272522, "num_chars": 2}, {"sum_logits": -1.4399293661117554, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4399293661117554, "logits_per_char": -0.7199646830558777, "num_chars": 2}, {"sum_logits": -1.52456533908844, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.52456533908844, "logits_per_char": -0.76228266954422, "num_chars": 2}, {"sum_logits": -1.5662622451782227, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5662622451782227, "logits_per_char": -0.7831311225891113, "num_chars": 2}, {"sum_logits": -2.229451894760132, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.229451894760132, "logits_per_char": -1.114725947380066, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 206, "native_id": "056b33c7050c167b0d4348d40d169358", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4648315906524658, "incorrect_loss_raw": 1.678936243057251, "correct_loss_per_char": 0.7324157953262329, "incorrect_loss_per_char": 0.8394681215286255, "correct_loss_per_token": 1.4648315906524658, "incorrect_loss_per_token": 1.678936243057251, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4512684345245361, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4512684345245361, "logits_per_char": -0.7256342172622681, "num_chars": 2}, {"sum_logits": -1.4648315906524658, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4648315906524658, "logits_per_char": -0.7324157953262329, "num_chars": 2}, {"sum_logits": -1.5822665691375732, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5822665691375732, "logits_per_char": -0.7911332845687866, "num_chars": 2}, {"sum_logits": -1.6203289031982422, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6203289031982422, "logits_per_char": -0.8101644515991211, "num_chars": 2}, {"sum_logits": -2.0618810653686523, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0618810653686523, "logits_per_char": -1.0309405326843262, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 207, "native_id": "31d7dd1d00aabe411568df3e72d5b5e0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7030671834945679, "incorrect_loss_raw": 1.6145251393318176, "correct_loss_per_char": 0.8515335917472839, "incorrect_loss_per_char": 0.8072625696659088, "correct_loss_per_token": 1.7030671834945679, "incorrect_loss_per_token": 1.6145251393318176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.395377278327942, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.395377278327942, "logits_per_char": -0.697688639163971, "num_chars": 2}, {"sum_logits": -1.517736792564392, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.517736792564392, "logits_per_char": -0.758868396282196, "num_chars": 2}, {"sum_logits": -1.58016836643219, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.58016836643219, "logits_per_char": -0.790084183216095, "num_chars": 2}, {"sum_logits": -1.7030671834945679, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7030671834945679, "logits_per_char": -0.8515335917472839, "num_chars": 2}, {"sum_logits": -1.9648181200027466, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.9648181200027466, "logits_per_char": -0.9824090600013733, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 208, "native_id": "cbf3dd48b4d591fc872a53cd4b9dd3af", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6184552907943726, "incorrect_loss_raw": 1.616823136806488, "correct_loss_per_char": 0.8092276453971863, "incorrect_loss_per_char": 0.808411568403244, "correct_loss_per_token": 1.6184552907943726, "incorrect_loss_per_token": 1.616823136806488, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5260869264602661, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.5260869264602661, "logits_per_char": -0.7630434632301331, "num_chars": 2}, {"sum_logits": -1.5831636190414429, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5831636190414429, "logits_per_char": -0.7915818095207214, "num_chars": 2}, {"sum_logits": -1.6184552907943726, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6184552907943726, "logits_per_char": -0.8092276453971863, "num_chars": 2}, {"sum_logits": -1.5872982740402222, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5872982740402222, "logits_per_char": -0.7936491370201111, "num_chars": 2}, {"sum_logits": -1.770743727684021, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.770743727684021, "logits_per_char": -0.8853718638420105, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 209, "native_id": "60e8f1a86d4063895f340cd1e3c55f50", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.56201171875, "incorrect_loss_raw": 1.6470379531383514, "correct_loss_per_char": 0.781005859375, "incorrect_loss_per_char": 0.8235189765691757, "correct_loss_per_token": 1.56201171875, "incorrect_loss_per_token": 1.6470379531383514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.455044150352478, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.455044150352478, "logits_per_char": -0.727522075176239, "num_chars": 2}, {"sum_logits": -1.5448718070983887, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5448718070983887, "logits_per_char": -0.7724359035491943, "num_chars": 2}, {"sum_logits": -1.56201171875, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.56201171875, "logits_per_char": -0.781005859375, "num_chars": 2}, {"sum_logits": -1.608567476272583, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.608567476272583, "logits_per_char": -0.8042837381362915, "num_chars": 2}, {"sum_logits": -1.979668378829956, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.979668378829956, "logits_per_char": -0.989834189414978, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 210, "native_id": "eee8cb7a0d806a62d2de24831f82e3e1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4059861898422241, "incorrect_loss_raw": 1.693010002374649, "correct_loss_per_char": 0.7029930949211121, "incorrect_loss_per_char": 0.8465050011873245, "correct_loss_per_token": 1.4059861898422241, "incorrect_loss_per_token": 1.693010002374649, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4059861898422241, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4059861898422241, "logits_per_char": -0.7029930949211121, "num_chars": 2}, {"sum_logits": -1.4702023267745972, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4702023267745972, "logits_per_char": -0.7351011633872986, "num_chars": 2}, {"sum_logits": -1.62690269947052, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.62690269947052, "logits_per_char": -0.81345134973526, "num_chars": 2}, {"sum_logits": -1.64791738986969, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.64791738986969, "logits_per_char": -0.823958694934845, "num_chars": 2}, {"sum_logits": -2.027017593383789, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.027017593383789, "logits_per_char": -1.0135087966918945, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 211, "native_id": "9a23a7f04e63bf9f4c7dfe50c58abfd2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.543563723564148, "incorrect_loss_raw": 1.6640346348285675, "correct_loss_per_char": 0.771781861782074, "incorrect_loss_per_char": 0.8320173174142838, "correct_loss_per_token": 1.543563723564148, "incorrect_loss_per_token": 1.6640346348285675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4052709341049194, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4052709341049194, "logits_per_char": -0.7026354670524597, "num_chars": 2}, {"sum_logits": -1.4691344499588013, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4691344499588013, "logits_per_char": -0.7345672249794006, "num_chars": 2}, {"sum_logits": -1.543563723564148, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.543563723564148, "logits_per_char": -0.771781861782074, "num_chars": 2}, {"sum_logits": -1.7256351709365845, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7256351709365845, "logits_per_char": -0.8628175854682922, "num_chars": 2}, {"sum_logits": -2.056097984313965, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.056097984313965, "logits_per_char": -1.0280489921569824, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 212, "native_id": "e3426e4f60c142aa3d813479f79d6305", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6715941429138184, "incorrect_loss_raw": 1.6348881721496582, "correct_loss_per_char": 0.8357970714569092, "incorrect_loss_per_char": 0.8174440860748291, "correct_loss_per_token": 1.6715941429138184, "incorrect_loss_per_token": 1.6348881721496582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3358027935028076, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3358027935028076, "logits_per_char": -0.6679013967514038, "num_chars": 2}, {"sum_logits": -1.5187056064605713, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5187056064605713, "logits_per_char": -0.7593528032302856, "num_chars": 2}, {"sum_logits": -1.6715941429138184, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6715941429138184, "logits_per_char": -0.8357970714569092, "num_chars": 2}, {"sum_logits": -1.606255054473877, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.606255054473877, "logits_per_char": -0.8031275272369385, "num_chars": 2}, {"sum_logits": -2.078789234161377, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.078789234161377, "logits_per_char": -1.0393946170806885, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 213, "native_id": "3526550b02d9594abd4fc43553010fc6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.612119197845459, "incorrect_loss_raw": 1.633696436882019, "correct_loss_per_char": 0.8060595989227295, "incorrect_loss_per_char": 0.8168482184410095, "correct_loss_per_token": 1.612119197845459, "incorrect_loss_per_token": 1.633696436882019, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5006670951843262, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5006670951843262, "logits_per_char": -0.7503335475921631, "num_chars": 2}, {"sum_logits": -1.4969367980957031, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4969367980957031, "logits_per_char": -0.7484683990478516, "num_chars": 2}, {"sum_logits": -1.5474462509155273, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5474462509155273, "logits_per_char": -0.7737231254577637, "num_chars": 2}, {"sum_logits": -1.612119197845459, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.612119197845459, "logits_per_char": -0.8060595989227295, "num_chars": 2}, {"sum_logits": -1.9897356033325195, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9897356033325195, "logits_per_char": -0.9948678016662598, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 214, "native_id": "e567c94d88829fb07a30e3d46c02e664", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9672788381576538, "incorrect_loss_raw": 1.5491876602172852, "correct_loss_per_char": 0.9836394190788269, "incorrect_loss_per_char": 0.7745938301086426, "correct_loss_per_token": 1.9672788381576538, "incorrect_loss_per_token": 1.5491876602172852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.398174524307251, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.398174524307251, "logits_per_char": -0.6990872621536255, "num_chars": 2}, {"sum_logits": -1.6724556684494019, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6724556684494019, "logits_per_char": -0.8362278342247009, "num_chars": 2}, {"sum_logits": -1.5848984718322754, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5848984718322754, "logits_per_char": -0.7924492359161377, "num_chars": 2}, {"sum_logits": -1.5412219762802124, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5412219762802124, "logits_per_char": -0.7706109881401062, "num_chars": 2}, {"sum_logits": -1.9672788381576538, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9672788381576538, "logits_per_char": -0.9836394190788269, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 215, "native_id": "cf5a710c931779fb3dde198e0ace3b6a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.109138011932373, "incorrect_loss_raw": 1.531675785779953, "correct_loss_per_char": 1.0545690059661865, "incorrect_loss_per_char": 0.7658378928899765, "correct_loss_per_token": 2.109138011932373, "incorrect_loss_per_token": 1.531675785779953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3301957845687866, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3301957845687866, "logits_per_char": -0.6650978922843933, "num_chars": 2}, {"sum_logits": -1.4979989528656006, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4979989528656006, "logits_per_char": -0.7489994764328003, "num_chars": 2}, {"sum_logits": -1.575452208518982, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.575452208518982, "logits_per_char": -0.787726104259491, "num_chars": 2}, {"sum_logits": -1.7230561971664429, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7230561971664429, "logits_per_char": -0.8615280985832214, "num_chars": 2}, {"sum_logits": -2.109138011932373, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.109138011932373, "logits_per_char": -1.0545690059661865, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 216, "native_id": "0f2377604e628c55ba588366139396b9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9875916242599487, "incorrect_loss_raw": 1.5437199175357819, "correct_loss_per_char": 0.9937958121299744, "incorrect_loss_per_char": 0.7718599587678909, "correct_loss_per_token": 1.9875916242599487, "incorrect_loss_per_token": 1.5437199175357819, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.409270167350769, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.409270167350769, "logits_per_char": -0.7046350836753845, "num_chars": 2}, {"sum_logits": -1.6208878755569458, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6208878755569458, "logits_per_char": -0.8104439377784729, "num_chars": 2}, {"sum_logits": -1.5931216478347778, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5931216478347778, "logits_per_char": -0.7965608239173889, "num_chars": 2}, {"sum_logits": -1.5515999794006348, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5515999794006348, "logits_per_char": -0.7757999897003174, "num_chars": 2}, {"sum_logits": -1.9875916242599487, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9875916242599487, "logits_per_char": -0.9937958121299744, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 217, "native_id": "ada088b7c97de80336ad043757c2db16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.27743661403656, "incorrect_loss_raw": 1.7466111779212952, "correct_loss_per_char": 0.63871830701828, "incorrect_loss_per_char": 0.8733055889606476, "correct_loss_per_token": 1.27743661403656, "incorrect_loss_per_token": 1.7466111779212952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.27743661403656, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.27743661403656, "logits_per_char": -0.63871830701828, "num_chars": 2}, {"sum_logits": -1.499648094177246, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.499648094177246, "logits_per_char": -0.749824047088623, "num_chars": 2}, {"sum_logits": -1.6148457527160645, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.6148457527160645, "logits_per_char": -0.8074228763580322, "num_chars": 2}, {"sum_logits": -1.751366376876831, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.751366376876831, "logits_per_char": -0.8756831884384155, "num_chars": 2}, {"sum_logits": -2.120584487915039, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -2.120584487915039, "logits_per_char": -1.0602922439575195, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 218, "native_id": "beef0aa2058297904bb4acc1dc340c85", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4780269861221313, "incorrect_loss_raw": 1.6953275799751282, "correct_loss_per_char": 0.7390134930610657, "incorrect_loss_per_char": 0.8476637899875641, "correct_loss_per_token": 1.4780269861221313, "incorrect_loss_per_token": 1.6953275799751282, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2712342739105225, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2712342739105225, "logits_per_char": -0.6356171369552612, "num_chars": 2}, {"sum_logits": -1.4780269861221313, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4780269861221313, "logits_per_char": -0.7390134930610657, "num_chars": 2}, {"sum_logits": -1.6756237745285034, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6756237745285034, "logits_per_char": -0.8378118872642517, "num_chars": 2}, {"sum_logits": -1.7650648355484009, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7650648355484009, "logits_per_char": -0.8825324177742004, "num_chars": 2}, {"sum_logits": -2.069387435913086, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.069387435913086, "logits_per_char": -1.034693717956543, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 219, "native_id": "ba9a05bd2086c0d37733e26479d6630f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9885704517364502, "incorrect_loss_raw": 1.5432783663272858, "correct_loss_per_char": 0.9942852258682251, "incorrect_loss_per_char": 0.7716391831636429, "correct_loss_per_token": 1.9885704517364502, "incorrect_loss_per_token": 1.5432783663272858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4048163890838623, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4048163890838623, "logits_per_char": -0.7024081945419312, "num_chars": 2}, {"sum_logits": -1.510727047920227, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.510727047920227, "logits_per_char": -0.7553635239601135, "num_chars": 2}, {"sum_logits": -1.5905423164367676, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5905423164367676, "logits_per_char": -0.7952711582183838, "num_chars": 2}, {"sum_logits": -1.6670277118682861, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6670277118682861, "logits_per_char": -0.8335138559341431, "num_chars": 2}, {"sum_logits": -1.9885704517364502, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9885704517364502, "logits_per_char": -0.9942852258682251, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 220, "native_id": "6b0bf501aa68b06ddc5ad72ac5ff68fc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6350247859954834, "incorrect_loss_raw": 1.644675225019455, "correct_loss_per_char": 0.8175123929977417, "incorrect_loss_per_char": 0.8223376125097275, "correct_loss_per_token": 1.6350247859954834, "incorrect_loss_per_token": 1.644675225019455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3547035455703735, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3547035455703735, "logits_per_char": -0.6773517727851868, "num_chars": 2}, {"sum_logits": -1.4858968257904053, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4858968257904053, "logits_per_char": -0.7429484128952026, "num_chars": 2}, {"sum_logits": -1.6350247859954834, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6350247859954834, "logits_per_char": -0.8175123929977417, "num_chars": 2}, {"sum_logits": -1.6549055576324463, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6549055576324463, "logits_per_char": -0.8274527788162231, "num_chars": 2}, {"sum_logits": -2.0831949710845947, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.0831949710845947, "logits_per_char": -1.0415974855422974, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 221, "native_id": "926298bbdd03ce96acfeb4408b888b61", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5011413097381592, "incorrect_loss_raw": 1.6955011487007141, "correct_loss_per_char": 0.7505706548690796, "incorrect_loss_per_char": 0.8477505743503571, "correct_loss_per_token": 1.5011413097381592, "incorrect_loss_per_token": 1.6955011487007141, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2872629165649414, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.2872629165649414, "logits_per_char": -0.6436314582824707, "num_chars": 2}, {"sum_logits": -1.5011413097381592, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.5011413097381592, "logits_per_char": -0.7505706548690796, "num_chars": 2}, {"sum_logits": -1.5924843549728394, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.5924843549728394, "logits_per_char": -0.7962421774864197, "num_chars": 2}, {"sum_logits": -1.7164384126663208, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.7164384126663208, "logits_per_char": -0.8582192063331604, "num_chars": 2}, {"sum_logits": -2.185818910598755, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -2.185818910598755, "logits_per_char": -1.0929094552993774, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 222, "native_id": "faa0aa438b94c19be8ff52ee80d9e298", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3356035947799683, "incorrect_loss_raw": 1.7306072413921356, "correct_loss_per_char": 0.6678017973899841, "incorrect_loss_per_char": 0.8653036206960678, "correct_loss_per_token": 1.3356035947799683, "incorrect_loss_per_token": 1.7306072413921356, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3356035947799683, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3356035947799683, "logits_per_char": -0.6678017973899841, "num_chars": 2}, {"sum_logits": -1.4900585412979126, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4900585412979126, "logits_per_char": -0.7450292706489563, "num_chars": 2}, {"sum_logits": -1.5984874963760376, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5984874963760376, "logits_per_char": -0.7992437481880188, "num_chars": 2}, {"sum_logits": -1.6375290155410767, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6375290155410767, "logits_per_char": -0.8187645077705383, "num_chars": 2}, {"sum_logits": -2.1963539123535156, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.1963539123535156, "logits_per_char": -1.0981769561767578, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 223, "native_id": "9310c39a0752f28640c3a05cba1d5ca7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.291224241256714, "incorrect_loss_raw": 1.5013402998447418, "correct_loss_per_char": 1.145612120628357, "incorrect_loss_per_char": 0.7506701499223709, "correct_loss_per_token": 2.291224241256714, "incorrect_loss_per_token": 1.5013402998447418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.368958830833435, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.368958830833435, "logits_per_char": -0.6844794154167175, "num_chars": 2}, {"sum_logits": -1.556270956993103, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.556270956993103, "logits_per_char": -0.7781354784965515, "num_chars": 2}, {"sum_logits": -1.5029891729354858, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5029891729354858, "logits_per_char": -0.7514945864677429, "num_chars": 2}, {"sum_logits": -1.5771422386169434, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5771422386169434, "logits_per_char": -0.7885711193084717, "num_chars": 2}, {"sum_logits": -2.291224241256714, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.291224241256714, "logits_per_char": -1.145612120628357, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 224, "native_id": "fee5f4e9d8e37f0183e36eb9b8dbcbb9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3596054315567017, "incorrect_loss_raw": 1.736155480146408, "correct_loss_per_char": 0.6798027157783508, "incorrect_loss_per_char": 0.868077740073204, "correct_loss_per_token": 1.3596054315567017, "incorrect_loss_per_token": 1.736155480146408, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3596054315567017, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3596054315567017, "logits_per_char": -0.6798027157783508, "num_chars": 2}, {"sum_logits": -1.4159904718399048, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4159904718399048, "logits_per_char": -0.7079952359199524, "num_chars": 2}, {"sum_logits": -1.6010204553604126, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6010204553604126, "logits_per_char": -0.8005102276802063, "num_chars": 2}, {"sum_logits": -1.6508618593215942, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6508618593215942, "logits_per_char": -0.8254309296607971, "num_chars": 2}, {"sum_logits": -2.2767491340637207, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.2767491340637207, "logits_per_char": -1.1383745670318604, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 225, "native_id": "5392af3f1c4665e95ff3354e5115de42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6657912731170654, "incorrect_loss_raw": 1.6327289640903473, "correct_loss_per_char": 0.8328956365585327, "incorrect_loss_per_char": 0.8163644820451736, "correct_loss_per_token": 1.6657912731170654, "incorrect_loss_per_token": 1.6327289640903473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.249545931816101, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.249545931816101, "logits_per_char": -0.6247729659080505, "num_chars": 2}, {"sum_logits": -1.6784214973449707, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6784214973449707, "logits_per_char": -0.8392107486724854, "num_chars": 2}, {"sum_logits": -1.6863155364990234, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6863155364990234, "logits_per_char": -0.8431577682495117, "num_chars": 2}, {"sum_logits": -1.6657912731170654, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6657912731170654, "logits_per_char": -0.8328956365585327, "num_chars": 2}, {"sum_logits": -1.916632890701294, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.916632890701294, "logits_per_char": -0.958316445350647, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 226, "native_id": "4c5c74b3287492d6ddb2da4c8c0fd51a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5601016283035278, "incorrect_loss_raw": 1.67400661110878, "correct_loss_per_char": 0.7800508141517639, "incorrect_loss_per_char": 0.83700330555439, "correct_loss_per_token": 1.5601016283035278, "incorrect_loss_per_token": 1.67400661110878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.36667001247406, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.36667001247406, "logits_per_char": -0.68333500623703, "num_chars": 2}, {"sum_logits": -1.5419576168060303, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5419576168060303, "logits_per_char": -0.7709788084030151, "num_chars": 2}, {"sum_logits": -1.5601016283035278, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5601016283035278, "logits_per_char": -0.7800508141517639, "num_chars": 2}, {"sum_logits": -1.5836915969848633, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5836915969848633, "logits_per_char": -0.7918457984924316, "num_chars": 2}, {"sum_logits": -2.203707218170166, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.203707218170166, "logits_per_char": -1.101853609085083, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 227, "native_id": "52f3eb6c9a6b9671050fc769d465ed03", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3316073417663574, "incorrect_loss_raw": 1.7530841529369354, "correct_loss_per_char": 0.6658036708831787, "incorrect_loss_per_char": 0.8765420764684677, "correct_loss_per_token": 1.3316073417663574, "incorrect_loss_per_token": 1.7530841529369354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3316073417663574, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3316073417663574, "logits_per_char": -0.6658036708831787, "num_chars": 2}, {"sum_logits": -1.414723515510559, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.414723515510559, "logits_per_char": -0.7073617577552795, "num_chars": 2}, {"sum_logits": -1.5855695009231567, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5855695009231567, "logits_per_char": -0.7927847504615784, "num_chars": 2}, {"sum_logits": -1.68136465549469, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.68136465549469, "logits_per_char": -0.840682327747345, "num_chars": 2}, {"sum_logits": -2.330678939819336, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.330678939819336, "logits_per_char": -1.165339469909668, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 228, "native_id": "03ee30b5801b61aee791a551a9d9a49f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3017600774765015, "incorrect_loss_raw": 1.7373628318309784, "correct_loss_per_char": 0.6508800387382507, "incorrect_loss_per_char": 0.8686814159154892, "correct_loss_per_token": 1.3017600774765015, "incorrect_loss_per_token": 1.7373628318309784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3017600774765015, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3017600774765015, "logits_per_char": -0.6508800387382507, "num_chars": 2}, {"sum_logits": -1.4873943328857422, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4873943328857422, "logits_per_char": -0.7436971664428711, "num_chars": 2}, {"sum_logits": -1.6989467144012451, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6989467144012451, "logits_per_char": -0.8494733572006226, "num_chars": 2}, {"sum_logits": -1.679913878440857, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.679913878440857, "logits_per_char": -0.8399569392204285, "num_chars": 2}, {"sum_logits": -2.0831964015960693, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0831964015960693, "logits_per_char": -1.0415982007980347, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 229, "native_id": "6d1d483745bc0aae0f4dd04e851ceffb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.585500717163086, "incorrect_loss_raw": 1.6510896682739258, "correct_loss_per_char": 0.792750358581543, "incorrect_loss_per_char": 0.8255448341369629, "correct_loss_per_token": 1.585500717163086, "incorrect_loss_per_token": 1.6510896682739258, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3305625915527344, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.3305625915527344, "logits_per_char": -0.6652812957763672, "num_chars": 2}, {"sum_logits": -1.585500717163086, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.585500717163086, "logits_per_char": -0.792750358581543, "num_chars": 2}, {"sum_logits": -1.5904209613800049, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5904209613800049, "logits_per_char": -0.7952104806900024, "num_chars": 2}, {"sum_logits": -1.659867286682129, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.659867286682129, "logits_per_char": -0.8299336433410645, "num_chars": 2}, {"sum_logits": -2.023507833480835, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.023507833480835, "logits_per_char": -1.0117539167404175, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 230, "native_id": "bf10bfda7328c8671e15adf8546b64d7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5972986221313477, "incorrect_loss_raw": 1.6348142623901367, "correct_loss_per_char": 0.7986493110656738, "incorrect_loss_per_char": 0.8174071311950684, "correct_loss_per_token": 1.5972986221313477, "incorrect_loss_per_token": 1.6348142623901367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4356653690338135, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4356653690338135, "logits_per_char": -0.7178326845169067, "num_chars": 2}, {"sum_logits": -1.5359430313110352, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5359430313110352, "logits_per_char": -0.7679715156555176, "num_chars": 2}, {"sum_logits": -1.5972986221313477, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5972986221313477, "logits_per_char": -0.7986493110656738, "num_chars": 2}, {"sum_logits": -1.62642502784729, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.62642502784729, "logits_per_char": -0.813212513923645, "num_chars": 2}, {"sum_logits": -1.9412236213684082, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9412236213684082, "logits_per_char": -0.9706118106842041, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 231, "native_id": "0b3a3ee40dd25be9735ac5e3342ca4dd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.213045358657837, "incorrect_loss_raw": 1.5253232717514038, "correct_loss_per_char": 1.1065226793289185, "incorrect_loss_per_char": 0.7626616358757019, "correct_loss_per_token": 2.213045358657837, "incorrect_loss_per_token": 1.5253232717514038, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.312394142150879, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.312394142150879, "logits_per_char": -0.6561970710754395, "num_chars": 2}, {"sum_logits": -1.380298376083374, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.380298376083374, "logits_per_char": -0.690149188041687, "num_chars": 2}, {"sum_logits": -1.6195635795593262, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6195635795593262, "logits_per_char": -0.8097817897796631, "num_chars": 2}, {"sum_logits": -1.7890369892120361, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7890369892120361, "logits_per_char": -0.8945184946060181, "num_chars": 2}, {"sum_logits": -2.213045358657837, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.213045358657837, "logits_per_char": -1.1065226793289185, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 232, "native_id": "77e2a0b469b56bea81921a4a945ffcb5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.44185471534729, "incorrect_loss_raw": 1.6780788600444794, "correct_loss_per_char": 0.720927357673645, "incorrect_loss_per_char": 0.8390394300222397, "correct_loss_per_token": 1.44185471534729, "incorrect_loss_per_token": 1.6780788600444794, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.44185471534729, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.44185471534729, "logits_per_char": -0.720927357673645, "num_chars": 2}, {"sum_logits": -1.5869827270507812, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5869827270507812, "logits_per_char": -0.7934913635253906, "num_chars": 2}, {"sum_logits": -1.5881435871124268, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5881435871124268, "logits_per_char": -0.7940717935562134, "num_chars": 2}, {"sum_logits": -1.554045557975769, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.554045557975769, "logits_per_char": -0.7770227789878845, "num_chars": 2}, {"sum_logits": -1.9831435680389404, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.9831435680389404, "logits_per_char": -0.9915717840194702, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 233, "native_id": "dc964e4f6df6b70815e81e466d0ff717", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5837815999984741, "incorrect_loss_raw": 1.6474768817424774, "correct_loss_per_char": 0.7918907999992371, "incorrect_loss_per_char": 0.8237384408712387, "correct_loss_per_token": 1.5837815999984741, "incorrect_loss_per_token": 1.6474768817424774, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.457762360572815, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.457762360572815, "logits_per_char": -0.7288811802864075, "num_chars": 2}, {"sum_logits": -1.5837815999984741, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5837815999984741, "logits_per_char": -0.7918907999992371, "num_chars": 2}, {"sum_logits": -1.504804015159607, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.504804015159607, "logits_per_char": -0.7524020075798035, "num_chars": 2}, {"sum_logits": -1.5841158628463745, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5841158628463745, "logits_per_char": -0.7920579314231873, "num_chars": 2}, {"sum_logits": -2.0432252883911133, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0432252883911133, "logits_per_char": -1.0216126441955566, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 234, "native_id": "6b9221c1af583ffb43580857d6fde38a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4631880521774292, "incorrect_loss_raw": 1.7091931104660034, "correct_loss_per_char": 0.7315940260887146, "incorrect_loss_per_char": 0.8545965552330017, "correct_loss_per_token": 1.4631880521774292, "incorrect_loss_per_token": 1.7091931104660034, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2477655410766602, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.2477655410766602, "logits_per_char": -0.6238827705383301, "num_chars": 2}, {"sum_logits": -1.4631880521774292, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4631880521774292, "logits_per_char": -0.7315940260887146, "num_chars": 2}, {"sum_logits": -1.7191615104675293, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7191615104675293, "logits_per_char": -0.8595807552337646, "num_chars": 2}, {"sum_logits": -1.6934385299682617, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6934385299682617, "logits_per_char": -0.8467192649841309, "num_chars": 2}, {"sum_logits": -2.1764068603515625, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.1764068603515625, "logits_per_char": -1.0882034301757812, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 235, "native_id": "4dc2c4596b08e9bfd893174e67bff40a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7601712942123413, "incorrect_loss_raw": 1.6036995947360992, "correct_loss_per_char": 0.8800856471061707, "incorrect_loss_per_char": 0.8018497973680496, "correct_loss_per_token": 1.7601712942123413, "incorrect_loss_per_token": 1.6036995947360992, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.391161322593689, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.391161322593689, "logits_per_char": -0.6955806612968445, "num_chars": 2}, {"sum_logits": -1.421095848083496, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.421095848083496, "logits_per_char": -0.710547924041748, "num_chars": 2}, {"sum_logits": -1.6810017824172974, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6810017824172974, "logits_per_char": -0.8405008912086487, "num_chars": 2}, {"sum_logits": -1.7601712942123413, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7601712942123413, "logits_per_char": -0.8800856471061707, "num_chars": 2}, {"sum_logits": -1.9215394258499146, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9215394258499146, "logits_per_char": -0.9607697129249573, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 236, "native_id": "8ae24d3ff199077a59e0d970feb665b7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.565886378288269, "incorrect_loss_raw": 1.6325551271438599, "correct_loss_per_char": 0.7829431891441345, "incorrect_loss_per_char": 0.8162775635719299, "correct_loss_per_token": 1.565886378288269, "incorrect_loss_per_token": 1.6325551271438599, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.565886378288269, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.565886378288269, "logits_per_char": -0.7829431891441345, "num_chars": 2}, {"sum_logits": -1.5083204507827759, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.5083204507827759, "logits_per_char": -0.7541602253913879, "num_chars": 2}, {"sum_logits": -1.592208981513977, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.592208981513977, "logits_per_char": -0.7961044907569885, "num_chars": 2}, {"sum_logits": -1.6065608263015747, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6065608263015747, "logits_per_char": -0.8032804131507874, "num_chars": 2}, {"sum_logits": -1.8231302499771118, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.8231302499771118, "logits_per_char": -0.9115651249885559, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 237, "native_id": "d64a676e9d22e7edd12e7f4ce267a9f0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.515298843383789, "incorrect_loss_raw": 1.6813403964042664, "correct_loss_per_char": 0.7576494216918945, "incorrect_loss_per_char": 0.8406701982021332, "correct_loss_per_token": 1.515298843383789, "incorrect_loss_per_token": 1.6813403964042664, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2536795139312744, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.2536795139312744, "logits_per_char": -0.6268397569656372, "num_chars": 2}, {"sum_logits": -1.515298843383789, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.515298843383789, "logits_per_char": -0.7576494216918945, "num_chars": 2}, {"sum_logits": -1.705111026763916, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.705111026763916, "logits_per_char": -0.852555513381958, "num_chars": 2}, {"sum_logits": -1.7065284252166748, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.7065284252166748, "logits_per_char": -0.8532642126083374, "num_chars": 2}, {"sum_logits": -2.0600426197052, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0600426197052, "logits_per_char": -1.0300213098526, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 238, "native_id": "54ecb521df1d0f5b130a393c42b4126d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6192563772201538, "incorrect_loss_raw": 1.6256535053253174, "correct_loss_per_char": 0.8096281886100769, "incorrect_loss_per_char": 0.8128267526626587, "correct_loss_per_token": 1.6192563772201538, "incorrect_loss_per_token": 1.6256535053253174, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5096486806869507, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5096486806869507, "logits_per_char": -0.7548243403434753, "num_chars": 2}, {"sum_logits": -1.5016090869903564, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.5016090869903564, "logits_per_char": -0.7508045434951782, "num_chars": 2}, {"sum_logits": -1.6192563772201538, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6192563772201538, "logits_per_char": -0.8096281886100769, "num_chars": 2}, {"sum_logits": -1.6050664186477661, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6050664186477661, "logits_per_char": -0.8025332093238831, "num_chars": 2}, {"sum_logits": -1.8862898349761963, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.8862898349761963, "logits_per_char": -0.9431449174880981, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 239, "native_id": "b7276bb9139ec25c98c7e3822404eb6c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5591357946395874, "incorrect_loss_raw": 1.6703374087810516, "correct_loss_per_char": 0.7795678973197937, "incorrect_loss_per_char": 0.8351687043905258, "correct_loss_per_token": 1.5591357946395874, "incorrect_loss_per_token": 1.6703374087810516, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4029370546340942, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4029370546340942, "logits_per_char": -0.7014685273170471, "num_chars": 2}, {"sum_logits": -1.5591357946395874, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5591357946395874, "logits_per_char": -0.7795678973197937, "num_chars": 2}, {"sum_logits": -1.53520667552948, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.53520667552948, "logits_per_char": -0.76760333776474, "num_chars": 2}, {"sum_logits": -1.5393909215927124, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5393909215927124, "logits_per_char": -0.7696954607963562, "num_chars": 2}, {"sum_logits": -2.20381498336792, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.20381498336792, "logits_per_char": -1.10190749168396, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 240, "native_id": "ecb8758b0d088f9aedc182a516dd1190", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1539735794067383, "incorrect_loss_raw": 1.520297259092331, "correct_loss_per_char": 1.0769867897033691, "incorrect_loss_per_char": 0.7601486295461655, "correct_loss_per_token": 2.1539735794067383, "incorrect_loss_per_token": 1.520297259092331, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3676294088363647, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3676294088363647, "logits_per_char": -0.6838147044181824, "num_chars": 2}, {"sum_logits": -1.5133339166641235, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5133339166641235, "logits_per_char": -0.7566669583320618, "num_chars": 2}, {"sum_logits": -1.5544557571411133, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5544557571411133, "logits_per_char": -0.7772278785705566, "num_chars": 2}, {"sum_logits": -1.6457699537277222, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6457699537277222, "logits_per_char": -0.8228849768638611, "num_chars": 2}, {"sum_logits": -2.1539735794067383, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.1539735794067383, "logits_per_char": -1.0769867897033691, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 241, "native_id": "f2645d0ee8662b6553954cee7e77979e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4050008058547974, "incorrect_loss_raw": 1.6945320963859558, "correct_loss_per_char": 0.7025004029273987, "incorrect_loss_per_char": 0.8472660481929779, "correct_loss_per_token": 1.4050008058547974, "incorrect_loss_per_token": 1.6945320963859558, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4359782934188843, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.4359782934188843, "logits_per_char": -0.7179891467094421, "num_chars": 2}, {"sum_logits": -1.4050008058547974, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.4050008058547974, "logits_per_char": -0.7025004029273987, "num_chars": 2}, {"sum_logits": -1.6262959241867065, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6262959241867065, "logits_per_char": -0.8131479620933533, "num_chars": 2}, {"sum_logits": -1.7232481241226196, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7232481241226196, "logits_per_char": -0.8616240620613098, "num_chars": 2}, {"sum_logits": -1.9926060438156128, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.9926060438156128, "logits_per_char": -0.9963030219078064, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 242, "native_id": "ea6d1a739ea841be282e13789270651e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6589254140853882, "incorrect_loss_raw": 1.659274935722351, "correct_loss_per_char": 0.8294627070426941, "incorrect_loss_per_char": 0.8296374678611755, "correct_loss_per_token": 1.6589254140853882, "incorrect_loss_per_token": 1.659274935722351, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2050888538360596, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.2050888538360596, "logits_per_char": -0.6025444269180298, "num_chars": 2}, {"sum_logits": -1.575411319732666, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.575411319732666, "logits_per_char": -0.787705659866333, "num_chars": 2}, {"sum_logits": -1.6589254140853882, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6589254140853882, "logits_per_char": -0.8294627070426941, "num_chars": 2}, {"sum_logits": -1.7236227989196777, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7236227989196777, "logits_per_char": -0.8618113994598389, "num_chars": 2}, {"sum_logits": -2.132976770401001, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.132976770401001, "logits_per_char": -1.0664883852005005, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 243, "native_id": "c82ed0c2a2e115452b4d596c5faafbcf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7757641077041626, "incorrect_loss_raw": 1.6074416935443878, "correct_loss_per_char": 0.8878820538520813, "incorrect_loss_per_char": 0.8037208467721939, "correct_loss_per_token": 1.7757641077041626, "incorrect_loss_per_token": 1.6074416935443878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3139485120773315, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3139485120773315, "logits_per_char": -0.6569742560386658, "num_chars": 2}, {"sum_logits": -1.5247231721878052, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5247231721878052, "logits_per_char": -0.7623615860939026, "num_chars": 2}, {"sum_logits": -1.59091055393219, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.59091055393219, "logits_per_char": -0.795455276966095, "num_chars": 2}, {"sum_logits": -1.7757641077041626, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7757641077041626, "logits_per_char": -0.8878820538520813, "num_chars": 2}, {"sum_logits": -2.0001845359802246, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0001845359802246, "logits_per_char": -1.0000922679901123, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 244, "native_id": "163d83851ecd4a4144b31b8738e4c335", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5243569612503052, "incorrect_loss_raw": 1.6484348475933075, "correct_loss_per_char": 0.7621784806251526, "incorrect_loss_per_char": 0.8242174237966537, "correct_loss_per_token": 1.5243569612503052, "incorrect_loss_per_token": 1.6484348475933075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4256081581115723, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4256081581115723, "logits_per_char": -0.7128040790557861, "num_chars": 2}, {"sum_logits": -1.5243569612503052, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5243569612503052, "logits_per_char": -0.7621784806251526, "num_chars": 2}, {"sum_logits": -1.715190052986145, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.715190052986145, "logits_per_char": -0.8575950264930725, "num_chars": 2}, {"sum_logits": -1.6795681715011597, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6795681715011597, "logits_per_char": -0.8397840857505798, "num_chars": 2}, {"sum_logits": -1.773373007774353, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.773373007774353, "logits_per_char": -0.8866865038871765, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 245, "native_id": "095767956c500ca1af7cf7671556de5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2257338762283325, "incorrect_loss_raw": 1.748925119638443, "correct_loss_per_char": 0.6128669381141663, "incorrect_loss_per_char": 0.8744625598192215, "correct_loss_per_token": 1.2257338762283325, "incorrect_loss_per_token": 1.748925119638443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2257338762283325, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.2257338762283325, "logits_per_char": -0.6128669381141663, "num_chars": 2}, {"sum_logits": -1.5761523246765137, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5761523246765137, "logits_per_char": -0.7880761623382568, "num_chars": 2}, {"sum_logits": -1.7290245294570923, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.7290245294570923, "logits_per_char": -0.8645122647285461, "num_chars": 2}, {"sum_logits": -1.7598258256912231, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.7598258256912231, "logits_per_char": -0.8799129128456116, "num_chars": 2}, {"sum_logits": -1.9306977987289429, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.9306977987289429, "logits_per_char": -0.9653488993644714, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 246, "native_id": "d31ee38f67d1173275e120b8ad36039c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6533992290496826, "incorrect_loss_raw": 1.6495350301265717, "correct_loss_per_char": 0.8266996145248413, "incorrect_loss_per_char": 0.8247675150632858, "correct_loss_per_token": 1.6533992290496826, "incorrect_loss_per_token": 1.6495350301265717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3410027027130127, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3410027027130127, "logits_per_char": -0.6705013513565063, "num_chars": 2}, {"sum_logits": -1.409195065498352, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.409195065498352, "logits_per_char": -0.704597532749176, "num_chars": 2}, {"sum_logits": -1.6533992290496826, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6533992290496826, "logits_per_char": -0.8266996145248413, "num_chars": 2}, {"sum_logits": -1.7328236103057861, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7328236103057861, "logits_per_char": -0.8664118051528931, "num_chars": 2}, {"sum_logits": -2.1151187419891357, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.1151187419891357, "logits_per_char": -1.0575593709945679, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 247, "native_id": "c410a4626dfce4b4cfd3e5937602cd77", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3556280136108398, "incorrect_loss_raw": 1.7149330377578735, "correct_loss_per_char": 0.6778140068054199, "incorrect_loss_per_char": 0.8574665188789368, "correct_loss_per_token": 1.3556280136108398, "incorrect_loss_per_token": 1.7149330377578735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3556280136108398, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3556280136108398, "logits_per_char": -0.6778140068054199, "num_chars": 2}, {"sum_logits": -1.557304859161377, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.557304859161377, "logits_per_char": -0.7786524295806885, "num_chars": 2}, {"sum_logits": -1.5806658267974854, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5806658267974854, "logits_per_char": -0.7903329133987427, "num_chars": 2}, {"sum_logits": -1.595224142074585, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.595224142074585, "logits_per_char": -0.7976120710372925, "num_chars": 2}, {"sum_logits": -2.126537322998047, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.126537322998047, "logits_per_char": -1.0632686614990234, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 248, "native_id": "14d760e43728e9e4643c414627f2b596", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6014983654022217, "incorrect_loss_raw": 1.6211287379264832, "correct_loss_per_char": 0.8007491827011108, "incorrect_loss_per_char": 0.8105643689632416, "correct_loss_per_token": 1.6014983654022217, "incorrect_loss_per_token": 1.6211287379264832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4962241649627686, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.4962241649627686, "logits_per_char": -0.7481120824813843, "num_chars": 2}, {"sum_logits": -1.6445269584655762, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6445269584655762, "logits_per_char": -0.8222634792327881, "num_chars": 2}, {"sum_logits": -1.6162731647491455, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6162731647491455, "logits_per_char": -0.8081365823745728, "num_chars": 2}, {"sum_logits": -1.7274906635284424, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.7274906635284424, "logits_per_char": -0.8637453317642212, "num_chars": 2}, {"sum_logits": -1.6014983654022217, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6014983654022217, "logits_per_char": -0.8007491827011108, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 249, "native_id": "abcf1b550b4d44f46d4f68b8e1d98ec8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.584472894668579, "incorrect_loss_raw": 1.635288417339325, "correct_loss_per_char": 0.7922364473342896, "incorrect_loss_per_char": 0.8176442086696625, "correct_loss_per_token": 1.584472894668579, "incorrect_loss_per_token": 1.635288417339325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4907869100570679, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.4907869100570679, "logits_per_char": -0.7453934550285339, "num_chars": 2}, {"sum_logits": -1.5132876634597778, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5132876634597778, "logits_per_char": -0.7566438317298889, "num_chars": 2}, {"sum_logits": -1.6593822240829468, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6593822240829468, "logits_per_char": -0.8296911120414734, "num_chars": 2}, {"sum_logits": -1.584472894668579, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.584472894668579, "logits_per_char": -0.7922364473342896, "num_chars": 2}, {"sum_logits": -1.8776968717575073, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.8776968717575073, "logits_per_char": -0.9388484358787537, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 250, "native_id": "5b8af6f26335dbd501b0104c71e26d9e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6043877601623535, "incorrect_loss_raw": 1.6655450463294983, "correct_loss_per_char": 0.8021938800811768, "incorrect_loss_per_char": 0.8327725231647491, "correct_loss_per_token": 1.6043877601623535, "incorrect_loss_per_token": 1.6655450463294983, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2618520259857178, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2618520259857178, "logits_per_char": -0.6309260129928589, "num_chars": 2}, {"sum_logits": -1.6043877601623535, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6043877601623535, "logits_per_char": -0.8021938800811768, "num_chars": 2}, {"sum_logits": -1.572446346282959, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.572446346282959, "logits_per_char": -0.7862231731414795, "num_chars": 2}, {"sum_logits": -1.6880617141723633, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6880617141723633, "logits_per_char": -0.8440308570861816, "num_chars": 2}, {"sum_logits": -2.139820098876953, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.139820098876953, "logits_per_char": -1.0699100494384766, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 251, "native_id": "4364b4b342fb7b44434bd6694bf8fd51", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6850864887237549, "incorrect_loss_raw": 1.6130488216876984, "correct_loss_per_char": 0.8425432443618774, "incorrect_loss_per_char": 0.8065244108438492, "correct_loss_per_token": 1.6850864887237549, "incorrect_loss_per_token": 1.6130488216876984, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.378213882446289, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.378213882446289, "logits_per_char": -0.6891069412231445, "num_chars": 2}, {"sum_logits": -1.5378419160842896, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.5378419160842896, "logits_per_char": -0.7689209580421448, "num_chars": 2}, {"sum_logits": -1.6816558837890625, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.6816558837890625, "logits_per_char": -0.8408279418945312, "num_chars": 2}, {"sum_logits": -1.6850864887237549, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.6850864887237549, "logits_per_char": -0.8425432443618774, "num_chars": 2}, {"sum_logits": -1.8544836044311523, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.8544836044311523, "logits_per_char": -0.9272418022155762, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 252, "native_id": "3ffe67fb009529d9b0c49ccd7141ee4a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6376458406448364, "incorrect_loss_raw": 1.62667316198349, "correct_loss_per_char": 0.8188229203224182, "incorrect_loss_per_char": 0.813336580991745, "correct_loss_per_token": 1.6376458406448364, "incorrect_loss_per_token": 1.62667316198349, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5262545347213745, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5262545347213745, "logits_per_char": -0.7631272673606873, "num_chars": 2}, {"sum_logits": -1.4653562307357788, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4653562307357788, "logits_per_char": -0.7326781153678894, "num_chars": 2}, {"sum_logits": -1.5447150468826294, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5447150468826294, "logits_per_char": -0.7723575234413147, "num_chars": 2}, {"sum_logits": -1.6376458406448364, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6376458406448364, "logits_per_char": -0.8188229203224182, "num_chars": 2}, {"sum_logits": -1.9703668355941772, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9703668355941772, "logits_per_char": -0.9851834177970886, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 253, "native_id": "f372587fa4c99d5bebf0d0eb987c44e2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9443129301071167, "incorrect_loss_raw": 1.55657297372818, "correct_loss_per_char": 0.9721564650535583, "incorrect_loss_per_char": 0.77828648686409, "correct_loss_per_token": 1.9443129301071167, "incorrect_loss_per_token": 1.55657297372818, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3619861602783203, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.3619861602783203, "logits_per_char": -0.6809930801391602, "num_chars": 2}, {"sum_logits": -1.5838676691055298, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5838676691055298, "logits_per_char": -0.7919338345527649, "num_chars": 2}, {"sum_logits": -1.542719841003418, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.542719841003418, "logits_per_char": -0.771359920501709, "num_chars": 2}, {"sum_logits": -1.7377182245254517, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.7377182245254517, "logits_per_char": -0.8688591122627258, "num_chars": 2}, {"sum_logits": -1.9443129301071167, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.9443129301071167, "logits_per_char": -0.9721564650535583, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 254, "native_id": "d35a8a3bd560fdd651ecf314878ed30f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.276169776916504, "incorrect_loss_raw": 1.5206388235092163, "correct_loss_per_char": 1.138084888458252, "incorrect_loss_per_char": 0.7603194117546082, "correct_loss_per_token": 2.276169776916504, "incorrect_loss_per_token": 1.5206388235092163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.228778600692749, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.228778600692749, "logits_per_char": -0.6143893003463745, "num_chars": 2}, {"sum_logits": -1.4476041793823242, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4476041793823242, "logits_per_char": -0.7238020896911621, "num_chars": 2}, {"sum_logits": -1.6518467664718628, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6518467664718628, "logits_per_char": -0.8259233832359314, "num_chars": 2}, {"sum_logits": -1.7543257474899292, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7543257474899292, "logits_per_char": -0.8771628737449646, "num_chars": 2}, {"sum_logits": -2.276169776916504, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.276169776916504, "logits_per_char": -1.138084888458252, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 255, "native_id": "0542414710025f56b0c26e1bae5c4d06", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6361889839172363, "incorrect_loss_raw": 1.6353807151317596, "correct_loss_per_char": 0.8180944919586182, "incorrect_loss_per_char": 0.8176903575658798, "correct_loss_per_token": 1.6361889839172363, "incorrect_loss_per_token": 1.6353807151317596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6361889839172363, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6361889839172363, "logits_per_char": -0.8180944919586182, "num_chars": 2}, {"sum_logits": -1.528624415397644, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.528624415397644, "logits_per_char": -0.764312207698822, "num_chars": 2}, {"sum_logits": -1.4750587940216064, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4750587940216064, "logits_per_char": -0.7375293970108032, "num_chars": 2}, {"sum_logits": -1.4904329776763916, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4904329776763916, "logits_per_char": -0.7452164888381958, "num_chars": 2}, {"sum_logits": -2.0474066734313965, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.0474066734313965, "logits_per_char": -1.0237033367156982, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 256, "native_id": "1875f70cf736c68c7a9df3ef870224a1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2503044605255127, "incorrect_loss_raw": 1.7402712106704712, "correct_loss_per_char": 0.6251522302627563, "incorrect_loss_per_char": 0.8701356053352356, "correct_loss_per_token": 1.2503044605255127, "incorrect_loss_per_token": 1.7402712106704712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2503044605255127, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.2503044605255127, "logits_per_char": -0.6251522302627563, "num_chars": 2}, {"sum_logits": -1.566156268119812, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.566156268119812, "logits_per_char": -0.783078134059906, "num_chars": 2}, {"sum_logits": -1.710640788078308, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.710640788078308, "logits_per_char": -0.855320394039154, "num_chars": 2}, {"sum_logits": -1.7283605337142944, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7283605337142944, "logits_per_char": -0.8641802668571472, "num_chars": 2}, {"sum_logits": -1.9559272527694702, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9559272527694702, "logits_per_char": -0.9779636263847351, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 257, "native_id": "83250ae2dfeb2e3886ead4cde8e1290f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7014291286468506, "incorrect_loss_raw": 1.6240020394325256, "correct_loss_per_char": 0.8507145643234253, "incorrect_loss_per_char": 0.8120010197162628, "correct_loss_per_token": 1.7014291286468506, "incorrect_loss_per_token": 1.6240020394325256, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3346154689788818, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3346154689788818, "logits_per_char": -0.6673077344894409, "num_chars": 2}, {"sum_logits": -1.4928185939788818, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4928185939788818, "logits_per_char": -0.7464092969894409, "num_chars": 2}, {"sum_logits": -1.7014291286468506, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7014291286468506, "logits_per_char": -0.8507145643234253, "num_chars": 2}, {"sum_logits": -1.6431729793548584, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6431729793548584, "logits_per_char": -0.8215864896774292, "num_chars": 2}, {"sum_logits": -2.0254011154174805, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.0254011154174805, "logits_per_char": -1.0127005577087402, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 258, "native_id": "70c39372c0d50566554fd72c768b75f6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9593721628189087, "incorrect_loss_raw": 1.5570846796035767, "correct_loss_per_char": 0.9796860814094543, "incorrect_loss_per_char": 0.7785423398017883, "correct_loss_per_token": 1.9593721628189087, "incorrect_loss_per_token": 1.5570846796035767, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3312435150146484, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3312435150146484, "logits_per_char": -0.6656217575073242, "num_chars": 2}, {"sum_logits": -1.5145106315612793, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5145106315612793, "logits_per_char": -0.7572553157806396, "num_chars": 2}, {"sum_logits": -1.6011303663253784, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6011303663253784, "logits_per_char": -0.8005651831626892, "num_chars": 2}, {"sum_logits": -1.7814542055130005, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7814542055130005, "logits_per_char": -0.8907271027565002, "num_chars": 2}, {"sum_logits": -1.9593721628189087, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9593721628189087, "logits_per_char": -0.9796860814094543, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 259, "native_id": "c21ec5b367f409a0288d616f626555ae", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3192412853240967, "incorrect_loss_raw": 1.7140419483184814, "correct_loss_per_char": 0.6596206426620483, "incorrect_loss_per_char": 0.8570209741592407, "correct_loss_per_token": 1.3192412853240967, "incorrect_loss_per_token": 1.7140419483184814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3192412853240967, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3192412853240967, "logits_per_char": -0.6596206426620483, "num_chars": 2}, {"sum_logits": -1.5250695943832397, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5250695943832397, "logits_per_char": -0.7625347971916199, "num_chars": 2}, {"sum_logits": -1.6550320386886597, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6550320386886597, "logits_per_char": -0.8275160193443298, "num_chars": 2}, {"sum_logits": -1.7497462034225464, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7497462034225464, "logits_per_char": -0.8748731017112732, "num_chars": 2}, {"sum_logits": -1.92631995677948, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.92631995677948, "logits_per_char": -0.96315997838974, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 260, "native_id": "a2cd03ed068f6d613e85f3a60f4db0a1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6284682750701904, "incorrect_loss_raw": 1.6151169836521149, "correct_loss_per_char": 0.8142341375350952, "incorrect_loss_per_char": 0.8075584918260574, "correct_loss_per_token": 1.6284682750701904, "incorrect_loss_per_token": 1.6151169836521149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6268556118011475, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6268556118011475, "logits_per_char": -0.8134278059005737, "num_chars": 2}, {"sum_logits": -1.4893385171890259, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.4893385171890259, "logits_per_char": -0.7446692585945129, "num_chars": 2}, {"sum_logits": -1.6284682750701904, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6284682750701904, "logits_per_char": -0.8142341375350952, "num_chars": 2}, {"sum_logits": -1.705207347869873, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.705207347869873, "logits_per_char": -0.8526036739349365, "num_chars": 2}, {"sum_logits": -1.639066457748413, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.639066457748413, "logits_per_char": -0.8195332288742065, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 261, "native_id": "d2871dc28c82471e5d7f71f79e49c257", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6664563417434692, "incorrect_loss_raw": 1.6263048648834229, "correct_loss_per_char": 0.8332281708717346, "incorrect_loss_per_char": 0.8131524324417114, "correct_loss_per_token": 1.6664563417434692, "incorrect_loss_per_token": 1.6263048648834229, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.330835223197937, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.330835223197937, "logits_per_char": -0.6654176115989685, "num_chars": 2}, {"sum_logits": -1.5302304029464722, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.5302304029464722, "logits_per_char": -0.7651152014732361, "num_chars": 2}, {"sum_logits": -1.7246392965316772, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.7246392965316772, "logits_per_char": -0.8623196482658386, "num_chars": 2}, {"sum_logits": -1.6664563417434692, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.6664563417434692, "logits_per_char": -0.8332281708717346, "num_chars": 2}, {"sum_logits": -1.919514536857605, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.919514536857605, "logits_per_char": -0.9597572684288025, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 262, "native_id": "94770e75c4e2000e717b4218ddff19e8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6744438409805298, "incorrect_loss_raw": 1.608822911977768, "correct_loss_per_char": 0.8372219204902649, "incorrect_loss_per_char": 0.804411455988884, "correct_loss_per_token": 1.6744438409805298, "incorrect_loss_per_token": 1.608822911977768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4495042562484741, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4495042562484741, "logits_per_char": -0.7247521281242371, "num_chars": 2}, {"sum_logits": -1.6925941705703735, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6925941705703735, "logits_per_char": -0.8462970852851868, "num_chars": 2}, {"sum_logits": -1.507209062576294, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.507209062576294, "logits_per_char": -0.753604531288147, "num_chars": 2}, {"sum_logits": -1.6744438409805298, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6744438409805298, "logits_per_char": -0.8372219204902649, "num_chars": 2}, {"sum_logits": -1.7859841585159302, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7859841585159302, "logits_per_char": -0.8929920792579651, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 263, "native_id": "08ad17d3ca1838b8724d21cf5921ec52", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.795447587966919, "incorrect_loss_raw": 1.5898564159870148, "correct_loss_per_char": 0.8977237939834595, "incorrect_loss_per_char": 0.7949282079935074, "correct_loss_per_token": 1.795447587966919, "incorrect_loss_per_token": 1.5898564159870148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4024150371551514, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4024150371551514, "logits_per_char": -0.7012075185775757, "num_chars": 2}, {"sum_logits": -1.5707321166992188, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5707321166992188, "logits_per_char": -0.7853660583496094, "num_chars": 2}, {"sum_logits": -1.4956244230270386, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4956244230270386, "logits_per_char": -0.7478122115135193, "num_chars": 2}, {"sum_logits": -1.795447587966919, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.795447587966919, "logits_per_char": -0.8977237939834595, "num_chars": 2}, {"sum_logits": -1.8906540870666504, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8906540870666504, "logits_per_char": -0.9453270435333252, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 264, "native_id": "21fb76bd8349628b441c76f47c33e77b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4506936073303223, "incorrect_loss_raw": 1.681274950504303, "correct_loss_per_char": 0.7253468036651611, "incorrect_loss_per_char": 0.8406374752521515, "correct_loss_per_token": 1.4506936073303223, "incorrect_loss_per_token": 1.681274950504303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4506936073303223, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4506936073303223, "logits_per_char": -0.7253468036651611, "num_chars": 2}, {"sum_logits": -1.4796142578125, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4796142578125, "logits_per_char": -0.73980712890625, "num_chars": 2}, {"sum_logits": -1.5370004177093506, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5370004177093506, "logits_per_char": -0.7685002088546753, "num_chars": 2}, {"sum_logits": -1.7113028764724731, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7113028764724731, "logits_per_char": -0.8556514382362366, "num_chars": 2}, {"sum_logits": -1.9971822500228882, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9971822500228882, "logits_per_char": -0.9985911250114441, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 265, "native_id": "e151b44e0a7bf08a1dd3c861eef09161", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.503800392150879, "incorrect_loss_raw": 1.7077037990093231, "correct_loss_per_char": 0.7519001960754395, "incorrect_loss_per_char": 0.8538518995046616, "correct_loss_per_token": 1.503800392150879, "incorrect_loss_per_token": 1.7077037990093231, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2648848295211792, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.2648848295211792, "logits_per_char": -0.6324424147605896, "num_chars": 2}, {"sum_logits": -1.5325489044189453, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5325489044189453, "logits_per_char": -0.7662744522094727, "num_chars": 2}, {"sum_logits": -1.503800392150879, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.503800392150879, "logits_per_char": -0.7519001960754395, "num_chars": 2}, {"sum_logits": -1.779548168182373, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.779548168182373, "logits_per_char": -0.8897740840911865, "num_chars": 2}, {"sum_logits": -2.253833293914795, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.253833293914795, "logits_per_char": -1.1269166469573975, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 266, "native_id": "46351b3a6beb694c5f623583a3b1473d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.614769458770752, "incorrect_loss_raw": 1.6304001808166504, "correct_loss_per_char": 0.807384729385376, "incorrect_loss_per_char": 0.8152000904083252, "correct_loss_per_token": 1.614769458770752, "incorrect_loss_per_token": 1.6304001808166504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6287388801574707, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6287388801574707, "logits_per_char": -0.8143694400787354, "num_chars": 2}, {"sum_logits": -1.614769458770752, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.614769458770752, "logits_per_char": -0.807384729385376, "num_chars": 2}, {"sum_logits": -1.5475242137908936, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5475242137908936, "logits_per_char": -0.7737621068954468, "num_chars": 2}, {"sum_logits": -1.4390559196472168, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4390559196472168, "logits_per_char": -0.7195279598236084, "num_chars": 2}, {"sum_logits": -1.9062817096710205, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9062817096710205, "logits_per_char": -0.9531408548355103, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 267, "native_id": "db75e16788cf56d5dfb9773eaf91fe7e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8775428533554077, "incorrect_loss_raw": 1.5744002163410187, "correct_loss_per_char": 0.9387714266777039, "incorrect_loss_per_char": 0.7872001081705093, "correct_loss_per_token": 1.8775428533554077, "incorrect_loss_per_token": 1.5744002163410187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2953667640686035, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.2953667640686035, "logits_per_char": -0.6476833820343018, "num_chars": 2}, {"sum_logits": -1.5460878610610962, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5460878610610962, "logits_per_char": -0.7730439305305481, "num_chars": 2}, {"sum_logits": -1.6656872034072876, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6656872034072876, "logits_per_char": -0.8328436017036438, "num_chars": 2}, {"sum_logits": -1.7904590368270874, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.7904590368270874, "logits_per_char": -0.8952295184135437, "num_chars": 2}, {"sum_logits": -1.8775428533554077, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.8775428533554077, "logits_per_char": -0.9387714266777039, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 268, "native_id": "ffd89796a9b09bef56c5803f188764c6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3777223825454712, "incorrect_loss_raw": 1.6966606080532074, "correct_loss_per_char": 0.6888611912727356, "incorrect_loss_per_char": 0.8483303040266037, "correct_loss_per_token": 1.3777223825454712, "incorrect_loss_per_token": 1.6966606080532074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3777223825454712, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.3777223825454712, "logits_per_char": -0.6888611912727356, "num_chars": 2}, {"sum_logits": -1.5171483755111694, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5171483755111694, "logits_per_char": -0.7585741877555847, "num_chars": 2}, {"sum_logits": -1.5979654788970947, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5979654788970947, "logits_per_char": -0.7989827394485474, "num_chars": 2}, {"sum_logits": -1.7050373554229736, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.7050373554229736, "logits_per_char": -0.8525186777114868, "num_chars": 2}, {"sum_logits": -1.9664912223815918, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.9664912223815918, "logits_per_char": -0.9832456111907959, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 269, "native_id": "5622e49306bb82ec1cec817ad0506c60", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6975033283233643, "incorrect_loss_raw": 1.6042645573616028, "correct_loss_per_char": 0.8487516641616821, "incorrect_loss_per_char": 0.8021322786808014, "correct_loss_per_token": 1.6975033283233643, "incorrect_loss_per_token": 1.6042645573616028, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4148945808410645, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4148945808410645, "logits_per_char": -0.7074472904205322, "num_chars": 2}, {"sum_logits": -1.5537796020507812, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5537796020507812, "logits_per_char": -0.7768898010253906, "num_chars": 2}, {"sum_logits": -1.6975033283233643, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6975033283233643, "logits_per_char": -0.8487516641616821, "num_chars": 2}, {"sum_logits": -1.6275897026062012, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6275897026062012, "logits_per_char": -0.8137948513031006, "num_chars": 2}, {"sum_logits": -1.8207943439483643, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.8207943439483643, "logits_per_char": -0.9103971719741821, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 270, "native_id": "6efaeb796307036719635242fa5ad0f3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.320115566253662, "incorrect_loss_raw": 1.510522574186325, "correct_loss_per_char": 1.160057783126831, "incorrect_loss_per_char": 0.7552612870931625, "correct_loss_per_token": 2.320115566253662, "incorrect_loss_per_token": 1.510522574186325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2694289684295654, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2694289684295654, "logits_per_char": -0.6347144842147827, "num_chars": 2}, {"sum_logits": -1.454494595527649, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.454494595527649, "logits_per_char": -0.7272472977638245, "num_chars": 2}, {"sum_logits": -1.5464569330215454, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5464569330215454, "logits_per_char": -0.7732284665107727, "num_chars": 2}, {"sum_logits": -1.7717097997665405, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7717097997665405, "logits_per_char": -0.8858548998832703, "num_chars": 2}, {"sum_logits": -2.320115566253662, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.320115566253662, "logits_per_char": -1.160057783126831, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 271, "native_id": "114d310d1198abffaf8b88dab5a55aa7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5972397327423096, "incorrect_loss_raw": 1.6966877281665802, "correct_loss_per_char": 0.7986198663711548, "incorrect_loss_per_char": 0.8483438640832901, "correct_loss_per_token": 1.5972397327423096, "incorrect_loss_per_token": 1.6966877281665802, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.250418782234192, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.250418782234192, "logits_per_char": -0.625209391117096, "num_chars": 2}, {"sum_logits": -1.4653005599975586, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4653005599975586, "logits_per_char": -0.7326502799987793, "num_chars": 2}, {"sum_logits": -1.5972397327423096, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5972397327423096, "logits_per_char": -0.7986198663711548, "num_chars": 2}, {"sum_logits": -1.7061452865600586, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7061452865600586, "logits_per_char": -0.8530726432800293, "num_chars": 2}, {"sum_logits": -2.3648862838745117, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.3648862838745117, "logits_per_char": -1.1824431419372559, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 272, "native_id": "0f79faf5337706f2e0e39c15bbd2e99a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5834208726882935, "incorrect_loss_raw": 1.6407264471054077, "correct_loss_per_char": 0.7917104363441467, "incorrect_loss_per_char": 0.8203632235527039, "correct_loss_per_token": 1.5834208726882935, "incorrect_loss_per_token": 1.6407264471054077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4728516340255737, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4728516340255737, "logits_per_char": -0.7364258170127869, "num_chars": 2}, {"sum_logits": -1.5834208726882935, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5834208726882935, "logits_per_char": -0.7917104363441467, "num_chars": 2}, {"sum_logits": -1.5194753408432007, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5194753408432007, "logits_per_char": -0.7597376704216003, "num_chars": 2}, {"sum_logits": -1.5880736112594604, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5880736112594604, "logits_per_char": -0.7940368056297302, "num_chars": 2}, {"sum_logits": -1.982505202293396, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.982505202293396, "logits_per_char": -0.991252601146698, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 273, "native_id": "b62d7d1b5eec31be0b65146a9fc069e0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6119894981384277, "incorrect_loss_raw": 1.6648150086402893, "correct_loss_per_char": 0.8059947490692139, "incorrect_loss_per_char": 0.8324075043201447, "correct_loss_per_token": 1.6119894981384277, "incorrect_loss_per_token": 1.6648150086402893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2393872737884521, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2393872737884521, "logits_per_char": -0.6196936368942261, "num_chars": 2}, {"sum_logits": -1.6119894981384277, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6119894981384277, "logits_per_char": -0.8059947490692139, "num_chars": 2}, {"sum_logits": -1.641235113143921, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.641235113143921, "logits_per_char": -0.8206175565719604, "num_chars": 2}, {"sum_logits": -1.6589021682739258, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6589021682739258, "logits_per_char": -0.8294510841369629, "num_chars": 2}, {"sum_logits": -2.1197354793548584, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.1197354793548584, "logits_per_char": -1.0598677396774292, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 274, "native_id": "1342c6aec9f5179d6ea6fa5fefbe5188", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7107418775558472, "incorrect_loss_raw": 1.599396139383316, "correct_loss_per_char": 0.8553709387779236, "incorrect_loss_per_char": 0.799698069691658, "correct_loss_per_token": 1.7107418775558472, "incorrect_loss_per_token": 1.599396139383316, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5010992288589478, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5010992288589478, "logits_per_char": -0.7505496144294739, "num_chars": 2}, {"sum_logits": -1.480086088180542, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.480086088180542, "logits_per_char": -0.740043044090271, "num_chars": 2}, {"sum_logits": -1.6007026433944702, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6007026433944702, "logits_per_char": -0.8003513216972351, "num_chars": 2}, {"sum_logits": -1.7107418775558472, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7107418775558472, "logits_per_char": -0.8553709387779236, "num_chars": 2}, {"sum_logits": -1.8156965970993042, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8156965970993042, "logits_per_char": -0.9078482985496521, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 275, "native_id": "c74ae684ba6c76e2a913493483678c9d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.351499319076538, "incorrect_loss_raw": 1.7028025388717651, "correct_loss_per_char": 0.675749659538269, "incorrect_loss_per_char": 0.8514012694358826, "correct_loss_per_token": 1.351499319076538, "incorrect_loss_per_token": 1.7028025388717651, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.351499319076538, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.351499319076538, "logits_per_char": -0.675749659538269, "num_chars": 2}, {"sum_logits": -1.724761724472046, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.724761724472046, "logits_per_char": -0.862380862236023, "num_chars": 2}, {"sum_logits": -1.5143252611160278, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5143252611160278, "logits_per_char": -0.7571626305580139, "num_chars": 2}, {"sum_logits": -1.6370749473571777, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6370749473571777, "logits_per_char": -0.8185374736785889, "num_chars": 2}, {"sum_logits": -1.935048222541809, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.935048222541809, "logits_per_char": -0.9675241112709045, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 276, "native_id": "411e50225637b76187cc36b24fe3127c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4674592018127441, "incorrect_loss_raw": 1.6678475737571716, "correct_loss_per_char": 0.7337296009063721, "incorrect_loss_per_char": 0.8339237868785858, "correct_loss_per_token": 1.4674592018127441, "incorrect_loss_per_token": 1.6678475737571716, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.484481692314148, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.484481692314148, "logits_per_char": -0.742240846157074, "num_chars": 2}, {"sum_logits": -1.60732901096344, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.60732901096344, "logits_per_char": -0.80366450548172, "num_chars": 2}, {"sum_logits": -1.4674592018127441, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.4674592018127441, "logits_per_char": -0.7337296009063721, "num_chars": 2}, {"sum_logits": -1.6257399320602417, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6257399320602417, "logits_per_char": -0.8128699660301208, "num_chars": 2}, {"sum_logits": -1.953839659690857, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.953839659690857, "logits_per_char": -0.9769198298454285, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 277, "native_id": "2a0e82bbf1471290c93c8f2a11af197f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.613843321800232, "incorrect_loss_raw": 1.6540395021438599, "correct_loss_per_char": 0.806921660900116, "incorrect_loss_per_char": 0.8270197510719299, "correct_loss_per_token": 1.613843321800232, "incorrect_loss_per_token": 1.6540395021438599, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.339223861694336, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.339223861694336, "logits_per_char": -0.669611930847168, "num_chars": 2}, {"sum_logits": -1.455212116241455, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.455212116241455, "logits_per_char": -0.7276060581207275, "num_chars": 2}, {"sum_logits": -1.613843321800232, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.613843321800232, "logits_per_char": -0.806921660900116, "num_chars": 2}, {"sum_logits": -1.7356505393981934, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7356505393981934, "logits_per_char": -0.8678252696990967, "num_chars": 2}, {"sum_logits": -2.086071491241455, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.086071491241455, "logits_per_char": -1.0430357456207275, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 278, "native_id": "eaadd7a4b18cb48c00f85c3975750fe7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1693408489227295, "incorrect_loss_raw": 1.792874127626419, "correct_loss_per_char": 0.5846704244613647, "incorrect_loss_per_char": 0.8964370638132095, "correct_loss_per_token": 1.1693408489227295, "incorrect_loss_per_token": 1.792874127626419, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1693408489227295, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.1693408489227295, "logits_per_char": -0.5846704244613647, "num_chars": 2}, {"sum_logits": -1.5875154733657837, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5875154733657837, "logits_per_char": -0.7937577366828918, "num_chars": 2}, {"sum_logits": -1.7397698163986206, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7397698163986206, "logits_per_char": -0.8698849081993103, "num_chars": 2}, {"sum_logits": -1.6320205926895142, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6320205926895142, "logits_per_char": -0.8160102963447571, "num_chars": 2}, {"sum_logits": -2.212190628051758, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.212190628051758, "logits_per_char": -1.106095314025879, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 279, "native_id": "403c9b067ef7363efffa822bb08c5426", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5624762773513794, "incorrect_loss_raw": 1.6411609053611755, "correct_loss_per_char": 0.7812381386756897, "incorrect_loss_per_char": 0.8205804526805878, "correct_loss_per_token": 1.5624762773513794, "incorrect_loss_per_token": 1.6411609053611755, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4385546445846558, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4385546445846558, "logits_per_char": -0.7192773222923279, "num_chars": 2}, {"sum_logits": -1.5624762773513794, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5624762773513794, "logits_per_char": -0.7812381386756897, "num_chars": 2}, {"sum_logits": -1.6045371294021606, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6045371294021606, "logits_per_char": -0.8022685647010803, "num_chars": 2}, {"sum_logits": -1.6191357374191284, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6191357374191284, "logits_per_char": -0.8095678687095642, "num_chars": 2}, {"sum_logits": -1.9024161100387573, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9024161100387573, "logits_per_char": -0.9512080550193787, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 280, "native_id": "adf228312401c9ff421a4da1b46bb70a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9383920431137085, "incorrect_loss_raw": 1.5537839829921722, "correct_loss_per_char": 0.9691960215568542, "incorrect_loss_per_char": 0.7768919914960861, "correct_loss_per_token": 1.9383920431137085, "incorrect_loss_per_token": 1.5537839829921722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.388340950012207, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.388340950012207, "logits_per_char": -0.6941704750061035, "num_chars": 2}, {"sum_logits": -1.5760880708694458, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5760880708694458, "logits_per_char": -0.7880440354347229, "num_chars": 2}, {"sum_logits": -1.617331624031067, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.617331624031067, "logits_per_char": -0.8086658120155334, "num_chars": 2}, {"sum_logits": -1.6333752870559692, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6333752870559692, "logits_per_char": -0.8166876435279846, "num_chars": 2}, {"sum_logits": -1.9383920431137085, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9383920431137085, "logits_per_char": -0.9691960215568542, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 281, "native_id": "57c85e4c7ea2501ef9d8f304b524e2e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5047528743743896, "incorrect_loss_raw": 1.663477897644043, "correct_loss_per_char": 0.7523764371871948, "incorrect_loss_per_char": 0.8317389488220215, "correct_loss_per_token": 1.5047528743743896, "incorrect_loss_per_token": 1.663477897644043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5022385120391846, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.5022385120391846, "logits_per_char": -0.7511192560195923, "num_chars": 2}, {"sum_logits": -1.5047528743743896, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5047528743743896, "logits_per_char": -0.7523764371871948, "num_chars": 2}, {"sum_logits": -1.5152502059936523, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5152502059936523, "logits_per_char": -0.7576251029968262, "num_chars": 2}, {"sum_logits": -1.6191067695617676, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6191067695617676, "logits_per_char": -0.8095533847808838, "num_chars": 2}, {"sum_logits": -2.0173161029815674, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.0173161029815674, "logits_per_char": -1.0086580514907837, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 282, "native_id": "c22f30eee57f7191ee07e9a916460f68", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4976845979690552, "incorrect_loss_raw": 1.657731831073761, "correct_loss_per_char": 0.7488422989845276, "incorrect_loss_per_char": 0.8288659155368805, "correct_loss_per_token": 1.4976845979690552, "incorrect_loss_per_token": 1.657731831073761, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4976845979690552, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4976845979690552, "logits_per_char": -0.7488422989845276, "num_chars": 2}, {"sum_logits": -1.4461263418197632, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4461263418197632, "logits_per_char": -0.7230631709098816, "num_chars": 2}, {"sum_logits": -1.627543568611145, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.627543568611145, "logits_per_char": -0.8137717843055725, "num_chars": 2}, {"sum_logits": -1.6783801317214966, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6783801317214966, "logits_per_char": -0.8391900658607483, "num_chars": 2}, {"sum_logits": -1.8788772821426392, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8788772821426392, "logits_per_char": -0.9394386410713196, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 283, "native_id": "026cb9c07a583ec933f2c4c67ae73836", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5187987089157104, "incorrect_loss_raw": 1.6842684149742126, "correct_loss_per_char": 0.7593993544578552, "incorrect_loss_per_char": 0.8421342074871063, "correct_loss_per_token": 1.5187987089157104, "incorrect_loss_per_token": 1.6842684149742126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3566503524780273, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3566503524780273, "logits_per_char": -0.6783251762390137, "num_chars": 2}, {"sum_logits": -1.5187987089157104, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5187987089157104, "logits_per_char": -0.7593993544578552, "num_chars": 2}, {"sum_logits": -1.607580542564392, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.607580542564392, "logits_per_char": -0.803790271282196, "num_chars": 2}, {"sum_logits": -1.5718058347702026, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5718058347702026, "logits_per_char": -0.7859029173851013, "num_chars": 2}, {"sum_logits": -2.2010369300842285, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.2010369300842285, "logits_per_char": -1.1005184650421143, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 284, "native_id": "c57ed32566a2db1ec3d6e4fd595b9d05", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5933961868286133, "incorrect_loss_raw": 1.696610450744629, "correct_loss_per_char": 0.7966980934143066, "incorrect_loss_per_char": 0.8483052253723145, "correct_loss_per_token": 1.5933961868286133, "incorrect_loss_per_token": 1.696610450744629, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2037125825881958, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2037125825881958, "logits_per_char": -0.6018562912940979, "num_chars": 2}, {"sum_logits": -1.5185712575912476, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5185712575912476, "logits_per_char": -0.7592856287956238, "num_chars": 2}, {"sum_logits": -1.5933961868286133, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5933961868286133, "logits_per_char": -0.7966980934143066, "num_chars": 2}, {"sum_logits": -1.7746219635009766, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7746219635009766, "logits_per_char": -0.8873109817504883, "num_chars": 2}, {"sum_logits": -2.2895359992980957, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.2895359992980957, "logits_per_char": -1.1447679996490479, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 285, "native_id": "93b52e7ea1acf10db891e9355e234123", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.384352684020996, "incorrect_loss_raw": 1.7102194428443909, "correct_loss_per_char": 0.692176342010498, "incorrect_loss_per_char": 0.8551097214221954, "correct_loss_per_token": 1.384352684020996, "incorrect_loss_per_token": 1.7102194428443909, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.384352684020996, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.384352684020996, "logits_per_char": -0.692176342010498, "num_chars": 2}, {"sum_logits": -1.4202167987823486, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4202167987823486, "logits_per_char": -0.7101083993911743, "num_chars": 2}, {"sum_logits": -1.614902377128601, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.614902377128601, "logits_per_char": -0.8074511885643005, "num_chars": 2}, {"sum_logits": -1.7034937143325806, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7034937143325806, "logits_per_char": -0.8517468571662903, "num_chars": 2}, {"sum_logits": -2.102264881134033, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.102264881134033, "logits_per_char": -1.0511324405670166, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 286, "native_id": "dbdad44029098d4b1d202d6d857d6092", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3147459030151367, "incorrect_loss_raw": 1.7249968349933624, "correct_loss_per_char": 0.6573729515075684, "incorrect_loss_per_char": 0.8624984174966812, "correct_loss_per_token": 1.3147459030151367, "incorrect_loss_per_token": 1.7249968349933624, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3147459030151367, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3147459030151367, "logits_per_char": -0.6573729515075684, "num_chars": 2}, {"sum_logits": -1.6539102792739868, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6539102792739868, "logits_per_char": -0.8269551396369934, "num_chars": 2}, {"sum_logits": -1.5415128469467163, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5415128469467163, "logits_per_char": -0.7707564234733582, "num_chars": 2}, {"sum_logits": -1.6341205835342407, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6341205835342407, "logits_per_char": -0.8170602917671204, "num_chars": 2}, {"sum_logits": -2.070443630218506, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.070443630218506, "logits_per_char": -1.035221815109253, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 287, "native_id": "69d0f70c173dda17934836d618ca7093", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6061911582946777, "incorrect_loss_raw": 1.6340003609657288, "correct_loss_per_char": 0.8030955791473389, "incorrect_loss_per_char": 0.8170001804828644, "correct_loss_per_token": 1.6061911582946777, "incorrect_loss_per_token": 1.6340003609657288, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4146305322647095, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.4146305322647095, "logits_per_char": -0.7073152661323547, "num_chars": 2}, {"sum_logits": -1.5498627424240112, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5498627424240112, "logits_per_char": -0.7749313712120056, "num_chars": 2}, {"sum_logits": -1.6061911582946777, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6061911582946777, "logits_per_char": -0.8030955791473389, "num_chars": 2}, {"sum_logits": -1.6245722770690918, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6245722770690918, "logits_per_char": -0.8122861385345459, "num_chars": 2}, {"sum_logits": -1.9469358921051025, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.9469358921051025, "logits_per_char": -0.9734679460525513, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 288, "native_id": "e5697a25935c5249d2108f55e245f3e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6961065530776978, "incorrect_loss_raw": 1.601927399635315, "correct_loss_per_char": 0.8480532765388489, "incorrect_loss_per_char": 0.8009636998176575, "correct_loss_per_token": 1.6961065530776978, "incorrect_loss_per_token": 1.601927399635315, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5109816789627075, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.5109816789627075, "logits_per_char": -0.7554908394813538, "num_chars": 2}, {"sum_logits": -1.51205575466156, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.51205575466156, "logits_per_char": -0.75602787733078, "num_chars": 2}, {"sum_logits": -1.6961065530776978, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6961065530776978, "logits_per_char": -0.8480532765388489, "num_chars": 2}, {"sum_logits": -1.581847071647644, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.581847071647644, "logits_per_char": -0.790923535823822, "num_chars": 2}, {"sum_logits": -1.8028250932693481, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.8028250932693481, "logits_per_char": -0.9014125466346741, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 289, "native_id": "99af85081085e6228c6d78c95be01968", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9735792875289917, "incorrect_loss_raw": 1.5492179989814758, "correct_loss_per_char": 0.9867896437644958, "incorrect_loss_per_char": 0.7746089994907379, "correct_loss_per_token": 1.9735792875289917, "incorrect_loss_per_token": 1.5492179989814758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3806463479995728, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3806463479995728, "logits_per_char": -0.6903231739997864, "num_chars": 2}, {"sum_logits": -1.5327719449996948, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5327719449996948, "logits_per_char": -0.7663859724998474, "num_chars": 2}, {"sum_logits": -1.583046555519104, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.583046555519104, "logits_per_char": -0.791523277759552, "num_chars": 2}, {"sum_logits": -1.7004071474075317, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7004071474075317, "logits_per_char": -0.8502035737037659, "num_chars": 2}, {"sum_logits": -1.9735792875289917, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.9735792875289917, "logits_per_char": -0.9867896437644958, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 290, "native_id": "235094c966bcbdc94701b41b969f9c75", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4905529022216797, "incorrect_loss_raw": 1.6912643611431122, "correct_loss_per_char": 0.7452764511108398, "incorrect_loss_per_char": 0.8456321805715561, "correct_loss_per_token": 1.4905529022216797, "incorrect_loss_per_token": 1.6912643611431122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3888262510299683, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3888262510299683, "logits_per_char": -0.6944131255149841, "num_chars": 2}, {"sum_logits": -1.4905529022216797, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4905529022216797, "logits_per_char": -0.7452764511108398, "num_chars": 2}, {"sum_logits": -1.5935628414154053, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5935628414154053, "logits_per_char": -0.7967814207077026, "num_chars": 2}, {"sum_logits": -1.623941421508789, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.623941421508789, "logits_per_char": -0.8119707107543945, "num_chars": 2}, {"sum_logits": -2.158726930618286, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.158726930618286, "logits_per_char": -1.079363465309143, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 291, "native_id": "99789083502af9bf111876a00fae44ac", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5170090198516846, "incorrect_loss_raw": 1.6714720129966736, "correct_loss_per_char": 0.7585045099258423, "incorrect_loss_per_char": 0.8357360064983368, "correct_loss_per_token": 1.5170090198516846, "incorrect_loss_per_token": 1.6714720129966736, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4276630878448486, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4276630878448486, "logits_per_char": -0.7138315439224243, "num_chars": 2}, {"sum_logits": -1.5172362327575684, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5172362327575684, "logits_per_char": -0.7586181163787842, "num_chars": 2}, {"sum_logits": -1.5170090198516846, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5170090198516846, "logits_per_char": -0.7585045099258423, "num_chars": 2}, {"sum_logits": -1.6309871673583984, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6309871673583984, "logits_per_char": -0.8154935836791992, "num_chars": 2}, {"sum_logits": -2.110001564025879, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.110001564025879, "logits_per_char": -1.0550007820129395, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 292, "native_id": "1d44fb5f4b7f1e23ff6c1c083db81ba1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.177900791168213, "incorrect_loss_raw": 1.5499672889709473, "correct_loss_per_char": 1.0889503955841064, "incorrect_loss_per_char": 0.7749836444854736, "correct_loss_per_token": 2.177900791168213, "incorrect_loss_per_token": 1.5499672889709473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1599676609039307, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.1599676609039307, "logits_per_char": -0.5799838304519653, "num_chars": 2}, {"sum_logits": -1.429023027420044, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.429023027420044, "logits_per_char": -0.714511513710022, "num_chars": 2}, {"sum_logits": -1.779660940170288, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.779660940170288, "logits_per_char": -0.889830470085144, "num_chars": 2}, {"sum_logits": -1.8312175273895264, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8312175273895264, "logits_per_char": -0.9156087636947632, "num_chars": 2}, {"sum_logits": -2.177900791168213, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.177900791168213, "logits_per_char": -1.0889503955841064, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 293, "native_id": "194b66240f6fab75749c1e30ed09ea09", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9013458490371704, "incorrect_loss_raw": 1.556597650051117, "correct_loss_per_char": 0.9506729245185852, "incorrect_loss_per_char": 0.7782988250255585, "correct_loss_per_token": 1.9013458490371704, "incorrect_loss_per_token": 1.556597650051117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.479016900062561, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.479016900062561, "logits_per_char": -0.7395084500312805, "num_chars": 2}, {"sum_logits": -1.4960931539535522, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4960931539535522, "logits_per_char": -0.7480465769767761, "num_chars": 2}, {"sum_logits": -1.5970641374588013, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5970641374588013, "logits_per_char": -0.7985320687294006, "num_chars": 2}, {"sum_logits": -1.6542164087295532, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6542164087295532, "logits_per_char": -0.8271082043647766, "num_chars": 2}, {"sum_logits": -1.9013458490371704, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.9013458490371704, "logits_per_char": -0.9506729245185852, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 294, "native_id": "83dad4fe630fddbdcd5b18ef890c66f2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0427002906799316, "incorrect_loss_raw": 1.539493888616562, "correct_loss_per_char": 1.0213501453399658, "incorrect_loss_per_char": 0.769746944308281, "correct_loss_per_token": 2.0427002906799316, "incorrect_loss_per_token": 1.539493888616562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3536025285720825, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3536025285720825, "logits_per_char": -0.6768012642860413, "num_chars": 2}, {"sum_logits": -1.483901023864746, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.483901023864746, "logits_per_char": -0.741950511932373, "num_chars": 2}, {"sum_logits": -1.606207013130188, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.606207013130188, "logits_per_char": -0.803103506565094, "num_chars": 2}, {"sum_logits": -1.714264988899231, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.714264988899231, "logits_per_char": -0.8571324944496155, "num_chars": 2}, {"sum_logits": -2.0427002906799316, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.0427002906799316, "logits_per_char": -1.0213501453399658, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 295, "native_id": "3ebc5ddd2e97fe37fcb52aa2a9e2e1a7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4714237451553345, "incorrect_loss_raw": 1.6895792484283447, "correct_loss_per_char": 0.7357118725776672, "incorrect_loss_per_char": 0.8447896242141724, "correct_loss_per_token": 1.4714237451553345, "incorrect_loss_per_token": 1.6895792484283447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3515384197235107, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3515384197235107, "logits_per_char": -0.6757692098617554, "num_chars": 2}, {"sum_logits": -1.4714237451553345, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4714237451553345, "logits_per_char": -0.7357118725776672, "num_chars": 2}, {"sum_logits": -1.57915198802948, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.57915198802948, "logits_per_char": -0.78957599401474, "num_chars": 2}, {"sum_logits": -1.723507285118103, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.723507285118103, "logits_per_char": -0.8617536425590515, "num_chars": 2}, {"sum_logits": -2.104119300842285, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.104119300842285, "logits_per_char": -1.0520596504211426, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 296, "native_id": "9ed019338a48216de9eadf64faaf1ce0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6080180406570435, "incorrect_loss_raw": 1.6584390103816986, "correct_loss_per_char": 0.8040090203285217, "incorrect_loss_per_char": 0.8292195051908493, "correct_loss_per_token": 1.6080180406570435, "incorrect_loss_per_token": 1.6584390103816986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.404895544052124, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.404895544052124, "logits_per_char": -0.702447772026062, "num_chars": 2}, {"sum_logits": -1.576375126838684, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.576375126838684, "logits_per_char": -0.788187563419342, "num_chars": 2}, {"sum_logits": -1.466418743133545, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.466418743133545, "logits_per_char": -0.7332093715667725, "num_chars": 2}, {"sum_logits": -1.6080180406570435, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6080180406570435, "logits_per_char": -0.8040090203285217, "num_chars": 2}, {"sum_logits": -2.1860666275024414, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.1860666275024414, "logits_per_char": -1.0930333137512207, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 297, "native_id": "d1d2585e0ba1160948b7c5822a99b7a1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5116411447525024, "incorrect_loss_raw": 1.6851482093334198, "correct_loss_per_char": 0.7558205723762512, "incorrect_loss_per_char": 0.8425741046667099, "correct_loss_per_token": 1.5116411447525024, "incorrect_loss_per_token": 1.6851482093334198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3281329870224, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": true, "logits_per_token": -1.3281329870224, "logits_per_char": -0.6640664935112, "num_chars": 2}, {"sum_logits": -1.5116411447525024, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.5116411447525024, "logits_per_char": -0.7558205723762512, "num_chars": 2}, {"sum_logits": -1.578668475151062, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.578668475151062, "logits_per_char": -0.789334237575531, "num_chars": 2}, {"sum_logits": -1.6643725633621216, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.6643725633621216, "logits_per_char": -0.8321862816810608, "num_chars": 2}, {"sum_logits": -2.1694188117980957, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -2.1694188117980957, "logits_per_char": -1.0847094058990479, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 298, "native_id": "e34a0d1331c6bd4574ffe308e3fbd389", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.3493804931640625, "incorrect_loss_raw": 1.5214103758335114, "correct_loss_per_char": 1.1746902465820312, "incorrect_loss_per_char": 0.7607051879167557, "correct_loss_per_token": 2.3493804931640625, "incorrect_loss_per_token": 1.5214103758335114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1589244604110718, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.1589244604110718, "logits_per_char": -0.5794622302055359, "num_chars": 2}, {"sum_logits": -1.4561243057250977, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4561243057250977, "logits_per_char": -0.7280621528625488, "num_chars": 2}, {"sum_logits": -1.670573353767395, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.670573353767395, "logits_per_char": -0.8352866768836975, "num_chars": 2}, {"sum_logits": -1.800019383430481, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.800019383430481, "logits_per_char": -0.9000096917152405, "num_chars": 2}, {"sum_logits": -2.3493804931640625, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.3493804931640625, "logits_per_char": -1.1746902465820312, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 299, "native_id": "4858669d0193e5d9384dc37d4bb5c00c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4021954536437988, "incorrect_loss_raw": 1.69170942902565, "correct_loss_per_char": 0.7010977268218994, "incorrect_loss_per_char": 0.845854714512825, "correct_loss_per_token": 1.4021954536437988, "incorrect_loss_per_token": 1.69170942902565, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4021954536437988, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.4021954536437988, "logits_per_char": -0.7010977268218994, "num_chars": 2}, {"sum_logits": -1.5176678895950317, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5176678895950317, "logits_per_char": -0.7588339447975159, "num_chars": 2}, {"sum_logits": -1.6212931871414185, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6212931871414185, "logits_per_char": -0.8106465935707092, "num_chars": 2}, {"sum_logits": -1.6185053586959839, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6185053586959839, "logits_per_char": -0.8092526793479919, "num_chars": 2}, {"sum_logits": -2.009371280670166, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.009371280670166, "logits_per_char": -1.004685640335083, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 300, "native_id": "8fd82cdc253835814153fe7222e9967c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.982369303703308, "incorrect_loss_raw": 1.5519787669181824, "correct_loss_per_char": 0.991184651851654, "incorrect_loss_per_char": 0.7759893834590912, "correct_loss_per_token": 1.982369303703308, "incorrect_loss_per_token": 1.5519787669181824, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3212867975234985, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3212867975234985, "logits_per_char": -0.6606433987617493, "num_chars": 2}, {"sum_logits": -1.5330511331558228, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5330511331558228, "logits_per_char": -0.7665255665779114, "num_chars": 2}, {"sum_logits": -1.6568974256515503, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6568974256515503, "logits_per_char": -0.8284487128257751, "num_chars": 2}, {"sum_logits": -1.696679711341858, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.696679711341858, "logits_per_char": -0.848339855670929, "num_chars": 2}, {"sum_logits": -1.982369303703308, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.982369303703308, "logits_per_char": -0.991184651851654, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 301, "native_id": "66458bf8599c3ef1e7b50fa527531882", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8623647689819336, "incorrect_loss_raw": 1.5727365612983704, "correct_loss_per_char": 0.9311823844909668, "incorrect_loss_per_char": 0.7863682806491852, "correct_loss_per_token": 1.8623647689819336, "incorrect_loss_per_token": 1.5727365612983704, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3621647357940674, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.3621647357940674, "logits_per_char": -0.6810823678970337, "num_chars": 2}, {"sum_logits": -1.6067819595336914, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6067819595336914, "logits_per_char": -0.8033909797668457, "num_chars": 2}, {"sum_logits": -1.7146050930023193, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7146050930023193, "logits_per_char": -0.8573025465011597, "num_chars": 2}, {"sum_logits": -1.6073944568634033, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6073944568634033, "logits_per_char": -0.8036972284317017, "num_chars": 2}, {"sum_logits": -1.8623647689819336, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.8623647689819336, "logits_per_char": -0.9311823844909668, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 302, "native_id": "879239b8a788f3c9e3dfdd0862f3d7c5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7021424770355225, "incorrect_loss_raw": 1.5965895056724548, "correct_loss_per_char": 0.8510712385177612, "incorrect_loss_per_char": 0.7982947528362274, "correct_loss_per_token": 1.7021424770355225, "incorrect_loss_per_token": 1.5965895056724548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.604264497756958, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.604264497756958, "logits_per_char": -0.802132248878479, "num_chars": 2}, {"sum_logits": -1.679858922958374, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.679858922958374, "logits_per_char": -0.839929461479187, "num_chars": 2}, {"sum_logits": -1.7021424770355225, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.7021424770355225, "logits_per_char": -0.8510712385177612, "num_chars": 2}, {"sum_logits": -1.5176992416381836, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.5176992416381836, "logits_per_char": -0.7588496208190918, "num_chars": 2}, {"sum_logits": -1.5845353603363037, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.5845353603363037, "logits_per_char": -0.7922676801681519, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 303, "native_id": "8a69e6df5e8ad6c9e6828aa66c59d046", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.595776081085205, "incorrect_loss_raw": 1.6311860978603363, "correct_loss_per_char": 0.7978880405426025, "incorrect_loss_per_char": 0.8155930489301682, "correct_loss_per_token": 1.595776081085205, "incorrect_loss_per_token": 1.6311860978603363, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4828228950500488, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.4828228950500488, "logits_per_char": -0.7414114475250244, "num_chars": 2}, {"sum_logits": -1.6747480630874634, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6747480630874634, "logits_per_char": -0.8373740315437317, "num_chars": 2}, {"sum_logits": -1.518560767173767, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.518560767173767, "logits_per_char": -0.7592803835868835, "num_chars": 2}, {"sum_logits": -1.595776081085205, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.595776081085205, "logits_per_char": -0.7978880405426025, "num_chars": 2}, {"sum_logits": -1.848612666130066, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.848612666130066, "logits_per_char": -0.924306333065033, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 304, "native_id": "8d275acea05fd16295c659c504576a9b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5764851570129395, "incorrect_loss_raw": 1.6485641300678253, "correct_loss_per_char": 0.7882425785064697, "incorrect_loss_per_char": 0.8242820650339127, "correct_loss_per_token": 1.5764851570129395, "incorrect_loss_per_token": 1.6485641300678253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3362394571304321, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3362394571304321, "logits_per_char": -0.6681197285652161, "num_chars": 2}, {"sum_logits": -1.5764851570129395, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5764851570129395, "logits_per_char": -0.7882425785064697, "num_chars": 2}, {"sum_logits": -1.6170361042022705, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6170361042022705, "logits_per_char": -0.8085180521011353, "num_chars": 2}, {"sum_logits": -1.6868937015533447, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6868937015533447, "logits_per_char": -0.8434468507766724, "num_chars": 2}, {"sum_logits": -1.954087257385254, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.954087257385254, "logits_per_char": -0.977043628692627, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 305, "native_id": "91629c6f9e4af3e6acf385eb23fd8068", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6039870977401733, "incorrect_loss_raw": 1.6361076533794403, "correct_loss_per_char": 0.8019935488700867, "incorrect_loss_per_char": 0.8180538266897202, "correct_loss_per_token": 1.6039870977401733, "incorrect_loss_per_token": 1.6361076533794403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3802462816238403, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3802462816238403, "logits_per_char": -0.6901231408119202, "num_chars": 2}, {"sum_logits": -1.6762045621871948, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6762045621871948, "logits_per_char": -0.8381022810935974, "num_chars": 2}, {"sum_logits": -1.567901849746704, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.567901849746704, "logits_per_char": -0.783950924873352, "num_chars": 2}, {"sum_logits": -1.6039870977401733, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6039870977401733, "logits_per_char": -0.8019935488700867, "num_chars": 2}, {"sum_logits": -1.920077919960022, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.920077919960022, "logits_per_char": -0.960038959980011, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 306, "native_id": "59eb56f366407ac7db72996be265883b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9168338775634766, "incorrect_loss_raw": 1.557159572839737, "correct_loss_per_char": 0.9584169387817383, "incorrect_loss_per_char": 0.7785797864198685, "correct_loss_per_token": 1.9168338775634766, "incorrect_loss_per_token": 1.557159572839737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3997437953948975, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3997437953948975, "logits_per_char": -0.6998718976974487, "num_chars": 2}, {"sum_logits": -1.6380021572113037, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6380021572113037, "logits_per_char": -0.8190010786056519, "num_chars": 2}, {"sum_logits": -1.6069040298461914, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6069040298461914, "logits_per_char": -0.8034520149230957, "num_chars": 2}, {"sum_logits": -1.5839883089065552, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5839883089065552, "logits_per_char": -0.7919941544532776, "num_chars": 2}, {"sum_logits": -1.9168338775634766, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9168338775634766, "logits_per_char": -0.9584169387817383, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 307, "native_id": "4ab069f2e979d51f2c5929f590d09982", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4964176416397095, "incorrect_loss_raw": 1.6724026501178741, "correct_loss_per_char": 0.7482088208198547, "incorrect_loss_per_char": 0.8362013250589371, "correct_loss_per_token": 1.4964176416397095, "incorrect_loss_per_token": 1.6724026501178741, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5022904872894287, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5022904872894287, "logits_per_char": -0.7511452436447144, "num_chars": 2}, {"sum_logits": -1.5353325605392456, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5353325605392456, "logits_per_char": -0.7676662802696228, "num_chars": 2}, {"sum_logits": -1.4964176416397095, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4964176416397095, "logits_per_char": -0.7482088208198547, "num_chars": 2}, {"sum_logits": -1.561955213546753, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.561955213546753, "logits_per_char": -0.7809776067733765, "num_chars": 2}, {"sum_logits": -2.0900323390960693, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0900323390960693, "logits_per_char": -1.0450161695480347, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 308, "native_id": "d6bb990e8c409d2b3af37a2da198e01f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.593043327331543, "incorrect_loss_raw": 1.6255135834217072, "correct_loss_per_char": 0.7965216636657715, "incorrect_loss_per_char": 0.8127567917108536, "correct_loss_per_token": 1.593043327331543, "incorrect_loss_per_token": 1.6255135834217072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.58251953125, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.58251953125, "logits_per_char": -0.791259765625, "num_chars": 2}, {"sum_logits": -1.593043327331543, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.593043327331543, "logits_per_char": -0.7965216636657715, "num_chars": 2}, {"sum_logits": -1.5867273807525635, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5867273807525635, "logits_per_char": -0.7933636903762817, "num_chars": 2}, {"sum_logits": -1.542452335357666, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.542452335357666, "logits_per_char": -0.771226167678833, "num_chars": 2}, {"sum_logits": -1.7903550863265991, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7903550863265991, "logits_per_char": -0.8951775431632996, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 309, "native_id": "c5ad166ab5c5f5f067aa02b20f482523", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.049544095993042, "incorrect_loss_raw": 1.5347364842891693, "correct_loss_per_char": 1.024772047996521, "incorrect_loss_per_char": 0.7673682421445847, "correct_loss_per_token": 2.049544095993042, "incorrect_loss_per_token": 1.5347364842891693, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4502556324005127, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4502556324005127, "logits_per_char": -0.7251278162002563, "num_chars": 2}, {"sum_logits": -1.4298866987228394, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4298866987228394, "logits_per_char": -0.7149433493614197, "num_chars": 2}, {"sum_logits": -1.610811471939087, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.610811471939087, "logits_per_char": -0.8054057359695435, "num_chars": 2}, {"sum_logits": -1.6479921340942383, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6479921340942383, "logits_per_char": -0.8239960670471191, "num_chars": 2}, {"sum_logits": -2.049544095993042, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.049544095993042, "logits_per_char": -1.024772047996521, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 310, "native_id": "ceafca2445b1b974d085a8cce38e8e44", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5361636877059937, "incorrect_loss_raw": 1.6571855545043945, "correct_loss_per_char": 0.7680818438529968, "incorrect_loss_per_char": 0.8285927772521973, "correct_loss_per_token": 1.5361636877059937, "incorrect_loss_per_token": 1.6571855545043945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5361636877059937, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5361636877059937, "logits_per_char": -0.7680818438529968, "num_chars": 2}, {"sum_logits": -1.650447964668274, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.650447964668274, "logits_per_char": -0.825223982334137, "num_chars": 2}, {"sum_logits": -1.558994174003601, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.558994174003601, "logits_per_char": -0.7794970870018005, "num_chars": 2}, {"sum_logits": -1.4188051223754883, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4188051223754883, "logits_per_char": -0.7094025611877441, "num_chars": 2}, {"sum_logits": -2.000494956970215, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.000494956970215, "logits_per_char": -1.0002474784851074, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 311, "native_id": "2ef2ae21a2d3a9ecbd5c45ff378d10e3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6820539236068726, "incorrect_loss_raw": 1.624833881855011, "correct_loss_per_char": 0.8410269618034363, "incorrect_loss_per_char": 0.8124169409275055, "correct_loss_per_token": 1.6820539236068726, "incorrect_loss_per_token": 1.624833881855011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4497065544128418, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4497065544128418, "logits_per_char": -0.7248532772064209, "num_chars": 2}, {"sum_logits": -1.4874337911605835, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4874337911605835, "logits_per_char": -0.7437168955802917, "num_chars": 2}, {"sum_logits": -1.5133365392684937, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5133365392684937, "logits_per_char": -0.7566682696342468, "num_chars": 2}, {"sum_logits": -1.6820539236068726, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6820539236068726, "logits_per_char": -0.8410269618034363, "num_chars": 2}, {"sum_logits": -2.048858642578125, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.048858642578125, "logits_per_char": -1.0244293212890625, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 312, "native_id": "793672da43fbc609e8c5760630c7e239", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.075922966003418, "incorrect_loss_raw": 1.5310754776000977, "correct_loss_per_char": 1.037961483001709, "incorrect_loss_per_char": 0.7655377388000488, "correct_loss_per_token": 2.075922966003418, "incorrect_loss_per_token": 1.5310754776000977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3804091215133667, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3804091215133667, "logits_per_char": -0.6902045607566833, "num_chars": 2}, {"sum_logits": -1.5283275842666626, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5283275842666626, "logits_per_char": -0.7641637921333313, "num_chars": 2}, {"sum_logits": -1.5412720441818237, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5412720441818237, "logits_per_char": -0.7706360220909119, "num_chars": 2}, {"sum_logits": -1.6742931604385376, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6742931604385376, "logits_per_char": -0.8371465802192688, "num_chars": 2}, {"sum_logits": -2.075922966003418, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.075922966003418, "logits_per_char": -1.037961483001709, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 313, "native_id": "558cb0bc25387ce38d71f64ef6f1fa57", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5086638927459717, "incorrect_loss_raw": 1.6602236926555634, "correct_loss_per_char": 0.7543319463729858, "incorrect_loss_per_char": 0.8301118463277817, "correct_loss_per_token": 1.5086638927459717, "incorrect_loss_per_token": 1.6602236926555634, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3832918405532837, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.3832918405532837, "logits_per_char": -0.6916459202766418, "num_chars": 2}, {"sum_logits": -1.5086638927459717, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.5086638927459717, "logits_per_char": -0.7543319463729858, "num_chars": 2}, {"sum_logits": -1.6820497512817383, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.6820497512817383, "logits_per_char": -0.8410248756408691, "num_chars": 2}, {"sum_logits": -1.8012187480926514, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.8012187480926514, "logits_per_char": -0.9006093740463257, "num_chars": 2}, {"sum_logits": -1.77433443069458, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.77433443069458, "logits_per_char": -0.88716721534729, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 314, "native_id": "2c9f4a98ce774cd734b6e384d95051a7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5018813610076904, "incorrect_loss_raw": 1.6648226380348206, "correct_loss_per_char": 0.7509406805038452, "incorrect_loss_per_char": 0.8324113190174103, "correct_loss_per_token": 1.5018813610076904, "incorrect_loss_per_token": 1.6648226380348206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3656845092773438, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3656845092773438, "logits_per_char": -0.6828422546386719, "num_chars": 2}, {"sum_logits": -1.5018813610076904, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5018813610076904, "logits_per_char": -0.7509406805038452, "num_chars": 2}, {"sum_logits": -1.637258529663086, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.637258529663086, "logits_per_char": -0.818629264831543, "num_chars": 2}, {"sum_logits": -1.7470550537109375, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7470550537109375, "logits_per_char": -0.8735275268554688, "num_chars": 2}, {"sum_logits": -1.909292459487915, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.909292459487915, "logits_per_char": -0.9546462297439575, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 315, "native_id": "33c84708785f88c19737ef5b0e31a64b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6763108968734741, "incorrect_loss_raw": 1.6169747114181519, "correct_loss_per_char": 0.8381554484367371, "incorrect_loss_per_char": 0.8084873557090759, "correct_loss_per_token": 1.6763108968734741, "incorrect_loss_per_token": 1.6169747114181519, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4729002714157104, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.4729002714157104, "logits_per_char": -0.7364501357078552, "num_chars": 2}, {"sum_logits": -1.533185362815857, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.533185362815857, "logits_per_char": -0.7665926814079285, "num_chars": 2}, {"sum_logits": -1.5283221006393433, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5283221006393433, "logits_per_char": -0.7641610503196716, "num_chars": 2}, {"sum_logits": -1.6763108968734741, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6763108968734741, "logits_per_char": -0.8381554484367371, "num_chars": 2}, {"sum_logits": -1.9334911108016968, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.9334911108016968, "logits_per_char": -0.9667455554008484, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 316, "native_id": "d867f76d000bdb59b9b4cb982bd7f0a0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6276841163635254, "incorrect_loss_raw": 1.6155099868774414, "correct_loss_per_char": 0.8138420581817627, "incorrect_loss_per_char": 0.8077549934387207, "correct_loss_per_token": 1.6276841163635254, "incorrect_loss_per_token": 1.6155099868774414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5255656242370605, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.5255656242370605, "logits_per_char": -0.7627828121185303, "num_chars": 2}, {"sum_logits": -1.631591796875, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.631591796875, "logits_per_char": -0.8157958984375, "num_chars": 2}, {"sum_logits": -1.7559072971343994, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.7559072971343994, "logits_per_char": -0.8779536485671997, "num_chars": 2}, {"sum_logits": -1.6276841163635254, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.6276841163635254, "logits_per_char": -0.8138420581817627, "num_chars": 2}, {"sum_logits": -1.5489752292633057, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.5489752292633057, "logits_per_char": -0.7744876146316528, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 317, "native_id": "8c607d2e2e897d74048fcc794137b683", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.653588891029358, "incorrect_loss_raw": 1.6423181593418121, "correct_loss_per_char": 0.826794445514679, "incorrect_loss_per_char": 0.8211590796709061, "correct_loss_per_token": 1.653588891029358, "incorrect_loss_per_token": 1.6423181593418121, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2991812229156494, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.2991812229156494, "logits_per_char": -0.6495906114578247, "num_chars": 2}, {"sum_logits": -1.498901605606079, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.498901605606079, "logits_per_char": -0.7494508028030396, "num_chars": 2}, {"sum_logits": -1.7115408182144165, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7115408182144165, "logits_per_char": -0.8557704091072083, "num_chars": 2}, {"sum_logits": -1.653588891029358, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.653588891029358, "logits_per_char": -0.826794445514679, "num_chars": 2}, {"sum_logits": -2.0596489906311035, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.0596489906311035, "logits_per_char": -1.0298244953155518, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 318, "native_id": "5215e26c99b2a9b376fb1c70096a388a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5964772701263428, "incorrect_loss_raw": 1.6310763657093048, "correct_loss_per_char": 0.7982386350631714, "incorrect_loss_per_char": 0.8155381828546524, "correct_loss_per_token": 1.5964772701263428, "incorrect_loss_per_token": 1.6310763657093048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5887136459350586, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.5887136459350586, "logits_per_char": -0.7943568229675293, "num_chars": 2}, {"sum_logits": -1.5964772701263428, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.5964772701263428, "logits_per_char": -0.7982386350631714, "num_chars": 2}, {"sum_logits": -1.6032283306121826, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.6032283306121826, "logits_per_char": -0.8016141653060913, "num_chars": 2}, {"sum_logits": -1.4715603590011597, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": true, "logits_per_token": -1.4715603590011597, "logits_per_char": -0.7357801795005798, "num_chars": 2}, {"sum_logits": -1.8608031272888184, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.8608031272888184, "logits_per_char": -0.9304015636444092, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 319, "native_id": "668dc6bce771b10cbf6336f3ec76520a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5901380777359009, "incorrect_loss_raw": 1.6458987295627594, "correct_loss_per_char": 0.7950690388679504, "incorrect_loss_per_char": 0.8229493647813797, "correct_loss_per_token": 1.5901380777359009, "incorrect_loss_per_token": 1.6458987295627594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.350564956665039, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.350564956665039, "logits_per_char": -0.6752824783325195, "num_chars": 2}, {"sum_logits": -1.5901380777359009, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5901380777359009, "logits_per_char": -0.7950690388679504, "num_chars": 2}, {"sum_logits": -1.6563810110092163, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6563810110092163, "logits_per_char": -0.8281905055046082, "num_chars": 2}, {"sum_logits": -1.6170202493667603, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6170202493667603, "logits_per_char": -0.8085101246833801, "num_chars": 2}, {"sum_logits": -1.959628701210022, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.959628701210022, "logits_per_char": -0.979814350605011, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 320, "native_id": "a339fe08f1f50463ee180b797e99ebcc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.601534128189087, "incorrect_loss_raw": 1.6415632963180542, "correct_loss_per_char": 0.8007670640945435, "incorrect_loss_per_char": 0.8207816481590271, "correct_loss_per_token": 1.601534128189087, "incorrect_loss_per_token": 1.6415632963180542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4135297536849976, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4135297536849976, "logits_per_char": -0.7067648768424988, "num_chars": 2}, {"sum_logits": -1.55079185962677, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.55079185962677, "logits_per_char": -0.775395929813385, "num_chars": 2}, {"sum_logits": -1.588271141052246, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.588271141052246, "logits_per_char": -0.794135570526123, "num_chars": 2}, {"sum_logits": -1.601534128189087, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.601534128189087, "logits_per_char": -0.8007670640945435, "num_chars": 2}, {"sum_logits": -2.013660430908203, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.013660430908203, "logits_per_char": -1.0068302154541016, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 321, "native_id": "526cd34f5b2afefbbb7830434785f298", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.415851354598999, "incorrect_loss_raw": 1.6934011280536652, "correct_loss_per_char": 0.7079256772994995, "incorrect_loss_per_char": 0.8467005640268326, "correct_loss_per_token": 1.415851354598999, "incorrect_loss_per_token": 1.6934011280536652, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.415851354598999, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.415851354598999, "logits_per_char": -0.7079256772994995, "num_chars": 2}, {"sum_logits": -1.5497384071350098, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5497384071350098, "logits_per_char": -0.7748692035675049, "num_chars": 2}, {"sum_logits": -1.4972593784332275, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4972593784332275, "logits_per_char": -0.7486296892166138, "num_chars": 2}, {"sum_logits": -1.6488794088363647, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6488794088363647, "logits_per_char": -0.8244397044181824, "num_chars": 2}, {"sum_logits": -2.0777273178100586, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0777273178100586, "logits_per_char": -1.0388636589050293, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 322, "native_id": "6c1c1c282cebe8917f607f0dbc1c102e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0797128677368164, "incorrect_loss_raw": 1.5470856428146362, "correct_loss_per_char": 1.0398564338684082, "incorrect_loss_per_char": 0.7735428214073181, "correct_loss_per_token": 2.0797128677368164, "incorrect_loss_per_token": 1.5470856428146362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2627227306365967, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2627227306365967, "logits_per_char": -0.6313613653182983, "num_chars": 2}, {"sum_logits": -1.462878942489624, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.462878942489624, "logits_per_char": -0.731439471244812, "num_chars": 2}, {"sum_logits": -1.6551355123519897, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6551355123519897, "logits_per_char": -0.8275677561759949, "num_chars": 2}, {"sum_logits": -1.8076053857803345, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.8076053857803345, "logits_per_char": -0.9038026928901672, "num_chars": 2}, {"sum_logits": -2.0797128677368164, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.0797128677368164, "logits_per_char": -1.0398564338684082, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 323, "native_id": "b5baf77d3855935c87f01f5fb2216667", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7897026538848877, "incorrect_loss_raw": 1.614258199930191, "correct_loss_per_char": 0.8948513269424438, "incorrect_loss_per_char": 0.8071290999650955, "correct_loss_per_token": 1.7897026538848877, "incorrect_loss_per_token": 1.614258199930191, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3093184232711792, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3093184232711792, "logits_per_char": -0.6546592116355896, "num_chars": 2}, {"sum_logits": -1.4599229097366333, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4599229097366333, "logits_per_char": -0.7299614548683167, "num_chars": 2}, {"sum_logits": -1.6134454011917114, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6134454011917114, "logits_per_char": -0.8067227005958557, "num_chars": 2}, {"sum_logits": -1.7897026538848877, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7897026538848877, "logits_per_char": -0.8948513269424438, "num_chars": 2}, {"sum_logits": -2.0743460655212402, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0743460655212402, "logits_per_char": -1.0371730327606201, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 324, "native_id": "83808e92381b2e5f4cdf55d1391645ae", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.939379334449768, "incorrect_loss_raw": 1.5486465096473694, "correct_loss_per_char": 0.969689667224884, "incorrect_loss_per_char": 0.7743232548236847, "correct_loss_per_token": 1.939379334449768, "incorrect_loss_per_token": 1.5486465096473694, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4991340637207031, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.4991340637207031, "logits_per_char": -0.7495670318603516, "num_chars": 2}, {"sum_logits": -1.602110743522644, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.602110743522644, "logits_per_char": -0.801055371761322, "num_chars": 2}, {"sum_logits": -1.5474038124084473, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.5474038124084473, "logits_per_char": -0.7737019062042236, "num_chars": 2}, {"sum_logits": -1.545937418937683, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.545937418937683, "logits_per_char": -0.7729687094688416, "num_chars": 2}, {"sum_logits": -1.939379334449768, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.939379334449768, "logits_per_char": -0.969689667224884, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 325, "native_id": "1a86310d7279097205a3403752c3b914", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5563775300979614, "incorrect_loss_raw": 1.6425518691539764, "correct_loss_per_char": 0.7781887650489807, "incorrect_loss_per_char": 0.8212759345769882, "correct_loss_per_token": 1.5563775300979614, "incorrect_loss_per_token": 1.6425518691539764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4864001274108887, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4864001274108887, "logits_per_char": -0.7432000637054443, "num_chars": 2}, {"sum_logits": -1.5563775300979614, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5563775300979614, "logits_per_char": -0.7781887650489807, "num_chars": 2}, {"sum_logits": -1.5332847833633423, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5332847833633423, "logits_per_char": -0.7666423916816711, "num_chars": 2}, {"sum_logits": -1.7189456224441528, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7189456224441528, "logits_per_char": -0.8594728112220764, "num_chars": 2}, {"sum_logits": -1.831576943397522, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.831576943397522, "logits_per_char": -0.915788471698761, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 326, "native_id": "b4130d1790948134f3aeab9d3d79c181", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4917904138565063, "incorrect_loss_raw": 1.6480408906936646, "correct_loss_per_char": 0.7458952069282532, "incorrect_loss_per_char": 0.8240204453468323, "correct_loss_per_token": 1.4917904138565063, "incorrect_loss_per_token": 1.6480408906936646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4917904138565063, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4917904138565063, "logits_per_char": -0.7458952069282532, "num_chars": 2}, {"sum_logits": -1.7084386348724365, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7084386348724365, "logits_per_char": -0.8542193174362183, "num_chars": 2}, {"sum_logits": -1.6385490894317627, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6385490894317627, "logits_per_char": -0.8192745447158813, "num_chars": 2}, {"sum_logits": -1.6416733264923096, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6416733264923096, "logits_per_char": -0.8208366632461548, "num_chars": 2}, {"sum_logits": -1.6035025119781494, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6035025119781494, "logits_per_char": -0.8017512559890747, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 327, "native_id": "a5097b7f56d20217679f28201801476f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6102744340896606, "incorrect_loss_raw": 1.6650270521640778, "correct_loss_per_char": 0.8051372170448303, "incorrect_loss_per_char": 0.8325135260820389, "correct_loss_per_token": 1.6102744340896606, "incorrect_loss_per_token": 1.6650270521640778, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2902976274490356, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2902976274490356, "logits_per_char": -0.6451488137245178, "num_chars": 2}, {"sum_logits": -1.4972875118255615, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4972875118255615, "logits_per_char": -0.7486437559127808, "num_chars": 2}, {"sum_logits": -1.6102744340896606, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6102744340896606, "logits_per_char": -0.8051372170448303, "num_chars": 2}, {"sum_logits": -1.7056496143341064, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7056496143341064, "logits_per_char": -0.8528248071670532, "num_chars": 2}, {"sum_logits": -2.1668734550476074, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.1668734550476074, "logits_per_char": -1.0834367275238037, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 328, "native_id": "bcc5dd6292a64d8fa17cd07c360b335d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6774659156799316, "incorrect_loss_raw": 1.6118586361408234, "correct_loss_per_char": 0.8387329578399658, "incorrect_loss_per_char": 0.8059293180704117, "correct_loss_per_token": 1.6774659156799316, "incorrect_loss_per_token": 1.6118586361408234, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4483859539031982, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4483859539031982, "logits_per_char": -0.7241929769515991, "num_chars": 2}, {"sum_logits": -1.5212620496749878, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5212620496749878, "logits_per_char": -0.7606310248374939, "num_chars": 2}, {"sum_logits": -1.6774659156799316, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6774659156799316, "logits_per_char": -0.8387329578399658, "num_chars": 2}, {"sum_logits": -1.5895040035247803, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5895040035247803, "logits_per_char": -0.7947520017623901, "num_chars": 2}, {"sum_logits": -1.8882825374603271, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8882825374603271, "logits_per_char": -0.9441412687301636, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 329, "native_id": "cfc7fccb8449a2a950c9d2a50991420e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5066941976547241, "incorrect_loss_raw": 1.691026747226715, "correct_loss_per_char": 0.7533470988273621, "incorrect_loss_per_char": 0.8455133736133575, "correct_loss_per_token": 1.5066941976547241, "incorrect_loss_per_token": 1.691026747226715, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2720603942871094, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2720603942871094, "logits_per_char": -0.6360301971435547, "num_chars": 2}, {"sum_logits": -1.5066941976547241, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5066941976547241, "logits_per_char": -0.7533470988273621, "num_chars": 2}, {"sum_logits": -1.626571536064148, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.626571536064148, "logits_per_char": -0.813285768032074, "num_chars": 2}, {"sum_logits": -1.7208904027938843, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7208904027938843, "logits_per_char": -0.8604452013969421, "num_chars": 2}, {"sum_logits": -2.1445846557617188, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.1445846557617188, "logits_per_char": -1.0722923278808594, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 330, "native_id": "2e83c5989a018bec6d5f5ac7d3b72f49", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5118303298950195, "incorrect_loss_raw": 1.6831573247909546, "correct_loss_per_char": 0.7559151649475098, "incorrect_loss_per_char": 0.8415786623954773, "correct_loss_per_token": 1.5118303298950195, "incorrect_loss_per_token": 1.6831573247909546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2336664199829102, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.2336664199829102, "logits_per_char": -0.6168332099914551, "num_chars": 2}, {"sum_logits": -1.5118303298950195, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5118303298950195, "logits_per_char": -0.7559151649475098, "num_chars": 2}, {"sum_logits": -1.729487657546997, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.729487657546997, "logits_per_char": -0.8647438287734985, "num_chars": 2}, {"sum_logits": -1.7818520069122314, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.7818520069122314, "logits_per_char": -0.8909260034561157, "num_chars": 2}, {"sum_logits": -1.9876232147216797, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.9876232147216797, "logits_per_char": -0.9938116073608398, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 331, "native_id": "34b2d6aecdb5af8efacf0b0aa7e3989f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.497827410697937, "incorrect_loss_raw": 1.660931408405304, "correct_loss_per_char": 0.7489137053489685, "incorrect_loss_per_char": 0.830465704202652, "correct_loss_per_token": 1.497827410697937, "incorrect_loss_per_token": 1.660931408405304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.497827410697937, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.497827410697937, "logits_per_char": -0.7489137053489685, "num_chars": 2}, {"sum_logits": -1.5123533010482788, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5123533010482788, "logits_per_char": -0.7561766505241394, "num_chars": 2}, {"sum_logits": -1.5333760976791382, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5333760976791382, "logits_per_char": -0.7666880488395691, "num_chars": 2}, {"sum_logits": -1.6281975507736206, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6281975507736206, "logits_per_char": -0.8140987753868103, "num_chars": 2}, {"sum_logits": -1.9697986841201782, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9697986841201782, "logits_per_char": -0.9848993420600891, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 332, "native_id": "2ec7f8fe7948f9997e73f9bff7ba6e05", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2172764539718628, "incorrect_loss_raw": 1.765647441148758, "correct_loss_per_char": 0.6086382269859314, "incorrect_loss_per_char": 0.882823720574379, "correct_loss_per_token": 1.2172764539718628, "incorrect_loss_per_token": 1.765647441148758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2172764539718628, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.2172764539718628, "logits_per_char": -0.6086382269859314, "num_chars": 2}, {"sum_logits": -1.6059819459915161, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6059819459915161, "logits_per_char": -0.8029909729957581, "num_chars": 2}, {"sum_logits": -1.6457101106643677, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6457101106643677, "logits_per_char": -0.8228550553321838, "num_chars": 2}, {"sum_logits": -1.6764453649520874, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6764453649520874, "logits_per_char": -0.8382226824760437, "num_chars": 2}, {"sum_logits": -2.1344523429870605, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.1344523429870605, "logits_per_char": -1.0672261714935303, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 333, "native_id": "651785ed4f7b0bd2e7ca9f70a42acea5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3146804571151733, "incorrect_loss_raw": 1.7455336153507233, "correct_loss_per_char": 0.6573402285575867, "incorrect_loss_per_char": 0.8727668076753616, "correct_loss_per_token": 1.3146804571151733, "incorrect_loss_per_token": 1.7455336153507233, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3146804571151733, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3146804571151733, "logits_per_char": -0.6573402285575867, "num_chars": 2}, {"sum_logits": -1.4779647588729858, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4779647588729858, "logits_per_char": -0.7389823794364929, "num_chars": 2}, {"sum_logits": -1.6542984247207642, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6542984247207642, "logits_per_char": -0.8271492123603821, "num_chars": 2}, {"sum_logits": -1.6075893640518188, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6075893640518188, "logits_per_char": -0.8037946820259094, "num_chars": 2}, {"sum_logits": -2.242281913757324, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.242281913757324, "logits_per_char": -1.121140956878662, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 334, "native_id": "ee46995407eb6357bb5410d49d378629", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5418221950531006, "incorrect_loss_raw": 1.643390953540802, "correct_loss_per_char": 0.7709110975265503, "incorrect_loss_per_char": 0.821695476770401, "correct_loss_per_token": 1.5418221950531006, "incorrect_loss_per_token": 1.643390953540802, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5418221950531006, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5418221950531006, "logits_per_char": -0.7709110975265503, "num_chars": 2}, {"sum_logits": -1.6454551219940186, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6454551219940186, "logits_per_char": -0.8227275609970093, "num_chars": 2}, {"sum_logits": -1.7115869522094727, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.7115869522094727, "logits_per_char": -0.8557934761047363, "num_chars": 2}, {"sum_logits": -1.4181795120239258, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.4181795120239258, "logits_per_char": -0.7090897560119629, "num_chars": 2}, {"sum_logits": -1.798342227935791, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.798342227935791, "logits_per_char": -0.8991711139678955, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 335, "native_id": "303aedda3a5ab8d853cbe4edc4b914c6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5711607933044434, "incorrect_loss_raw": 1.6393329799175262, "correct_loss_per_char": 0.7855803966522217, "incorrect_loss_per_char": 0.8196664899587631, "correct_loss_per_token": 1.5711607933044434, "incorrect_loss_per_token": 1.6393329799175262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4780160188674927, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4780160188674927, "logits_per_char": -0.7390080094337463, "num_chars": 2}, {"sum_logits": -1.567331314086914, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.567331314086914, "logits_per_char": -0.783665657043457, "num_chars": 2}, {"sum_logits": -1.5711607933044434, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5711607933044434, "logits_per_char": -0.7855803966522217, "num_chars": 2}, {"sum_logits": -1.5767240524291992, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5767240524291992, "logits_per_char": -0.7883620262145996, "num_chars": 2}, {"sum_logits": -1.935260534286499, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.935260534286499, "logits_per_char": -0.9676302671432495, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 336, "native_id": "720b98fbc365736597147c984f6bd301", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4628416299819946, "incorrect_loss_raw": 1.6898570358753204, "correct_loss_per_char": 0.7314208149909973, "incorrect_loss_per_char": 0.8449285179376602, "correct_loss_per_token": 1.4628416299819946, "incorrect_loss_per_token": 1.6898570358753204, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3332937955856323, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3332937955856323, "logits_per_char": -0.6666468977928162, "num_chars": 2}, {"sum_logits": -1.4628416299819946, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4628416299819946, "logits_per_char": -0.7314208149909973, "num_chars": 2}, {"sum_logits": -1.5881468057632446, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5881468057632446, "logits_per_char": -0.7940734028816223, "num_chars": 2}, {"sum_logits": -1.7893553972244263, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7893553972244263, "logits_per_char": -0.8946776986122131, "num_chars": 2}, {"sum_logits": -2.0486321449279785, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.0486321449279785, "logits_per_char": -1.0243160724639893, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 337, "native_id": "c611875b43b67b91030b889b267bbcb3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6888880729675293, "incorrect_loss_raw": 1.603328824043274, "correct_loss_per_char": 0.8444440364837646, "incorrect_loss_per_char": 0.801664412021637, "correct_loss_per_token": 1.6888880729675293, "incorrect_loss_per_token": 1.603328824043274, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4627450704574585, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.4627450704574585, "logits_per_char": -0.7313725352287292, "num_chars": 2}, {"sum_logits": -1.6888880729675293, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6888880729675293, "logits_per_char": -0.8444440364837646, "num_chars": 2}, {"sum_logits": -1.6422613859176636, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6422613859176636, "logits_per_char": -0.8211306929588318, "num_chars": 2}, {"sum_logits": -1.5711816549301147, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5711816549301147, "logits_per_char": -0.7855908274650574, "num_chars": 2}, {"sum_logits": -1.7371271848678589, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.7371271848678589, "logits_per_char": -0.8685635924339294, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 338, "native_id": "0547da29ffab9b441bae8870cd0f9dab", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2096445560455322, "incorrect_loss_raw": 1.771524041891098, "correct_loss_per_char": 0.6048222780227661, "incorrect_loss_per_char": 0.885762020945549, "correct_loss_per_token": 1.2096445560455322, "incorrect_loss_per_token": 1.771524041891098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2096445560455322, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.2096445560455322, "logits_per_char": -0.6048222780227661, "num_chars": 2}, {"sum_logits": -1.5400320291519165, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5400320291519165, "logits_per_char": -0.7700160145759583, "num_chars": 2}, {"sum_logits": -1.6892420053482056, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6892420053482056, "logits_per_char": -0.8446210026741028, "num_chars": 2}, {"sum_logits": -1.7144705057144165, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7144705057144165, "logits_per_char": -0.8572352528572083, "num_chars": 2}, {"sum_logits": -2.1423516273498535, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.1423516273498535, "logits_per_char": -1.0711758136749268, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 339, "native_id": "21e312c7fd1a52341ce35b66457eab36", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3634291887283325, "incorrect_loss_raw": 1.7035760879516602, "correct_loss_per_char": 0.6817145943641663, "incorrect_loss_per_char": 0.8517880439758301, "correct_loss_per_token": 1.3634291887283325, "incorrect_loss_per_token": 1.7035760879516602, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3634291887283325, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3634291887283325, "logits_per_char": -0.6817145943641663, "num_chars": 2}, {"sum_logits": -1.533566951751709, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.533566951751709, "logits_per_char": -0.7667834758758545, "num_chars": 2}, {"sum_logits": -1.5774662494659424, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5774662494659424, "logits_per_char": -0.7887331247329712, "num_chars": 2}, {"sum_logits": -1.7182955741882324, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7182955741882324, "logits_per_char": -0.8591477870941162, "num_chars": 2}, {"sum_logits": -1.9849755764007568, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9849755764007568, "logits_per_char": -0.9924877882003784, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 340, "native_id": "82e26bc22af89c38d54aa2d00dcb8a2b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1947704553604126, "incorrect_loss_raw": 1.7958373129367828, "correct_loss_per_char": 0.5973852276802063, "incorrect_loss_per_char": 0.8979186564683914, "correct_loss_per_token": 1.1947704553604126, "incorrect_loss_per_token": 1.7958373129367828, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1947704553604126, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.1947704553604126, "logits_per_char": -0.5973852276802063, "num_chars": 2}, {"sum_logits": -1.4415251016616821, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4415251016616821, "logits_per_char": -0.7207625508308411, "num_chars": 2}, {"sum_logits": -1.6645499467849731, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6645499467849731, "logits_per_char": -0.8322749733924866, "num_chars": 2}, {"sum_logits": -1.8207377195358276, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8207377195358276, "logits_per_char": -0.9103688597679138, "num_chars": 2}, {"sum_logits": -2.2565364837646484, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.2565364837646484, "logits_per_char": -1.1282682418823242, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 341, "native_id": "f75357e48c3026cfa4da3dba9f91bb21", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.4294183254241943, "incorrect_loss_raw": 1.4949435889720917, "correct_loss_per_char": 1.2147091627120972, "incorrect_loss_per_char": 0.7474717944860458, "correct_loss_per_token": 2.4294183254241943, "incorrect_loss_per_token": 1.4949435889720917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2513872385025024, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2513872385025024, "logits_per_char": -0.6256936192512512, "num_chars": 2}, {"sum_logits": -1.5188615322113037, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5188615322113037, "logits_per_char": -0.7594307661056519, "num_chars": 2}, {"sum_logits": -1.5890874862670898, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5890874862670898, "logits_per_char": -0.7945437431335449, "num_chars": 2}, {"sum_logits": -1.6204380989074707, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6204380989074707, "logits_per_char": -0.8102190494537354, "num_chars": 2}, {"sum_logits": -2.4294183254241943, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.4294183254241943, "logits_per_char": -1.2147091627120972, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 342, "native_id": "64931f9097155672bfe3e16f03b2c195", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.617072582244873, "incorrect_loss_raw": 1.6262025833129883, "correct_loss_per_char": 0.8085362911224365, "incorrect_loss_per_char": 0.8131012916564941, "correct_loss_per_token": 1.617072582244873, "incorrect_loss_per_token": 1.6262025833129883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5393459796905518, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5393459796905518, "logits_per_char": -0.7696729898452759, "num_chars": 2}, {"sum_logits": -1.617072582244873, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.617072582244873, "logits_per_char": -0.8085362911224365, "num_chars": 2}, {"sum_logits": -1.5304821729660034, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5304821729660034, "logits_per_char": -0.7652410864830017, "num_chars": 2}, {"sum_logits": -1.5185788869857788, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.5185788869857788, "logits_per_char": -0.7592894434928894, "num_chars": 2}, {"sum_logits": -1.9164032936096191, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.9164032936096191, "logits_per_char": -0.9582016468048096, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 343, "native_id": "5de3248caa2e5ed83dd0ec45a15eae18", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.54680597782135, "incorrect_loss_raw": 1.666077733039856, "correct_loss_per_char": 0.773402988910675, "incorrect_loss_per_char": 0.833038866519928, "correct_loss_per_token": 1.54680597782135, "incorrect_loss_per_token": 1.666077733039856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3759746551513672, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3759746551513672, "logits_per_char": -0.6879873275756836, "num_chars": 2}, {"sum_logits": -1.54680597782135, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.54680597782135, "logits_per_char": -0.773402988910675, "num_chars": 2}, {"sum_logits": -1.5701637268066406, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5701637268066406, "logits_per_char": -0.7850818634033203, "num_chars": 2}, {"sum_logits": -1.6164860725402832, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6164860725402832, "logits_per_char": -0.8082430362701416, "num_chars": 2}, {"sum_logits": -2.101686477661133, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.101686477661133, "logits_per_char": -1.0508432388305664, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 344, "native_id": "0611dfbf5114084723d75f59b4f67412", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9614757299423218, "incorrect_loss_raw": 1.5488078594207764, "correct_loss_per_char": 0.9807378649711609, "incorrect_loss_per_char": 0.7744039297103882, "correct_loss_per_token": 1.9614757299423218, "incorrect_loss_per_token": 1.5488078594207764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4028502702713013, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4028502702713013, "logits_per_char": -0.7014251351356506, "num_chars": 2}, {"sum_logits": -1.5237371921539307, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5237371921539307, "logits_per_char": -0.7618685960769653, "num_chars": 2}, {"sum_logits": -1.6611384153366089, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6611384153366089, "logits_per_char": -0.8305692076683044, "num_chars": 2}, {"sum_logits": -1.6075055599212646, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6075055599212646, "logits_per_char": -0.8037527799606323, "num_chars": 2}, {"sum_logits": -1.9614757299423218, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.9614757299423218, "logits_per_char": -0.9807378649711609, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 345, "native_id": "5b8d76889510384b38b72945e8d28f53", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0726704597473145, "incorrect_loss_raw": 1.5369855463504791, "correct_loss_per_char": 1.0363352298736572, "incorrect_loss_per_char": 0.7684927731752396, "correct_loss_per_token": 2.0726704597473145, "incorrect_loss_per_token": 1.5369855463504791, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3485620021820068, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3485620021820068, "logits_per_char": -0.6742810010910034, "num_chars": 2}, {"sum_logits": -1.437630295753479, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.437630295753479, "logits_per_char": -0.7188151478767395, "num_chars": 2}, {"sum_logits": -1.6466106176376343, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6466106176376343, "logits_per_char": -0.8233053088188171, "num_chars": 2}, {"sum_logits": -1.7151392698287964, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7151392698287964, "logits_per_char": -0.8575696349143982, "num_chars": 2}, {"sum_logits": -2.0726704597473145, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.0726704597473145, "logits_per_char": -1.0363352298736572, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 346, "native_id": "d81f5c49bc060dc799681bf4cacac73a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7623162269592285, "incorrect_loss_raw": 1.684867262840271, "correct_loss_per_char": 0.8811581134796143, "incorrect_loss_per_char": 0.8424336314201355, "correct_loss_per_token": 1.7623162269592285, "incorrect_loss_per_token": 1.684867262840271, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1620765924453735, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.1620765924453735, "logits_per_char": -0.5810382962226868, "num_chars": 2}, {"sum_logits": -1.4301847219467163, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4301847219467163, "logits_per_char": -0.7150923609733582, "num_chars": 2}, {"sum_logits": -1.674504280090332, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.674504280090332, "logits_per_char": -0.837252140045166, "num_chars": 2}, {"sum_logits": -1.7623162269592285, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7623162269592285, "logits_per_char": -0.8811581134796143, "num_chars": 2}, {"sum_logits": -2.472703456878662, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.472703456878662, "logits_per_char": -1.236351728439331, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 347, "native_id": "aaf4fa38433c84b3bd0a86551259ce62", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.686991572380066, "incorrect_loss_raw": 1.6491506397724152, "correct_loss_per_char": 0.843495786190033, "incorrect_loss_per_char": 0.8245753198862076, "correct_loss_per_token": 1.686991572380066, "incorrect_loss_per_token": 1.6491506397724152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2997084856033325, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.2997084856033325, "logits_per_char": -0.6498542428016663, "num_chars": 2}, {"sum_logits": -1.4997316598892212, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4997316598892212, "logits_per_char": -0.7498658299446106, "num_chars": 2}, {"sum_logits": -1.5811513662338257, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5811513662338257, "logits_per_char": -0.7905756831169128, "num_chars": 2}, {"sum_logits": -1.686991572380066, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.686991572380066, "logits_per_char": -0.843495786190033, "num_chars": 2}, {"sum_logits": -2.2160110473632812, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.2160110473632812, "logits_per_char": -1.1080055236816406, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 348, "native_id": "33ea932a876ac0361c9eefeff1d24e92", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1061155796051025, "incorrect_loss_raw": 1.8340131044387817, "correct_loss_per_char": 0.5530577898025513, "incorrect_loss_per_char": 0.9170065522193909, "correct_loss_per_token": 1.1061155796051025, "incorrect_loss_per_token": 1.8340131044387817, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1061155796051025, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.1061155796051025, "logits_per_char": -0.5530577898025513, "num_chars": 2}, {"sum_logits": -1.5187017917633057, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5187017917633057, "logits_per_char": -0.7593508958816528, "num_chars": 2}, {"sum_logits": -1.7140095233917236, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7140095233917236, "logits_per_char": -0.8570047616958618, "num_chars": 2}, {"sum_logits": -1.8106317520141602, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.8106317520141602, "logits_per_char": -0.9053158760070801, "num_chars": 2}, {"sum_logits": -2.2927093505859375, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.2927093505859375, "logits_per_char": -1.1463546752929688, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 349, "native_id": "aead08289ca9abfcd169f935ea228ee5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.036271095275879, "incorrect_loss_raw": 1.6659489572048187, "correct_loss_per_char": 1.0181355476379395, "incorrect_loss_per_char": 0.8329744786024094, "correct_loss_per_token": 2.036271095275879, "incorrect_loss_per_token": 1.6659489572048187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.003770351409912, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.003770351409912, "logits_per_char": -0.501885175704956, "num_chars": 2}, {"sum_logits": -1.4212355613708496, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4212355613708496, "logits_per_char": -0.7106177806854248, "num_chars": 2}, {"sum_logits": -1.7532585859298706, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7532585859298706, "logits_per_char": -0.8766292929649353, "num_chars": 2}, {"sum_logits": -2.036271095275879, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.036271095275879, "logits_per_char": -1.0181355476379395, "num_chars": 2}, {"sum_logits": -2.4855313301086426, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.4855313301086426, "logits_per_char": -1.2427656650543213, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 350, "native_id": "adbddc80b10bf25f09c6c2bee4e3c59b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9587483406066895, "incorrect_loss_raw": 1.5485012531280518, "correct_loss_per_char": 0.9793741703033447, "incorrect_loss_per_char": 0.7742506265640259, "correct_loss_per_token": 1.9587483406066895, "incorrect_loss_per_token": 1.5485012531280518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4218097925186157, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4218097925186157, "logits_per_char": -0.7109048962593079, "num_chars": 2}, {"sum_logits": -1.541855812072754, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.541855812072754, "logits_per_char": -0.770927906036377, "num_chars": 2}, {"sum_logits": -1.5694347620010376, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5694347620010376, "logits_per_char": -0.7847173810005188, "num_chars": 2}, {"sum_logits": -1.6609046459197998, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6609046459197998, "logits_per_char": -0.8304523229598999, "num_chars": 2}, {"sum_logits": -1.9587483406066895, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.9587483406066895, "logits_per_char": -0.9793741703033447, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 351, "native_id": "1caf93d6a22dc8190e19c14bbe1fafda", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4765552282333374, "incorrect_loss_raw": 1.7574557960033417, "correct_loss_per_char": 0.7382776141166687, "incorrect_loss_per_char": 0.8787278980016708, "correct_loss_per_token": 1.4765552282333374, "incorrect_loss_per_token": 1.7574557960033417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.049688458442688, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.049688458442688, "logits_per_char": -0.524844229221344, "num_chars": 2}, {"sum_logits": -1.4765552282333374, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.4765552282333374, "logits_per_char": -0.7382776141166687, "num_chars": 2}, {"sum_logits": -1.837152123451233, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.837152123451233, "logits_per_char": -0.9185760617256165, "num_chars": 2}, {"sum_logits": -1.867745041847229, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.867745041847229, "logits_per_char": -0.9338725209236145, "num_chars": 2}, {"sum_logits": -2.275237560272217, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.275237560272217, "logits_per_char": -1.1376187801361084, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 352, "native_id": "0bf4d64ad0eee7224acb3a4eb85accb2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5247009992599487, "incorrect_loss_raw": 1.67728653550148, "correct_loss_per_char": 0.7623504996299744, "incorrect_loss_per_char": 0.83864326775074, "correct_loss_per_token": 1.5247009992599487, "incorrect_loss_per_token": 1.67728653550148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3651248216629028, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3651248216629028, "logits_per_char": -0.6825624108314514, "num_chars": 2}, {"sum_logits": -1.5247009992599487, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5247009992599487, "logits_per_char": -0.7623504996299744, "num_chars": 2}, {"sum_logits": -1.5347063541412354, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5347063541412354, "logits_per_char": -0.7673531770706177, "num_chars": 2}, {"sum_logits": -1.6702139377593994, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6702139377593994, "logits_per_char": -0.8351069688796997, "num_chars": 2}, {"sum_logits": -2.139101028442383, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.139101028442383, "logits_per_char": -1.0695505142211914, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 353, "native_id": "b93532cae23e505628dd88568da3337e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6235870122909546, "incorrect_loss_raw": 1.6384250819683075, "correct_loss_per_char": 0.8117935061454773, "incorrect_loss_per_char": 0.8192125409841537, "correct_loss_per_token": 1.6235870122909546, "incorrect_loss_per_token": 1.6384250819683075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4191462993621826, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4191462993621826, "logits_per_char": -0.7095731496810913, "num_chars": 2}, {"sum_logits": -1.6235870122909546, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6235870122909546, "logits_per_char": -0.8117935061454773, "num_chars": 2}, {"sum_logits": -1.5159721374511719, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5159721374511719, "logits_per_char": -0.7579860687255859, "num_chars": 2}, {"sum_logits": -1.5692239999771118, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5692239999771118, "logits_per_char": -0.7846119999885559, "num_chars": 2}, {"sum_logits": -2.0493578910827637, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0493578910827637, "logits_per_char": -1.0246789455413818, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 354, "native_id": "2d3c9d3dff1a7a8253180cb3de1ceeea", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5288169384002686, "incorrect_loss_raw": 1.661048799753189, "correct_loss_per_char": 0.7644084692001343, "incorrect_loss_per_char": 0.8305243998765945, "correct_loss_per_token": 1.5288169384002686, "incorrect_loss_per_token": 1.661048799753189, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4065848588943481, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4065848588943481, "logits_per_char": -0.7032924294471741, "num_chars": 2}, {"sum_logits": -1.5288169384002686, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5288169384002686, "logits_per_char": -0.7644084692001343, "num_chars": 2}, {"sum_logits": -1.616324543952942, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.616324543952942, "logits_per_char": -0.808162271976471, "num_chars": 2}, {"sum_logits": -1.6006659269332886, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6006659269332886, "logits_per_char": -0.8003329634666443, "num_chars": 2}, {"sum_logits": -2.0206198692321777, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.0206198692321777, "logits_per_char": -1.0103099346160889, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 355, "native_id": "70701f5d1d62e58d5c74e2e303bb4065", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1852245330810547, "incorrect_loss_raw": 1.808118849992752, "correct_loss_per_char": 0.5926122665405273, "incorrect_loss_per_char": 0.904059424996376, "correct_loss_per_token": 1.1852245330810547, "incorrect_loss_per_token": 1.808118849992752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1852245330810547, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.1852245330810547, "logits_per_char": -0.5926122665405273, "num_chars": 2}, {"sum_logits": -1.5078977346420288, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5078977346420288, "logits_per_char": -0.7539488673210144, "num_chars": 2}, {"sum_logits": -1.6707383394241333, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6707383394241333, "logits_per_char": -0.8353691697120667, "num_chars": 2}, {"sum_logits": -1.6681252717971802, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6681252717971802, "logits_per_char": -0.8340626358985901, "num_chars": 2}, {"sum_logits": -2.385714054107666, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.385714054107666, "logits_per_char": -1.192857027053833, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 356, "native_id": "eacd87f297193033669a93160ae3776f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5037128925323486, "incorrect_loss_raw": 1.6714097559452057, "correct_loss_per_char": 0.7518564462661743, "incorrect_loss_per_char": 0.8357048779726028, "correct_loss_per_token": 1.5037128925323486, "incorrect_loss_per_token": 1.6714097559452057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4143530130386353, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4143530130386353, "logits_per_char": -0.7071765065193176, "num_chars": 2}, {"sum_logits": -1.5037128925323486, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5037128925323486, "logits_per_char": -0.7518564462661743, "num_chars": 2}, {"sum_logits": -1.5317288637161255, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5317288637161255, "logits_per_char": -0.7658644318580627, "num_chars": 2}, {"sum_logits": -1.6794055700302124, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6794055700302124, "logits_per_char": -0.8397027850151062, "num_chars": 2}, {"sum_logits": -2.0601515769958496, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0601515769958496, "logits_per_char": -1.0300757884979248, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 357, "native_id": "8e1b0792e441a5d54ae47a4b24f48977", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5510988235473633, "incorrect_loss_raw": 1.6554527878761292, "correct_loss_per_char": 0.7755494117736816, "incorrect_loss_per_char": 0.8277263939380646, "correct_loss_per_token": 1.5510988235473633, "incorrect_loss_per_token": 1.6554527878761292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4366607666015625, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4366607666015625, "logits_per_char": -0.7183303833007812, "num_chars": 2}, {"sum_logits": -1.5510988235473633, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5510988235473633, "logits_per_char": -0.7755494117736816, "num_chars": 2}, {"sum_logits": -1.6558159589767456, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6558159589767456, "logits_per_char": -0.8279079794883728, "num_chars": 2}, {"sum_logits": -1.510013461112976, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.510013461112976, "logits_per_char": -0.755006730556488, "num_chars": 2}, {"sum_logits": -2.0193209648132324, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.0193209648132324, "logits_per_char": -1.0096604824066162, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 358, "native_id": "b4cde6a56fb19afc84876ebf2fb9e71a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.568715214729309, "incorrect_loss_raw": 1.6473897397518158, "correct_loss_per_char": 0.7843576073646545, "incorrect_loss_per_char": 0.8236948698759079, "correct_loss_per_token": 1.568715214729309, "incorrect_loss_per_token": 1.6473897397518158, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4247890710830688, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4247890710830688, "logits_per_char": -0.7123945355415344, "num_chars": 2}, {"sum_logits": -1.5181212425231934, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5181212425231934, "logits_per_char": -0.7590606212615967, "num_chars": 2}, {"sum_logits": -1.568715214729309, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.568715214729309, "logits_per_char": -0.7843576073646545, "num_chars": 2}, {"sum_logits": -1.6672731637954712, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6672731637954712, "logits_per_char": -0.8336365818977356, "num_chars": 2}, {"sum_logits": -1.9793754816055298, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9793754816055298, "logits_per_char": -0.9896877408027649, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 359, "native_id": "095c5bc5fbaf12b384e9f7df47fdec16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.573107123374939, "incorrect_loss_raw": 1.6717569231987, "correct_loss_per_char": 0.7865535616874695, "incorrect_loss_per_char": 0.83587846159935, "correct_loss_per_token": 1.573107123374939, "incorrect_loss_per_token": 1.6717569231987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2647836208343506, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2647836208343506, "logits_per_char": -0.6323918104171753, "num_chars": 2}, {"sum_logits": -1.6567009687423706, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6567009687423706, "logits_per_char": -0.8283504843711853, "num_chars": 2}, {"sum_logits": -1.573107123374939, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.573107123374939, "logits_per_char": -0.7865535616874695, "num_chars": 2}, {"sum_logits": -1.6259161233901978, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6259161233901978, "logits_per_char": -0.8129580616950989, "num_chars": 2}, {"sum_logits": -2.139626979827881, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.139626979827881, "logits_per_char": -1.0698134899139404, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 360, "native_id": "494c501dbbfd36c602aae9e5b8e0cfff", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.685144066810608, "incorrect_loss_raw": 1.6104851365089417, "correct_loss_per_char": 0.842572033405304, "incorrect_loss_per_char": 0.8052425682544708, "correct_loss_per_token": 1.685144066810608, "incorrect_loss_per_token": 1.6104851365089417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.361503005027771, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.361503005027771, "logits_per_char": -0.6807515025138855, "num_chars": 2}, {"sum_logits": -1.685144066810608, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.685144066810608, "logits_per_char": -0.842572033405304, "num_chars": 2}, {"sum_logits": -1.6859115362167358, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6859115362167358, "logits_per_char": -0.8429557681083679, "num_chars": 2}, {"sum_logits": -1.6091430187225342, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6091430187225342, "logits_per_char": -0.8045715093612671, "num_chars": 2}, {"sum_logits": -1.7853829860687256, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7853829860687256, "logits_per_char": -0.8926914930343628, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 361, "native_id": "5a7f6fd97b2c9ad05f773bc8b2ecf441", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.567284107208252, "incorrect_loss_raw": 1.6486082673072815, "correct_loss_per_char": 0.783642053604126, "incorrect_loss_per_char": 0.8243041336536407, "correct_loss_per_token": 1.567284107208252, "incorrect_loss_per_token": 1.6486082673072815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.476266622543335, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.476266622543335, "logits_per_char": -0.7381333112716675, "num_chars": 2}, {"sum_logits": -1.672396183013916, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.672396183013916, "logits_per_char": -0.836198091506958, "num_chars": 2}, {"sum_logits": -1.469484806060791, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.469484806060791, "logits_per_char": -0.7347424030303955, "num_chars": 2}, {"sum_logits": -1.567284107208252, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.567284107208252, "logits_per_char": -0.783642053604126, "num_chars": 2}, {"sum_logits": -1.976285457611084, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.976285457611084, "logits_per_char": -0.988142728805542, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 362, "native_id": "5279a2ea333ba8a5bf3a7637a7279da1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5330049991607666, "incorrect_loss_raw": 1.639417052268982, "correct_loss_per_char": 0.7665024995803833, "incorrect_loss_per_char": 0.819708526134491, "correct_loss_per_token": 1.5330049991607666, "incorrect_loss_per_token": 1.639417052268982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6214042901992798, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6214042901992798, "logits_per_char": -0.8107021450996399, "num_chars": 2}, {"sum_logits": -1.5378292798995972, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5378292798995972, "logits_per_char": -0.7689146399497986, "num_chars": 2}, {"sum_logits": -1.6674529314041138, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6674529314041138, "logits_per_char": -0.8337264657020569, "num_chars": 2}, {"sum_logits": -1.5330049991607666, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.5330049991607666, "logits_per_char": -0.7665024995803833, "num_chars": 2}, {"sum_logits": -1.730981707572937, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.730981707572937, "logits_per_char": -0.8654908537864685, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 363, "native_id": "42c46e28baf0fc617a07419286178c0a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.335080146789551, "incorrect_loss_raw": 1.4989368319511414, "correct_loss_per_char": 1.1675400733947754, "incorrect_loss_per_char": 0.7494684159755707, "correct_loss_per_token": 2.335080146789551, "incorrect_loss_per_token": 1.4989368319511414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4862585067749023, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4862585067749023, "logits_per_char": -0.7431292533874512, "num_chars": 2}, {"sum_logits": -1.37016761302948, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.37016761302948, "logits_per_char": -0.68508380651474, "num_chars": 2}, {"sum_logits": -1.512331247329712, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.512331247329712, "logits_per_char": -0.756165623664856, "num_chars": 2}, {"sum_logits": -1.6269899606704712, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6269899606704712, "logits_per_char": -0.8134949803352356, "num_chars": 2}, {"sum_logits": -2.335080146789551, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.335080146789551, "logits_per_char": -1.1675400733947754, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 364, "native_id": "c76304b4962f94ab9f20f09cf4a1a7c1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4190479516983032, "incorrect_loss_raw": 1.7138852179050446, "correct_loss_per_char": 0.7095239758491516, "incorrect_loss_per_char": 0.8569426089525223, "correct_loss_per_token": 1.4190479516983032, "incorrect_loss_per_token": 1.7138852179050446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.393379807472229, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.393379807472229, "logits_per_char": -0.6966899037361145, "num_chars": 2}, {"sum_logits": -1.4190479516983032, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4190479516983032, "logits_per_char": -0.7095239758491516, "num_chars": 2}, {"sum_logits": -1.5881372690200806, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5881372690200806, "logits_per_char": -0.7940686345100403, "num_chars": 2}, {"sum_logits": -1.654150366783142, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.654150366783142, "logits_per_char": -0.827075183391571, "num_chars": 2}, {"sum_logits": -2.2198734283447266, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.2198734283447266, "logits_per_char": -1.1099367141723633, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 365, "native_id": "8b23cd355ffc8b6e7aa5459ffb21b4e0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6075148582458496, "incorrect_loss_raw": 1.6220779418945312, "correct_loss_per_char": 0.8037574291229248, "incorrect_loss_per_char": 0.8110389709472656, "correct_loss_per_token": 1.6075148582458496, "incorrect_loss_per_token": 1.6220779418945312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5445013046264648, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5445013046264648, "logits_per_char": -0.7722506523132324, "num_chars": 2}, {"sum_logits": -1.6075148582458496, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6075148582458496, "logits_per_char": -0.8037574291229248, "num_chars": 2}, {"sum_logits": -1.6518845558166504, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6518845558166504, "logits_per_char": -0.8259422779083252, "num_chars": 2}, {"sum_logits": -1.5000429153442383, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.5000429153442383, "logits_per_char": -0.7500214576721191, "num_chars": 2}, {"sum_logits": -1.7918829917907715, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7918829917907715, "logits_per_char": -0.8959414958953857, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 366, "native_id": "c35f7de9e9005fcf654cb0b23f17acd6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6417361497879028, "incorrect_loss_raw": 1.625669240951538, "correct_loss_per_char": 0.8208680748939514, "incorrect_loss_per_char": 0.812834620475769, "correct_loss_per_token": 1.6417361497879028, "incorrect_loss_per_token": 1.625669240951538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4447861909866333, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4447861909866333, "logits_per_char": -0.7223930954933167, "num_chars": 2}, {"sum_logits": -1.4761642217636108, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4761642217636108, "logits_per_char": -0.7380821108818054, "num_chars": 2}, {"sum_logits": -1.6417361497879028, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6417361497879028, "logits_per_char": -0.8208680748939514, "num_chars": 2}, {"sum_logits": -1.6546980142593384, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6546980142593384, "logits_per_char": -0.8273490071296692, "num_chars": 2}, {"sum_logits": -1.9270285367965698, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.9270285367965698, "logits_per_char": -0.9635142683982849, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 367, "native_id": "d910859b9d1acae40456dbeaa8334bc0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7050609588623047, "incorrect_loss_raw": 1.6022042334079742, "correct_loss_per_char": 0.8525304794311523, "incorrect_loss_per_char": 0.8011021167039871, "correct_loss_per_token": 1.7050609588623047, "incorrect_loss_per_token": 1.6022042334079742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4283291101455688, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.4283291101455688, "logits_per_char": -0.7141645550727844, "num_chars": 2}, {"sum_logits": -1.5545082092285156, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5545082092285156, "logits_per_char": -0.7772541046142578, "num_chars": 2}, {"sum_logits": -1.7050609588623047, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.7050609588623047, "logits_per_char": -0.8525304794311523, "num_chars": 2}, {"sum_logits": -1.6397600173950195, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6397600173950195, "logits_per_char": -0.8198800086975098, "num_chars": 2}, {"sum_logits": -1.786219596862793, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.786219596862793, "logits_per_char": -0.8931097984313965, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 368, "native_id": "6ca8439d062de4d43d7d471c508b78db", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4322519302368164, "incorrect_loss_raw": 1.6745584905147552, "correct_loss_per_char": 0.7161259651184082, "incorrect_loss_per_char": 0.8372792452573776, "correct_loss_per_token": 1.4322519302368164, "incorrect_loss_per_token": 1.6745584905147552, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4322519302368164, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4322519302368164, "logits_per_char": -0.7161259651184082, "num_chars": 2}, {"sum_logits": -1.4618545770645142, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4618545770645142, "logits_per_char": -0.7309272885322571, "num_chars": 2}, {"sum_logits": -1.7711119651794434, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7711119651794434, "logits_per_char": -0.8855559825897217, "num_chars": 2}, {"sum_logits": -1.6901772022247314, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6901772022247314, "logits_per_char": -0.8450886011123657, "num_chars": 2}, {"sum_logits": -1.775090217590332, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.775090217590332, "logits_per_char": -0.887545108795166, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 369, "native_id": "ddd8c62ec94b4f94eeefdd05b9208a71", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5553544759750366, "incorrect_loss_raw": 1.6670265197753906, "correct_loss_per_char": 0.7776772379875183, "incorrect_loss_per_char": 0.8335132598876953, "correct_loss_per_token": 1.5553544759750366, "incorrect_loss_per_token": 1.6670265197753906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3834377527236938, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3834377527236938, "logits_per_char": -0.6917188763618469, "num_chars": 2}, {"sum_logits": -1.4762599468231201, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4762599468231201, "logits_per_char": -0.7381299734115601, "num_chars": 2}, {"sum_logits": -1.5553544759750366, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5553544759750366, "logits_per_char": -0.7776772379875183, "num_chars": 2}, {"sum_logits": -1.6814442873001099, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6814442873001099, "logits_per_char": -0.8407221436500549, "num_chars": 2}, {"sum_logits": -2.1269640922546387, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.1269640922546387, "logits_per_char": -1.0634820461273193, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 370, "native_id": "72b638200414a526b598de0e01a044df", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5416111946105957, "incorrect_loss_raw": 1.6827121078968048, "correct_loss_per_char": 0.7708055973052979, "incorrect_loss_per_char": 0.8413560539484024, "correct_loss_per_token": 1.5416111946105957, "incorrect_loss_per_token": 1.6827121078968048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3653377294540405, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.3653377294540405, "logits_per_char": -0.6826688647270203, "num_chars": 2}, {"sum_logits": -1.4739807844161987, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.4739807844161987, "logits_per_char": -0.7369903922080994, "num_chars": 2}, {"sum_logits": -1.5416111946105957, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5416111946105957, "logits_per_char": -0.7708055973052979, "num_chars": 2}, {"sum_logits": -1.660312533378601, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.660312533378601, "logits_per_char": -0.8301562666893005, "num_chars": 2}, {"sum_logits": -2.231217384338379, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -2.231217384338379, "logits_per_char": -1.1156086921691895, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 371, "native_id": "c770870c88f35f9d110217049c5a7334", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5377895832061768, "incorrect_loss_raw": 1.6486931443214417, "correct_loss_per_char": 0.7688947916030884, "incorrect_loss_per_char": 0.8243465721607208, "correct_loss_per_token": 1.5377895832061768, "incorrect_loss_per_token": 1.6486931443214417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4388399124145508, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4388399124145508, "logits_per_char": -0.7194199562072754, "num_chars": 2}, {"sum_logits": -1.5377895832061768, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5377895832061768, "logits_per_char": -0.7688947916030884, "num_chars": 2}, {"sum_logits": -1.5877878665924072, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5877878665924072, "logits_per_char": -0.7938939332962036, "num_chars": 2}, {"sum_logits": -1.6436216831207275, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6436216831207275, "logits_per_char": -0.8218108415603638, "num_chars": 2}, {"sum_logits": -1.924523115158081, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.924523115158081, "logits_per_char": -0.9622615575790405, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 372, "native_id": "1d8d9e3504c8c58a3b923ddc155c19b0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5524078607559204, "incorrect_loss_raw": 1.6598592102527618, "correct_loss_per_char": 0.7762039303779602, "incorrect_loss_per_char": 0.8299296051263809, "correct_loss_per_token": 1.5524078607559204, "incorrect_loss_per_token": 1.6598592102527618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3985040187835693, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3985040187835693, "logits_per_char": -0.6992520093917847, "num_chars": 2}, {"sum_logits": -1.5524078607559204, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5524078607559204, "logits_per_char": -0.7762039303779602, "num_chars": 2}, {"sum_logits": -1.5643346309661865, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5643346309661865, "logits_per_char": -0.7821673154830933, "num_chars": 2}, {"sum_logits": -1.6119626760482788, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6119626760482788, "logits_per_char": -0.8059813380241394, "num_chars": 2}, {"sum_logits": -2.0646355152130127, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0646355152130127, "logits_per_char": -1.0323177576065063, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 373, "native_id": "95acebea992a26c3a7c3bfb45845fa83", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6926766633987427, "incorrect_loss_raw": 1.6090903282165527, "correct_loss_per_char": 0.8463383316993713, "incorrect_loss_per_char": 0.8045451641082764, "correct_loss_per_token": 1.6926766633987427, "incorrect_loss_per_token": 1.6090903282165527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4491602182388306, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4491602182388306, "logits_per_char": -0.7245801091194153, "num_chars": 2}, {"sum_logits": -1.523768663406372, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.523768663406372, "logits_per_char": -0.761884331703186, "num_chars": 2}, {"sum_logits": -1.6926766633987427, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6926766633987427, "logits_per_char": -0.8463383316993713, "num_chars": 2}, {"sum_logits": -1.5677297115325928, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5677297115325928, "logits_per_char": -0.7838648557662964, "num_chars": 2}, {"sum_logits": -1.8957027196884155, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.8957027196884155, "logits_per_char": -0.9478513598442078, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 374, "native_id": "c2c2a387fd9a6a26cff636008de21f71", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.557984709739685, "incorrect_loss_raw": 1.650796800851822, "correct_loss_per_char": 0.7789923548698425, "incorrect_loss_per_char": 0.825398400425911, "correct_loss_per_token": 1.557984709739685, "incorrect_loss_per_token": 1.650796800851822, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3287601470947266, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3287601470947266, "logits_per_char": -0.6643800735473633, "num_chars": 2}, {"sum_logits": -1.557984709739685, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.557984709739685, "logits_per_char": -0.7789923548698425, "num_chars": 2}, {"sum_logits": -1.6198474168777466, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6198474168777466, "logits_per_char": -0.8099237084388733, "num_chars": 2}, {"sum_logits": -1.756334662437439, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.756334662437439, "logits_per_char": -0.8781673312187195, "num_chars": 2}, {"sum_logits": -1.8982449769973755, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.8982449769973755, "logits_per_char": -0.9491224884986877, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 375, "native_id": "57e96118fee6e2bbac5f59790fc833c0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.405929446220398, "incorrect_loss_raw": 1.6815595626831055, "correct_loss_per_char": 0.702964723110199, "incorrect_loss_per_char": 0.8407797813415527, "correct_loss_per_token": 1.405929446220398, "incorrect_loss_per_token": 1.6815595626831055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.405929446220398, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.405929446220398, "logits_per_char": -0.702964723110199, "num_chars": 2}, {"sum_logits": -1.6086684465408325, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6086684465408325, "logits_per_char": -0.8043342232704163, "num_chars": 2}, {"sum_logits": -1.5641452074050903, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5641452074050903, "logits_per_char": -0.7820726037025452, "num_chars": 2}, {"sum_logits": -1.6461879014968872, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6461879014968872, "logits_per_char": -0.8230939507484436, "num_chars": 2}, {"sum_logits": -1.9072366952896118, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9072366952896118, "logits_per_char": -0.9536183476448059, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 376, "native_id": "b9b82aa4c236cd342ff95455b8516a42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4447219371795654, "incorrect_loss_raw": 1.6867055892944336, "correct_loss_per_char": 0.7223609685897827, "incorrect_loss_per_char": 0.8433527946472168, "correct_loss_per_token": 1.4447219371795654, "incorrect_loss_per_token": 1.6867055892944336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4447219371795654, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4447219371795654, "logits_per_char": -0.7223609685897827, "num_chars": 2}, {"sum_logits": -1.4525089263916016, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4525089263916016, "logits_per_char": -0.7262544631958008, "num_chars": 2}, {"sum_logits": -1.5474658012390137, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5474658012390137, "logits_per_char": -0.7737329006195068, "num_chars": 2}, {"sum_logits": -1.6748363971710205, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6748363971710205, "logits_per_char": -0.8374181985855103, "num_chars": 2}, {"sum_logits": -2.0720112323760986, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0720112323760986, "logits_per_char": -1.0360056161880493, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 377, "native_id": "41fac392c6a5827c1b6682d5d3798e59", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5757498741149902, "incorrect_loss_raw": 1.6351574659347534, "correct_loss_per_char": 0.7878749370574951, "incorrect_loss_per_char": 0.8175787329673767, "correct_loss_per_token": 1.5757498741149902, "incorrect_loss_per_token": 1.6351574659347534, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4902901649475098, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4902901649475098, "logits_per_char": -0.7451450824737549, "num_chars": 2}, {"sum_logits": -1.5757498741149902, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5757498741149902, "logits_per_char": -0.7878749370574951, "num_chars": 2}, {"sum_logits": -1.514458417892456, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.514458417892456, "logits_per_char": -0.757229208946228, "num_chars": 2}, {"sum_logits": -1.6671299934387207, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6671299934387207, "logits_per_char": -0.8335649967193604, "num_chars": 2}, {"sum_logits": -1.8687512874603271, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8687512874603271, "logits_per_char": -0.9343756437301636, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 378, "native_id": "5c224410a40c9269b1e542cfcb430d35", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6621222496032715, "incorrect_loss_raw": 1.6334959268569946, "correct_loss_per_char": 0.8310611248016357, "incorrect_loss_per_char": 0.8167479634284973, "correct_loss_per_token": 1.6621222496032715, "incorrect_loss_per_token": 1.6334959268569946, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3947932720184326, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.3947932720184326, "logits_per_char": -0.6973966360092163, "num_chars": 2}, {"sum_logits": -1.6621222496032715, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6621222496032715, "logits_per_char": -0.8310611248016357, "num_chars": 2}, {"sum_logits": -1.5371654033660889, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5371654033660889, "logits_per_char": -0.7685827016830444, "num_chars": 2}, {"sum_logits": -1.5224831104278564, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5224831104278564, "logits_per_char": -0.7612415552139282, "num_chars": 2}, {"sum_logits": -2.0795419216156006, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0795419216156006, "logits_per_char": -1.0397709608078003, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 379, "native_id": "0b90c6710a65eb55fea4cc92895bf601", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9088077545166016, "incorrect_loss_raw": 1.572273850440979, "correct_loss_per_char": 0.9544038772583008, "incorrect_loss_per_char": 0.7861369252204895, "correct_loss_per_token": 1.9088077545166016, "incorrect_loss_per_token": 1.572273850440979, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2332683801651, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2332683801651, "logits_per_char": -0.61663419008255, "num_chars": 2}, {"sum_logits": -1.6704843044281006, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6704843044281006, "logits_per_char": -0.8352421522140503, "num_chars": 2}, {"sum_logits": -1.7321739196777344, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7321739196777344, "logits_per_char": -0.8660869598388672, "num_chars": 2}, {"sum_logits": -1.653168797492981, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.653168797492981, "logits_per_char": -0.8265843987464905, "num_chars": 2}, {"sum_logits": -1.9088077545166016, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.9088077545166016, "logits_per_char": -0.9544038772583008, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 380, "native_id": "70af2b5df22ec96901350dfa3c9ee74f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5320613384246826, "incorrect_loss_raw": 1.6370608806610107, "correct_loss_per_char": 0.7660306692123413, "incorrect_loss_per_char": 0.8185304403305054, "correct_loss_per_token": 1.5320613384246826, "incorrect_loss_per_token": 1.6370608806610107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5320613384246826, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.5320613384246826, "logits_per_char": -0.7660306692123413, "num_chars": 2}, {"sum_logits": -1.5824756622314453, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5824756622314453, "logits_per_char": -0.7912378311157227, "num_chars": 2}, {"sum_logits": -1.7007660865783691, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.7007660865783691, "logits_per_char": -0.8503830432891846, "num_chars": 2}, {"sum_logits": -1.6069824695587158, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6069824695587158, "logits_per_char": -0.8034912347793579, "num_chars": 2}, {"sum_logits": -1.6580193042755127, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6580193042755127, "logits_per_char": -0.8290096521377563, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 381, "native_id": "f9243ef9f0037657c337d3c6a9832f05", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6689178943634033, "incorrect_loss_raw": 1.61151584982872, "correct_loss_per_char": 0.8344589471817017, "incorrect_loss_per_char": 0.80575792491436, "correct_loss_per_token": 1.6689178943634033, "incorrect_loss_per_token": 1.61151584982872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.431238055229187, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.431238055229187, "logits_per_char": -0.7156190276145935, "num_chars": 2}, {"sum_logits": -1.529132604598999, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.529132604598999, "logits_per_char": -0.7645663022994995, "num_chars": 2}, {"sum_logits": -1.6700339317321777, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6700339317321777, "logits_per_char": -0.8350169658660889, "num_chars": 2}, {"sum_logits": -1.6689178943634033, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6689178943634033, "logits_per_char": -0.8344589471817017, "num_chars": 2}, {"sum_logits": -1.8156588077545166, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8156588077545166, "logits_per_char": -0.9078294038772583, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 382, "native_id": "27f2074270ea8a5e8f5ec2a017ec4a50", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7592281103134155, "incorrect_loss_raw": 1.5991544723510742, "correct_loss_per_char": 0.8796140551567078, "incorrect_loss_per_char": 0.7995772361755371, "correct_loss_per_token": 1.7592281103134155, "incorrect_loss_per_token": 1.5991544723510742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4655492305755615, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.4655492305755615, "logits_per_char": -0.7327746152877808, "num_chars": 2}, {"sum_logits": -1.5440287590026855, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.5440287590026855, "logits_per_char": -0.7720143795013428, "num_chars": 2}, {"sum_logits": -1.7592281103134155, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.7592281103134155, "logits_per_char": -0.8796140551567078, "num_chars": 2}, {"sum_logits": -1.556236743927002, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.556236743927002, "logits_per_char": -0.778118371963501, "num_chars": 2}, {"sum_logits": -1.8308031558990479, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.8308031558990479, "logits_per_char": -0.9154015779495239, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 383, "native_id": "63b3652d54c8c0e571f6bb50de318bf0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4864486455917358, "incorrect_loss_raw": 1.6729413866996765, "correct_loss_per_char": 0.7432243227958679, "incorrect_loss_per_char": 0.8364706933498383, "correct_loss_per_token": 1.4864486455917358, "incorrect_loss_per_token": 1.6729413866996765, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3849892616271973, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3849892616271973, "logits_per_char": -0.6924946308135986, "num_chars": 2}, {"sum_logits": -1.4864486455917358, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4864486455917358, "logits_per_char": -0.7432243227958679, "num_chars": 2}, {"sum_logits": -1.659924864768982, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.659924864768982, "logits_per_char": -0.829962432384491, "num_chars": 2}, {"sum_logits": -1.6270517110824585, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6270517110824585, "logits_per_char": -0.8135258555412292, "num_chars": 2}, {"sum_logits": -2.0197997093200684, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.0197997093200684, "logits_per_char": -1.0098998546600342, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 384, "native_id": "0843c51212a3c2eee660fab5648c9e19", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7036921977996826, "incorrect_loss_raw": 1.6161776781082153, "correct_loss_per_char": 0.8518460988998413, "incorrect_loss_per_char": 0.8080888390541077, "correct_loss_per_token": 1.7036921977996826, "incorrect_loss_per_token": 1.6161776781082153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3906636238098145, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": true, "logits_per_token": -1.3906636238098145, "logits_per_char": -0.6953318119049072, "num_chars": 2}, {"sum_logits": -1.5290331840515137, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.5290331840515137, "logits_per_char": -0.7645165920257568, "num_chars": 2}, {"sum_logits": -1.7036921977996826, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.7036921977996826, "logits_per_char": -0.8518460988998413, "num_chars": 2}, {"sum_logits": -1.5928959846496582, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.5928959846496582, "logits_per_char": -0.7964479923248291, "num_chars": 2}, {"sum_logits": -1.952117919921875, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.952117919921875, "logits_per_char": -0.9760589599609375, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 385, "native_id": "1b3d286458a7e7f069222de0376d06da", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.645995020866394, "incorrect_loss_raw": 1.6562470495700836, "correct_loss_per_char": 0.822997510433197, "incorrect_loss_per_char": 0.8281235247850418, "correct_loss_per_token": 1.645995020866394, "incorrect_loss_per_token": 1.6562470495700836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.278359293937683, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.278359293937683, "logits_per_char": -0.6391796469688416, "num_chars": 2}, {"sum_logits": -1.4917393922805786, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4917393922805786, "logits_per_char": -0.7458696961402893, "num_chars": 2}, {"sum_logits": -1.645995020866394, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.645995020866394, "logits_per_char": -0.822997510433197, "num_chars": 2}, {"sum_logits": -1.7005006074905396, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7005006074905396, "logits_per_char": -0.8502503037452698, "num_chars": 2}, {"sum_logits": -2.154388904571533, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.154388904571533, "logits_per_char": -1.0771944522857666, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 386, "native_id": "86e2aabfb9d401567f04d87a648ff776", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6503444910049438, "incorrect_loss_raw": 1.610846221446991, "correct_loss_per_char": 0.8251722455024719, "incorrect_loss_per_char": 0.8054231107234955, "correct_loss_per_token": 1.6503444910049438, "incorrect_loss_per_token": 1.610846221446991, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.474716067314148, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.474716067314148, "logits_per_char": -0.737358033657074, "num_chars": 2}, {"sum_logits": -1.6319693326950073, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6319693326950073, "logits_per_char": -0.8159846663475037, "num_chars": 2}, {"sum_logits": -1.6503444910049438, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6503444910049438, "logits_per_char": -0.8251722455024719, "num_chars": 2}, {"sum_logits": -1.6029647588729858, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6029647588729858, "logits_per_char": -0.8014823794364929, "num_chars": 2}, {"sum_logits": -1.7337347269058228, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.7337347269058228, "logits_per_char": -0.8668673634529114, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 387, "native_id": "092c24369367b3c7457198f3ce160fe3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6774122714996338, "incorrect_loss_raw": 1.619200438261032, "correct_loss_per_char": 0.8387061357498169, "incorrect_loss_per_char": 0.809600219130516, "correct_loss_per_token": 1.6774122714996338, "incorrect_loss_per_token": 1.619200438261032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4881216287612915, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4881216287612915, "logits_per_char": -0.7440608143806458, "num_chars": 2}, {"sum_logits": -1.500133991241455, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.500133991241455, "logits_per_char": -0.7500669956207275, "num_chars": 2}, {"sum_logits": -1.6774122714996338, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6774122714996338, "logits_per_char": -0.8387061357498169, "num_chars": 2}, {"sum_logits": -1.5149604082107544, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5149604082107544, "logits_per_char": -0.7574802041053772, "num_chars": 2}, {"sum_logits": -1.9735857248306274, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9735857248306274, "logits_per_char": -0.9867928624153137, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 388, "native_id": "cab9eea2a91b1bd5c0a01b11f594f154", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5118041038513184, "incorrect_loss_raw": 1.6549222469329834, "correct_loss_per_char": 0.7559020519256592, "incorrect_loss_per_char": 0.8274611234664917, "correct_loss_per_token": 1.5118041038513184, "incorrect_loss_per_token": 1.6549222469329834, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5118041038513184, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.5118041038513184, "logits_per_char": -0.7559020519256592, "num_chars": 2}, {"sum_logits": -1.6169869899749756, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6169869899749756, "logits_per_char": -0.8084934949874878, "num_chars": 2}, {"sum_logits": -1.5296857357025146, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5296857357025146, "logits_per_char": -0.7648428678512573, "num_chars": 2}, {"sum_logits": -1.5575907230377197, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5575907230377197, "logits_per_char": -0.7787953615188599, "num_chars": 2}, {"sum_logits": -1.9154255390167236, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9154255390167236, "logits_per_char": -0.9577127695083618, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 389, "native_id": "6e77de03bee86d6c20780e14f00944d0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4561984539031982, "incorrect_loss_raw": 1.6953375935554504, "correct_loss_per_char": 0.7280992269515991, "incorrect_loss_per_char": 0.8476687967777252, "correct_loss_per_token": 1.4561984539031982, "incorrect_loss_per_token": 1.6953375935554504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4560396671295166, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4560396671295166, "logits_per_char": -0.7280198335647583, "num_chars": 2}, {"sum_logits": -1.5695725679397583, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5695725679397583, "logits_per_char": -0.7847862839698792, "num_chars": 2}, {"sum_logits": -1.4561984539031982, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4561984539031982, "logits_per_char": -0.7280992269515991, "num_chars": 2}, {"sum_logits": -1.5558212995529175, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5558212995529175, "logits_per_char": -0.7779106497764587, "num_chars": 2}, {"sum_logits": -2.1999168395996094, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.1999168395996094, "logits_per_char": -1.0999584197998047, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 390, "native_id": "7f25dbab26165b3c8800c2733ca759d6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5755292177200317, "incorrect_loss_raw": 1.6282092034816742, "correct_loss_per_char": 0.7877646088600159, "incorrect_loss_per_char": 0.8141046017408371, "correct_loss_per_token": 1.5755292177200317, "incorrect_loss_per_token": 1.6282092034816742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6599262952804565, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6599262952804565, "logits_per_char": -0.8299631476402283, "num_chars": 2}, {"sum_logits": -1.5755292177200317, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5755292177200317, "logits_per_char": -0.7877646088600159, "num_chars": 2}, {"sum_logits": -1.5812466144561768, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5812466144561768, "logits_per_char": -0.7906233072280884, "num_chars": 2}, {"sum_logits": -1.568406581878662, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.568406581878662, "logits_per_char": -0.784203290939331, "num_chars": 2}, {"sum_logits": -1.7032573223114014, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7032573223114014, "logits_per_char": -0.8516286611557007, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 391, "native_id": "9024493a3edbaf555fda5b477e835bf5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0222630500793457, "incorrect_loss_raw": 1.5365442037582397, "correct_loss_per_char": 1.0111315250396729, "incorrect_loss_per_char": 0.7682721018791199, "correct_loss_per_token": 2.0222630500793457, "incorrect_loss_per_token": 1.5365442037582397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4370744228363037, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4370744228363037, "logits_per_char": -0.7185372114181519, "num_chars": 2}, {"sum_logits": -1.5781500339508057, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5781500339508057, "logits_per_char": -0.7890750169754028, "num_chars": 2}, {"sum_logits": -1.5471123456954956, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5471123456954956, "logits_per_char": -0.7735561728477478, "num_chars": 2}, {"sum_logits": -1.583840012550354, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.583840012550354, "logits_per_char": -0.791920006275177, "num_chars": 2}, {"sum_logits": -2.0222630500793457, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0222630500793457, "logits_per_char": -1.0111315250396729, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 392, "native_id": "fc59ab1a9e6d2b51126dd828d30e9167", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6138259172439575, "incorrect_loss_raw": 1.6391800343990326, "correct_loss_per_char": 0.8069129586219788, "incorrect_loss_per_char": 0.8195900171995163, "correct_loss_per_token": 1.6138259172439575, "incorrect_loss_per_token": 1.6391800343990326, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4006154537200928, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4006154537200928, "logits_per_char": -0.7003077268600464, "num_chars": 2}, {"sum_logits": -1.6078267097473145, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6078267097473145, "logits_per_char": -0.8039133548736572, "num_chars": 2}, {"sum_logits": -1.5285407304763794, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5285407304763794, "logits_per_char": -0.7642703652381897, "num_chars": 2}, {"sum_logits": -1.6138259172439575, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6138259172439575, "logits_per_char": -0.8069129586219788, "num_chars": 2}, {"sum_logits": -2.0197372436523438, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.0197372436523438, "logits_per_char": -1.0098686218261719, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 393, "native_id": "5a50ea4bb2d13dc4f620ebd45025d445", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6657434701919556, "incorrect_loss_raw": 1.6043838262557983, "correct_loss_per_char": 0.8328717350959778, "incorrect_loss_per_char": 0.8021919131278992, "correct_loss_per_token": 1.6657434701919556, "incorrect_loss_per_token": 1.6043838262557983, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.463375210762024, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.463375210762024, "logits_per_char": -0.731687605381012, "num_chars": 2}, {"sum_logits": -1.6021090745925903, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6021090745925903, "logits_per_char": -0.8010545372962952, "num_chars": 2}, {"sum_logits": -1.6723271608352661, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6723271608352661, "logits_per_char": -0.8361635804176331, "num_chars": 2}, {"sum_logits": -1.679723858833313, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.679723858833313, "logits_per_char": -0.8398619294166565, "num_chars": 2}, {"sum_logits": -1.6657434701919556, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6657434701919556, "logits_per_char": -0.8328717350959778, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 394, "native_id": "8becd2ee4e86258566a9c2b0e6d9544e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5465235710144043, "incorrect_loss_raw": 1.6535069942474365, "correct_loss_per_char": 0.7732617855072021, "incorrect_loss_per_char": 0.8267534971237183, "correct_loss_per_token": 1.5465235710144043, "incorrect_loss_per_token": 1.6535069942474365, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.330904245376587, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.330904245376587, "logits_per_char": -0.6654521226882935, "num_chars": 2}, {"sum_logits": -1.5465235710144043, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5465235710144043, "logits_per_char": -0.7732617855072021, "num_chars": 2}, {"sum_logits": -1.6576991081237793, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6576991081237793, "logits_per_char": -0.8288495540618896, "num_chars": 2}, {"sum_logits": -1.7420594692230225, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7420594692230225, "logits_per_char": -0.8710297346115112, "num_chars": 2}, {"sum_logits": -1.8833651542663574, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.8833651542663574, "logits_per_char": -0.9416825771331787, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 395, "native_id": "2a21820a135e1a49883525c055c74a0b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.022676706314087, "incorrect_loss_raw": 1.536105990409851, "correct_loss_per_char": 1.0113383531570435, "incorrect_loss_per_char": 0.7680529952049255, "correct_loss_per_token": 2.022676706314087, "incorrect_loss_per_token": 1.536105990409851, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4319689273834229, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4319689273834229, "logits_per_char": -0.7159844636917114, "num_chars": 2}, {"sum_logits": -1.5570979118347168, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5570979118347168, "logits_per_char": -0.7785489559173584, "num_chars": 2}, {"sum_logits": -1.5313456058502197, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5313456058502197, "logits_per_char": -0.7656728029251099, "num_chars": 2}, {"sum_logits": -1.624011516571045, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.624011516571045, "logits_per_char": -0.8120057582855225, "num_chars": 2}, {"sum_logits": -2.022676706314087, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.022676706314087, "logits_per_char": -1.0113383531570435, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 396, "native_id": "e5adfec0b5ba691ec752f9b5e0fb8084", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6453282833099365, "incorrect_loss_raw": 1.6276564002037048, "correct_loss_per_char": 0.8226641416549683, "incorrect_loss_per_char": 0.8138282001018524, "correct_loss_per_token": 1.6453282833099365, "incorrect_loss_per_token": 1.6276564002037048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.457727074623108, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.457727074623108, "logits_per_char": -0.728863537311554, "num_chars": 2}, {"sum_logits": -1.5379019975662231, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5379019975662231, "logits_per_char": -0.7689509987831116, "num_chars": 2}, {"sum_logits": -1.5208866596221924, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5208866596221924, "logits_per_char": -0.7604433298110962, "num_chars": 2}, {"sum_logits": -1.6453282833099365, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6453282833099365, "logits_per_char": -0.8226641416549683, "num_chars": 2}, {"sum_logits": -1.994109869003296, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.994109869003296, "logits_per_char": -0.997054934501648, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 397, "native_id": "406e15b76269d20b5448a91648094291", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8778377771377563, "incorrect_loss_raw": 1.566014289855957, "correct_loss_per_char": 0.9389188885688782, "incorrect_loss_per_char": 0.7830071449279785, "correct_loss_per_token": 1.8778377771377563, "incorrect_loss_per_token": 1.566014289855957, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4191471338272095, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4191471338272095, "logits_per_char": -0.7095735669136047, "num_chars": 2}, {"sum_logits": -1.7304009199142456, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7304009199142456, "logits_per_char": -0.8652004599571228, "num_chars": 2}, {"sum_logits": -1.5690380334854126, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5690380334854126, "logits_per_char": -0.7845190167427063, "num_chars": 2}, {"sum_logits": -1.5454710721969604, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5454710721969604, "logits_per_char": -0.7727355360984802, "num_chars": 2}, {"sum_logits": -1.8778377771377563, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.8778377771377563, "logits_per_char": -0.9389188885688782, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 398, "native_id": "9c596382ea15768f95b5ef9ceec191dc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6257280111312866, "incorrect_loss_raw": 1.6641463339328766, "correct_loss_per_char": 0.8128640055656433, "incorrect_loss_per_char": 0.8320731669664383, "correct_loss_per_token": 1.6257280111312866, "incorrect_loss_per_token": 1.6641463339328766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3477615118026733, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3477615118026733, "logits_per_char": -0.6738807559013367, "num_chars": 2}, {"sum_logits": -1.4107831716537476, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.4107831716537476, "logits_per_char": -0.7053915858268738, "num_chars": 2}, {"sum_logits": -1.6257280111312866, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6257280111312866, "logits_per_char": -0.8128640055656433, "num_chars": 2}, {"sum_logits": -1.6891282796859741, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6891282796859741, "logits_per_char": -0.8445641398429871, "num_chars": 2}, {"sum_logits": -2.2089123725891113, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.2089123725891113, "logits_per_char": -1.1044561862945557, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 399, "native_id": "7a3d0c94438a5c8a09364aaebb848a2c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.392136812210083, "incorrect_loss_raw": 1.6929995119571686, "correct_loss_per_char": 0.6960684061050415, "incorrect_loss_per_char": 0.8464997559785843, "correct_loss_per_token": 1.392136812210083, "incorrect_loss_per_token": 1.6929995119571686, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.392136812210083, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.392136812210083, "logits_per_char": -0.6960684061050415, "num_chars": 2}, {"sum_logits": -1.6382770538330078, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6382770538330078, "logits_per_char": -0.8191385269165039, "num_chars": 2}, {"sum_logits": -1.5228592157363892, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5228592157363892, "logits_per_char": -0.7614296078681946, "num_chars": 2}, {"sum_logits": -1.6155123710632324, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6155123710632324, "logits_per_char": -0.8077561855316162, "num_chars": 2}, {"sum_logits": -1.995349407196045, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.995349407196045, "logits_per_char": -0.9976747035980225, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 400, "native_id": "1ef68db97654f30cd3701b942fadc934", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4385762214660645, "incorrect_loss_raw": 1.7279791831970215, "correct_loss_per_char": 0.7192881107330322, "incorrect_loss_per_char": 0.8639895915985107, "correct_loss_per_token": 1.4385762214660645, "incorrect_loss_per_token": 1.7279791831970215, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.35475754737854, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.35475754737854, "logits_per_char": -0.67737877368927, "num_chars": 2}, {"sum_logits": -1.4385762214660645, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4385762214660645, "logits_per_char": -0.7192881107330322, "num_chars": 2}, {"sum_logits": -1.4921540021896362, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4921540021896362, "logits_per_char": -0.7460770010948181, "num_chars": 2}, {"sum_logits": -1.7165592908859253, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7165592908859253, "logits_per_char": -0.8582796454429626, "num_chars": 2}, {"sum_logits": -2.3484458923339844, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.3484458923339844, "logits_per_char": -1.1742229461669922, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 401, "native_id": "abb090bbc572be1016bcd5f261f28e76", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.649112582206726, "incorrect_loss_raw": 1.6603564620018005, "correct_loss_per_char": 0.824556291103363, "incorrect_loss_per_char": 0.8301782310009003, "correct_loss_per_token": 1.649112582206726, "incorrect_loss_per_token": 1.6603564620018005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2474313974380493, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.2474313974380493, "logits_per_char": -0.6237156987190247, "num_chars": 2}, {"sum_logits": -1.5126452445983887, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5126452445983887, "logits_per_char": -0.7563226222991943, "num_chars": 2}, {"sum_logits": -1.649112582206726, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.649112582206726, "logits_per_char": -0.824556291103363, "num_chars": 2}, {"sum_logits": -1.7317677736282349, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7317677736282349, "logits_per_char": -0.8658838868141174, "num_chars": 2}, {"sum_logits": -2.1495814323425293, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.1495814323425293, "logits_per_char": -1.0747907161712646, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 402, "native_id": "91f2532a832a35cba1b08a3c767be6da", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9239020347595215, "incorrect_loss_raw": 1.5535618662834167, "correct_loss_per_char": 0.9619510173797607, "incorrect_loss_per_char": 0.7767809331417084, "correct_loss_per_token": 1.9239020347595215, "incorrect_loss_per_token": 1.5535618662834167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4257144927978516, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4257144927978516, "logits_per_char": -0.7128572463989258, "num_chars": 2}, {"sum_logits": -1.5555126667022705, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5555126667022705, "logits_per_char": -0.7777563333511353, "num_chars": 2}, {"sum_logits": -1.640014410018921, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.640014410018921, "logits_per_char": -0.8200072050094604, "num_chars": 2}, {"sum_logits": -1.593005895614624, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.593005895614624, "logits_per_char": -0.796502947807312, "num_chars": 2}, {"sum_logits": -1.9239020347595215, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9239020347595215, "logits_per_char": -0.9619510173797607, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 403, "native_id": "f8544c9679d27b747dfad3b8d7aac87a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9500690698623657, "incorrect_loss_raw": 1.549653798341751, "correct_loss_per_char": 0.9750345349311829, "incorrect_loss_per_char": 0.7748268991708755, "correct_loss_per_token": 1.9500690698623657, "incorrect_loss_per_token": 1.549653798341751, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4326214790344238, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4326214790344238, "logits_per_char": -0.7163107395172119, "num_chars": 2}, {"sum_logits": -1.5676658153533936, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5676658153533936, "logits_per_char": -0.7838329076766968, "num_chars": 2}, {"sum_logits": -1.5962742567062378, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5962742567062378, "logits_per_char": -0.7981371283531189, "num_chars": 2}, {"sum_logits": -1.6020536422729492, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6020536422729492, "logits_per_char": -0.8010268211364746, "num_chars": 2}, {"sum_logits": -1.9500690698623657, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9500690698623657, "logits_per_char": -0.9750345349311829, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 404, "native_id": "a7f423c1636ba9e36d18e381928c5dcc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.714538812637329, "incorrect_loss_raw": 1.6058569550514221, "correct_loss_per_char": 0.8572694063186646, "incorrect_loss_per_char": 0.8029284775257111, "correct_loss_per_token": 1.714538812637329, "incorrect_loss_per_token": 1.6058569550514221, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3728742599487305, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.3728742599487305, "logits_per_char": -0.6864371299743652, "num_chars": 2}, {"sum_logits": -1.538391351699829, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.538391351699829, "logits_per_char": -0.7691956758499146, "num_chars": 2}, {"sum_logits": -1.714538812637329, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.714538812637329, "logits_per_char": -0.8572694063186646, "num_chars": 2}, {"sum_logits": -1.6797459125518799, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6797459125518799, "logits_per_char": -0.8398729562759399, "num_chars": 2}, {"sum_logits": -1.832416296005249, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.832416296005249, "logits_per_char": -0.9162081480026245, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 405, "native_id": "e1d354cbfcd620e5dacf83c17746c4b3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.556673526763916, "incorrect_loss_raw": 1.6375300288200378, "correct_loss_per_char": 0.778336763381958, "incorrect_loss_per_char": 0.8187650144100189, "correct_loss_per_token": 1.556673526763916, "incorrect_loss_per_token": 1.6375300288200378, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5300164222717285, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5300164222717285, "logits_per_char": -0.7650082111358643, "num_chars": 2}, {"sum_logits": -1.6546742916107178, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6546742916107178, "logits_per_char": -0.8273371458053589, "num_chars": 2}, {"sum_logits": -1.5082130432128906, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.5082130432128906, "logits_per_char": -0.7541065216064453, "num_chars": 2}, {"sum_logits": -1.556673526763916, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.556673526763916, "logits_per_char": -0.778336763381958, "num_chars": 2}, {"sum_logits": -1.8572163581848145, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.8572163581848145, "logits_per_char": -0.9286081790924072, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 406, "native_id": "53e1e50d204f6ad5a0f69429eadae82e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.105170726776123, "incorrect_loss_raw": 1.8421448767185211, "correct_loss_per_char": 0.5525853633880615, "incorrect_loss_per_char": 0.9210724383592606, "correct_loss_per_token": 1.105170726776123, "incorrect_loss_per_token": 1.8421448767185211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.105170726776123, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.105170726776123, "logits_per_char": -0.5525853633880615, "num_chars": 2}, {"sum_logits": -1.4553838968276978, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4553838968276978, "logits_per_char": -0.7276919484138489, "num_chars": 2}, {"sum_logits": -1.7659826278686523, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7659826278686523, "logits_per_char": -0.8829913139343262, "num_chars": 2}, {"sum_logits": -1.826812982559204, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.826812982559204, "logits_per_char": -0.913406491279602, "num_chars": 2}, {"sum_logits": -2.3203999996185303, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.3203999996185303, "logits_per_char": -1.1601999998092651, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 407, "native_id": "48205cc84aab5e455b22e17c3cc7277d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5417910814285278, "incorrect_loss_raw": 1.6935134530067444, "correct_loss_per_char": 0.7708955407142639, "incorrect_loss_per_char": 0.8467567265033722, "correct_loss_per_token": 1.5417910814285278, "incorrect_loss_per_token": 1.6935134530067444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1796903610229492, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.1796903610229492, "logits_per_char": -0.5898451805114746, "num_chars": 2}, {"sum_logits": -1.5417910814285278, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5417910814285278, "logits_per_char": -0.7708955407142639, "num_chars": 2}, {"sum_logits": -1.7260289192199707, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7260289192199707, "logits_per_char": -0.8630144596099854, "num_chars": 2}, {"sum_logits": -1.7233669757843018, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7233669757843018, "logits_per_char": -0.8616834878921509, "num_chars": 2}, {"sum_logits": -2.144967555999756, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.144967555999756, "logits_per_char": -1.072483777999878, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 408, "native_id": "0f7419d25337e0a75503a015ae777905", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3754066228866577, "incorrect_loss_raw": 1.7056637108325958, "correct_loss_per_char": 0.6877033114433289, "incorrect_loss_per_char": 0.8528318554162979, "correct_loss_per_token": 1.3754066228866577, "incorrect_loss_per_token": 1.7056637108325958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3754066228866577, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3754066228866577, "logits_per_char": -0.6877033114433289, "num_chars": 2}, {"sum_logits": -1.507907509803772, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.507907509803772, "logits_per_char": -0.753953754901886, "num_chars": 2}, {"sum_logits": -1.5599790811538696, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5599790811538696, "logits_per_char": -0.7799895405769348, "num_chars": 2}, {"sum_logits": -1.6912840604782104, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6912840604782104, "logits_per_char": -0.8456420302391052, "num_chars": 2}, {"sum_logits": -2.0634841918945312, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0634841918945312, "logits_per_char": -1.0317420959472656, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 409, "native_id": "5cac4da628f0a58db980649079bd5784", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4308140277862549, "incorrect_loss_raw": 1.69692924618721, "correct_loss_per_char": 0.7154070138931274, "incorrect_loss_per_char": 0.848464623093605, "correct_loss_per_token": 1.4308140277862549, "incorrect_loss_per_token": 1.69692924618721, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4699556827545166, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4699556827545166, "logits_per_char": -0.7349778413772583, "num_chars": 2}, {"sum_logits": -1.561657428741455, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.561657428741455, "logits_per_char": -0.7808287143707275, "num_chars": 2}, {"sum_logits": -1.6586793661117554, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6586793661117554, "logits_per_char": -0.8293396830558777, "num_chars": 2}, {"sum_logits": -1.4308140277862549, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4308140277862549, "logits_per_char": -0.7154070138931274, "num_chars": 2}, {"sum_logits": -2.0974245071411133, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.0974245071411133, "logits_per_char": -1.0487122535705566, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 410, "native_id": "78d1218aeff70a70904767349e3c4c53", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6020421981811523, "incorrect_loss_raw": 1.6757875084877014, "correct_loss_per_char": 0.8010210990905762, "incorrect_loss_per_char": 0.8378937542438507, "correct_loss_per_token": 1.6020421981811523, "incorrect_loss_per_token": 1.6757875084877014, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3527472019195557, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3527472019195557, "logits_per_char": -0.6763736009597778, "num_chars": 2}, {"sum_logits": -1.4601495265960693, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4601495265960693, "logits_per_char": -0.7300747632980347, "num_chars": 2}, {"sum_logits": -1.6020421981811523, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6020421981811523, "logits_per_char": -0.8010210990905762, "num_chars": 2}, {"sum_logits": -1.59006929397583, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.59006929397583, "logits_per_char": -0.795034646987915, "num_chars": 2}, {"sum_logits": -2.3001840114593506, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.3001840114593506, "logits_per_char": -1.1500920057296753, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 411, "native_id": "cce13a32fedb997c017d3fac87c34912", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6255066394805908, "incorrect_loss_raw": 1.6251167058944702, "correct_loss_per_char": 0.8127533197402954, "incorrect_loss_per_char": 0.8125583529472351, "correct_loss_per_token": 1.6255066394805908, "incorrect_loss_per_token": 1.6251167058944702, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5081419944763184, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5081419944763184, "logits_per_char": -0.7540709972381592, "num_chars": 2}, {"sum_logits": -1.6255066394805908, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6255066394805908, "logits_per_char": -0.8127533197402954, "num_chars": 2}, {"sum_logits": -1.4708895683288574, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4708895683288574, "logits_per_char": -0.7354447841644287, "num_chars": 2}, {"sum_logits": -1.6007342338562012, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6007342338562012, "logits_per_char": -0.8003671169281006, "num_chars": 2}, {"sum_logits": -1.920701026916504, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.920701026916504, "logits_per_char": -0.960350513458252, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 412, "native_id": "6714487b839f648e348ac972ed114af3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4746159315109253, "incorrect_loss_raw": 1.723026841878891, "correct_loss_per_char": 0.7373079657554626, "incorrect_loss_per_char": 0.8615134209394455, "correct_loss_per_token": 1.4746159315109253, "incorrect_loss_per_token": 1.723026841878891, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1864728927612305, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.1864728927612305, "logits_per_char": -0.5932364463806152, "num_chars": 2}, {"sum_logits": -1.4746159315109253, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4746159315109253, "logits_per_char": -0.7373079657554626, "num_chars": 2}, {"sum_logits": -1.6754992008209229, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6754992008209229, "logits_per_char": -0.8377496004104614, "num_chars": 2}, {"sum_logits": -1.8224917650222778, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8224917650222778, "logits_per_char": -0.9112458825111389, "num_chars": 2}, {"sum_logits": -2.207643508911133, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.207643508911133, "logits_per_char": -1.1038217544555664, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 413, "native_id": "3e536d9253bfac45de83e8ee291ca143", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.575646996498108, "incorrect_loss_raw": 1.6461383402347565, "correct_loss_per_char": 0.787823498249054, "incorrect_loss_per_char": 0.8230691701173782, "correct_loss_per_token": 1.575646996498108, "incorrect_loss_per_token": 1.6461383402347565, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4417675733566284, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4417675733566284, "logits_per_char": -0.7208837866783142, "num_chars": 2}, {"sum_logits": -1.575646996498108, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.575646996498108, "logits_per_char": -0.787823498249054, "num_chars": 2}, {"sum_logits": -1.5293077230453491, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5293077230453491, "logits_per_char": -0.7646538615226746, "num_chars": 2}, {"sum_logits": -1.6034497022628784, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6034497022628784, "logits_per_char": -0.8017248511314392, "num_chars": 2}, {"sum_logits": -2.01002836227417, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.01002836227417, "logits_per_char": -1.005014181137085, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 414, "native_id": "9f830faa0f8e3d7fb3a658c15a5fbe63", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.691168189048767, "incorrect_loss_raw": 1.6586512923240662, "correct_loss_per_char": 0.8455840945243835, "incorrect_loss_per_char": 0.8293256461620331, "correct_loss_per_token": 1.691168189048767, "incorrect_loss_per_token": 1.6586512923240662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3016722202301025, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3016722202301025, "logits_per_char": -0.6508361101150513, "num_chars": 2}, {"sum_logits": -1.4100674390792847, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4100674390792847, "logits_per_char": -0.7050337195396423, "num_chars": 2}, {"sum_logits": -1.6507560014724731, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6507560014724731, "logits_per_char": -0.8253780007362366, "num_chars": 2}, {"sum_logits": -1.691168189048767, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.691168189048767, "logits_per_char": -0.8455840945243835, "num_chars": 2}, {"sum_logits": -2.2721095085144043, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.2721095085144043, "logits_per_char": -1.1360547542572021, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 415, "native_id": "bbcef409e0acb71b515acc144d5b402c_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6512449979782104, "incorrect_loss_raw": 1.628050297498703, "correct_loss_per_char": 0.8256224989891052, "incorrect_loss_per_char": 0.8140251487493515, "correct_loss_per_token": 1.6512449979782104, "incorrect_loss_per_token": 1.628050297498703, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3108680248260498, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3108680248260498, "logits_per_char": -0.6554340124130249, "num_chars": 2}, {"sum_logits": -1.5787032842636108, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5787032842636108, "logits_per_char": -0.7893516421318054, "num_chars": 2}, {"sum_logits": -1.7172483205795288, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7172483205795288, "logits_per_char": -0.8586241602897644, "num_chars": 2}, {"sum_logits": -1.6512449979782104, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6512449979782104, "logits_per_char": -0.8256224989891052, "num_chars": 2}, {"sum_logits": -1.9053815603256226, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9053815603256226, "logits_per_char": -0.9526907801628113, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 416, "native_id": "cbb0c9a69ca0922371a48177087ef407", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4040662050247192, "incorrect_loss_raw": 1.6950059533119202, "correct_loss_per_char": 0.7020331025123596, "incorrect_loss_per_char": 0.8475029766559601, "correct_loss_per_token": 1.4040662050247192, "incorrect_loss_per_token": 1.6950059533119202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.528531551361084, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.528531551361084, "logits_per_char": -0.764265775680542, "num_chars": 2}, {"sum_logits": -1.6007879972457886, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6007879972457886, "logits_per_char": -0.8003939986228943, "num_chars": 2}, {"sum_logits": -1.4040662050247192, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4040662050247192, "logits_per_char": -0.7020331025123596, "num_chars": 2}, {"sum_logits": -1.6107062101364136, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6107062101364136, "logits_per_char": -0.8053531050682068, "num_chars": 2}, {"sum_logits": -2.0399980545043945, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0399980545043945, "logits_per_char": -1.0199990272521973, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 417, "native_id": "b92f786638796fc028947ac0e9a44fef", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9294061660766602, "incorrect_loss_raw": 1.553258329629898, "correct_loss_per_char": 0.9647030830383301, "incorrect_loss_per_char": 0.776629164814949, "correct_loss_per_token": 1.9294061660766602, "incorrect_loss_per_token": 1.553258329629898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4181077480316162, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4181077480316162, "logits_per_char": -0.7090538740158081, "num_chars": 2}, {"sum_logits": -1.5860515832901, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5860515832901, "logits_per_char": -0.79302579164505, "num_chars": 2}, {"sum_logits": -1.61582612991333, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.61582612991333, "logits_per_char": -0.807913064956665, "num_chars": 2}, {"sum_logits": -1.593047857284546, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.593047857284546, "logits_per_char": -0.796523928642273, "num_chars": 2}, {"sum_logits": -1.9294061660766602, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9294061660766602, "logits_per_char": -0.9647030830383301, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 418, "native_id": "5abeb4a2126597d4ef7b5a32e9e22abf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.141023874282837, "incorrect_loss_raw": 1.518740177154541, "correct_loss_per_char": 1.0705119371414185, "incorrect_loss_per_char": 0.7593700885772705, "correct_loss_per_token": 2.141023874282837, "incorrect_loss_per_token": 1.518740177154541, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4193240404129028, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.4193240404129028, "logits_per_char": -0.7096620202064514, "num_chars": 2}, {"sum_logits": -1.5095268487930298, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5095268487930298, "logits_per_char": -0.7547634243965149, "num_chars": 2}, {"sum_logits": -1.5752718448638916, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5752718448638916, "logits_per_char": -0.7876359224319458, "num_chars": 2}, {"sum_logits": -1.5708379745483398, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5708379745483398, "logits_per_char": -0.7854189872741699, "num_chars": 2}, {"sum_logits": -2.141023874282837, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -2.141023874282837, "logits_per_char": -1.0705119371414185, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 419, "native_id": "8d4b0312f02be445e09a9462873d02bb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7056688070297241, "incorrect_loss_raw": 1.6092478334903717, "correct_loss_per_char": 0.8528344035148621, "incorrect_loss_per_char": 0.8046239167451859, "correct_loss_per_token": 1.7056688070297241, "incorrect_loss_per_token": 1.6092478334903717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3973804712295532, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3973804712295532, "logits_per_char": -0.6986902356147766, "num_chars": 2}, {"sum_logits": -1.510183334350586, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.510183334350586, "logits_per_char": -0.755091667175293, "num_chars": 2}, {"sum_logits": -1.6406875848770142, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6406875848770142, "logits_per_char": -0.8203437924385071, "num_chars": 2}, {"sum_logits": -1.7056688070297241, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7056688070297241, "logits_per_char": -0.8528344035148621, "num_chars": 2}, {"sum_logits": -1.8887399435043335, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8887399435043335, "logits_per_char": -0.9443699717521667, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 420, "native_id": "f7140f00ddd8d1c5d93b05ea32ad1fff", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6591582298278809, "incorrect_loss_raw": 1.637254238128662, "correct_loss_per_char": 0.8295791149139404, "incorrect_loss_per_char": 0.818627119064331, "correct_loss_per_token": 1.6591582298278809, "incorrect_loss_per_token": 1.637254238128662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4556708335876465, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.4556708335876465, "logits_per_char": -0.7278354167938232, "num_chars": 2}, {"sum_logits": -1.3786704540252686, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.3786704540252686, "logits_per_char": -0.6893352270126343, "num_chars": 2}, {"sum_logits": -1.6466407775878906, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6466407775878906, "logits_per_char": -0.8233203887939453, "num_chars": 2}, {"sum_logits": -1.6591582298278809, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6591582298278809, "logits_per_char": -0.8295791149139404, "num_chars": 2}, {"sum_logits": -2.0680348873138428, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.0680348873138428, "logits_per_char": -1.0340174436569214, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 421, "native_id": "8b3b598a647dfd2d63fcedce5f461040", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5559275150299072, "incorrect_loss_raw": 1.6447429656982422, "correct_loss_per_char": 0.7779637575149536, "incorrect_loss_per_char": 0.8223714828491211, "correct_loss_per_token": 1.5559275150299072, "incorrect_loss_per_token": 1.6447429656982422, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5559275150299072, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5559275150299072, "logits_per_char": -0.7779637575149536, "num_chars": 2}, {"sum_logits": -1.4963982105255127, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4963982105255127, "logits_per_char": -0.7481991052627563, "num_chars": 2}, {"sum_logits": -1.5252866744995117, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5252866744995117, "logits_per_char": -0.7626433372497559, "num_chars": 2}, {"sum_logits": -1.6038293838500977, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6038293838500977, "logits_per_char": -0.8019146919250488, "num_chars": 2}, {"sum_logits": -1.9534575939178467, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9534575939178467, "logits_per_char": -0.9767287969589233, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 422, "native_id": "7a900bc3a373806b6c56f0e19534005f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.227513313293457, "incorrect_loss_raw": 1.5349904298782349, "correct_loss_per_char": 1.1137566566467285, "incorrect_loss_per_char": 0.7674952149391174, "correct_loss_per_token": 2.227513313293457, "incorrect_loss_per_token": 1.5349904298782349, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1950592994689941, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.1950592994689941, "logits_per_char": -0.5975296497344971, "num_chars": 2}, {"sum_logits": -1.4666588306427002, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4666588306427002, "logits_per_char": -0.7333294153213501, "num_chars": 2}, {"sum_logits": -1.6244440078735352, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6244440078735352, "logits_per_char": -0.8122220039367676, "num_chars": 2}, {"sum_logits": -1.85379958152771, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.85379958152771, "logits_per_char": -0.926899790763855, "num_chars": 2}, {"sum_logits": -2.227513313293457, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.227513313293457, "logits_per_char": -1.1137566566467285, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 423, "native_id": "3d79c10ddf26a5ed7dc0bb168fb0b3ed", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4910615682601929, "incorrect_loss_raw": 1.6595399975776672, "correct_loss_per_char": 0.7455307841300964, "incorrect_loss_per_char": 0.8297699987888336, "correct_loss_per_token": 1.4910615682601929, "incorrect_loss_per_token": 1.6595399975776672, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.542773962020874, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.542773962020874, "logits_per_char": -0.771386981010437, "num_chars": 2}, {"sum_logits": -1.4910615682601929, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.4910615682601929, "logits_per_char": -0.7455307841300964, "num_chars": 2}, {"sum_logits": -1.6196568012237549, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6196568012237549, "logits_per_char": -0.8098284006118774, "num_chars": 2}, {"sum_logits": -1.5487170219421387, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5487170219421387, "logits_per_char": -0.7743585109710693, "num_chars": 2}, {"sum_logits": -1.9270122051239014, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.9270122051239014, "logits_per_char": -0.9635061025619507, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 424, "native_id": "b7091d2bfcea421d787ce9e7982f104a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6159437894821167, "incorrect_loss_raw": 1.6475909054279327, "correct_loss_per_char": 0.8079718947410583, "incorrect_loss_per_char": 0.8237954527139664, "correct_loss_per_token": 1.6159437894821167, "incorrect_loss_per_token": 1.6475909054279327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3586827516555786, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.3586827516555786, "logits_per_char": -0.6793413758277893, "num_chars": 2}, {"sum_logits": -1.464683175086975, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.464683175086975, "logits_per_char": -0.7323415875434875, "num_chars": 2}, {"sum_logits": -1.6159437894821167, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6159437894821167, "logits_per_char": -0.8079718947410583, "num_chars": 2}, {"sum_logits": -1.724835753440857, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.724835753440857, "logits_per_char": -0.8624178767204285, "num_chars": 2}, {"sum_logits": -2.0421619415283203, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -2.0421619415283203, "logits_per_char": -1.0210809707641602, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 425, "native_id": "d060ab71d0efff3cab5960089a6bb3a2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5241820812225342, "incorrect_loss_raw": 1.678735077381134, "correct_loss_per_char": 0.7620910406112671, "incorrect_loss_per_char": 0.839367538690567, "correct_loss_per_token": 1.5241820812225342, "incorrect_loss_per_token": 1.678735077381134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.314849615097046, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.314849615097046, "logits_per_char": -0.657424807548523, "num_chars": 2}, {"sum_logits": -1.5241820812225342, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5241820812225342, "logits_per_char": -0.7620910406112671, "num_chars": 2}, {"sum_logits": -1.5614535808563232, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5614535808563232, "logits_per_char": -0.7807267904281616, "num_chars": 2}, {"sum_logits": -1.7129485607147217, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7129485607147217, "logits_per_char": -0.8564742803573608, "num_chars": 2}, {"sum_logits": -2.1256885528564453, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.1256885528564453, "logits_per_char": -1.0628442764282227, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 426, "native_id": "b399f6008d90dbd92bcce5abed4c1fd1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3701726198196411, "incorrect_loss_raw": 1.7251835763454437, "correct_loss_per_char": 0.6850863099098206, "incorrect_loss_per_char": 0.8625917881727219, "correct_loss_per_token": 1.3701726198196411, "incorrect_loss_per_token": 1.7251835763454437, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3701726198196411, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3701726198196411, "logits_per_char": -0.6850863099098206, "num_chars": 2}, {"sum_logits": -1.527680516242981, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.527680516242981, "logits_per_char": -0.7638402581214905, "num_chars": 2}, {"sum_logits": -1.5273462533950806, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5273462533950806, "logits_per_char": -0.7636731266975403, "num_chars": 2}, {"sum_logits": -1.5958667993545532, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5958667993545532, "logits_per_char": -0.7979333996772766, "num_chars": 2}, {"sum_logits": -2.24984073638916, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.24984073638916, "logits_per_char": -1.12492036819458, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 427, "native_id": "80c19c62338edae0e8a1f5c6fec0d29a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5552691221237183, "incorrect_loss_raw": 1.6758773922920227, "correct_loss_per_char": 0.7776345610618591, "incorrect_loss_per_char": 0.8379386961460114, "correct_loss_per_token": 1.5552691221237183, "incorrect_loss_per_token": 1.6758773922920227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4071018695831299, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4071018695831299, "logits_per_char": -0.7035509347915649, "num_chars": 2}, {"sum_logits": -1.5552691221237183, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5552691221237183, "logits_per_char": -0.7776345610618591, "num_chars": 2}, {"sum_logits": -1.5224992036819458, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5224992036819458, "logits_per_char": -0.7612496018409729, "num_chars": 2}, {"sum_logits": -1.545223593711853, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.545223593711853, "logits_per_char": -0.7726117968559265, "num_chars": 2}, {"sum_logits": -2.228684902191162, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.228684902191162, "logits_per_char": -1.114342451095581, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 428, "native_id": "1a4e83b433620cb2d7d806882f8d57e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.321972370147705, "incorrect_loss_raw": 1.7218152284622192, "correct_loss_per_char": 0.6609861850738525, "incorrect_loss_per_char": 0.8609076142311096, "correct_loss_per_token": 1.321972370147705, "incorrect_loss_per_token": 1.7218152284622192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.321972370147705, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.321972370147705, "logits_per_char": -0.6609861850738525, "num_chars": 2}, {"sum_logits": -1.573256254196167, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.573256254196167, "logits_per_char": -0.7866281270980835, "num_chars": 2}, {"sum_logits": -1.6159203052520752, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6159203052520752, "logits_per_char": -0.8079601526260376, "num_chars": 2}, {"sum_logits": -1.6222255229949951, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6222255229949951, "logits_per_char": -0.8111127614974976, "num_chars": 2}, {"sum_logits": -2.0758588314056396, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.0758588314056396, "logits_per_char": -1.0379294157028198, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 429, "native_id": "b9e04a53c0ee7325b901de4d12d56884", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.545723557472229, "incorrect_loss_raw": 1.6430020034313202, "correct_loss_per_char": 0.7728617787361145, "incorrect_loss_per_char": 0.8215010017156601, "correct_loss_per_token": 1.545723557472229, "incorrect_loss_per_token": 1.6430020034313202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.498928427696228, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.498928427696228, "logits_per_char": -0.749464213848114, "num_chars": 2}, {"sum_logits": -1.5276252031326294, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5276252031326294, "logits_per_char": -0.7638126015663147, "num_chars": 2}, {"sum_logits": -1.545723557472229, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.545723557472229, "logits_per_char": -0.7728617787361145, "num_chars": 2}, {"sum_logits": -1.6530914306640625, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6530914306640625, "logits_per_char": -0.8265457153320312, "num_chars": 2}, {"sum_logits": -1.8923629522323608, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8923629522323608, "logits_per_char": -0.9461814761161804, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 430, "native_id": "7490aa460f66000555a8a94008179cbb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8581725358963013, "incorrect_loss_raw": 1.5692170560359955, "correct_loss_per_char": 0.9290862679481506, "incorrect_loss_per_char": 0.7846085280179977, "correct_loss_per_token": 1.8581725358963013, "incorrect_loss_per_token": 1.5692170560359955, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3685169219970703, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3685169219970703, "logits_per_char": -0.6842584609985352, "num_chars": 2}, {"sum_logits": -1.55998694896698, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.55998694896698, "logits_per_char": -0.77999347448349, "num_chars": 2}, {"sum_logits": -1.6346708536148071, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6346708536148071, "logits_per_char": -0.8173354268074036, "num_chars": 2}, {"sum_logits": -1.7136934995651245, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7136934995651245, "logits_per_char": -0.8568467497825623, "num_chars": 2}, {"sum_logits": -1.8581725358963013, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8581725358963013, "logits_per_char": -0.9290862679481506, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 431, "native_id": "ad8ee2965a33ff4b0e3d2ac732676594", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.842266321182251, "incorrect_loss_raw": 1.5705058574676514, "correct_loss_per_char": 0.9211331605911255, "incorrect_loss_per_char": 0.7852529287338257, "correct_loss_per_token": 1.842266321182251, "incorrect_loss_per_token": 1.5705058574676514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4493788480758667, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.4493788480758667, "logits_per_char": -0.7246894240379333, "num_chars": 2}, {"sum_logits": -1.4651747941970825, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.4651747941970825, "logits_per_char": -0.7325873970985413, "num_chars": 2}, {"sum_logits": -1.6746885776519775, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6746885776519775, "logits_per_char": -0.8373442888259888, "num_chars": 2}, {"sum_logits": -1.6927812099456787, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6927812099456787, "logits_per_char": -0.8463906049728394, "num_chars": 2}, {"sum_logits": -1.842266321182251, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.842266321182251, "logits_per_char": -0.9211331605911255, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 432, "native_id": "64d2310eff6b661baeb41b4ccc392e35", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7577648162841797, "incorrect_loss_raw": 1.6495427191257477, "correct_loss_per_char": 0.8788824081420898, "incorrect_loss_per_char": 0.8247713595628738, "correct_loss_per_token": 1.7577648162841797, "incorrect_loss_per_token": 1.6495427191257477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2093275785446167, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.2093275785446167, "logits_per_char": -0.6046637892723083, "num_chars": 2}, {"sum_logits": -1.49375581741333, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.49375581741333, "logits_per_char": -0.746877908706665, "num_chars": 2}, {"sum_logits": -1.6253297328948975, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6253297328948975, "logits_per_char": -0.8126648664474487, "num_chars": 2}, {"sum_logits": -1.7577648162841797, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7577648162841797, "logits_per_char": -0.8788824081420898, "num_chars": 2}, {"sum_logits": -2.2697577476501465, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.2697577476501465, "logits_per_char": -1.1348788738250732, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 433, "native_id": "6b1f5ebd9d0dbc7e34a598456a6091a8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.660834789276123, "incorrect_loss_raw": 1.61974036693573, "correct_loss_per_char": 0.8304173946380615, "incorrect_loss_per_char": 0.809870183467865, "correct_loss_per_token": 1.660834789276123, "incorrect_loss_per_token": 1.61974036693573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4828455448150635, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4828455448150635, "logits_per_char": -0.7414227724075317, "num_chars": 2}, {"sum_logits": -1.4157640933990479, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4157640933990479, "logits_per_char": -0.7078820466995239, "num_chars": 2}, {"sum_logits": -1.660834789276123, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.660834789276123, "logits_per_char": -0.8304173946380615, "num_chars": 2}, {"sum_logits": -1.6933846473693848, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6933846473693848, "logits_per_char": -0.8466923236846924, "num_chars": 2}, {"sum_logits": -1.8869671821594238, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8869671821594238, "logits_per_char": -0.9434835910797119, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 434, "native_id": "080ef6941410139d6869e78122bc741e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6150658130645752, "incorrect_loss_raw": 1.6624024212360382, "correct_loss_per_char": 0.8075329065322876, "incorrect_loss_per_char": 0.8312012106180191, "correct_loss_per_token": 1.6150658130645752, "incorrect_loss_per_token": 1.6624024212360382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3175337314605713, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.3175337314605713, "logits_per_char": -0.6587668657302856, "num_chars": 2}, {"sum_logits": -1.4731062650680542, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.4731062650680542, "logits_per_char": -0.7365531325340271, "num_chars": 2}, {"sum_logits": -1.6150658130645752, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6150658130645752, "logits_per_char": -0.8075329065322876, "num_chars": 2}, {"sum_logits": -1.689328670501709, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.689328670501709, "logits_per_char": -0.8446643352508545, "num_chars": 2}, {"sum_logits": -2.1696410179138184, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -2.1696410179138184, "logits_per_char": -1.0848205089569092, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 435, "native_id": "6c70d98cfb8e97fda8caefcee761a229", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7335245609283447, "incorrect_loss_raw": 1.5979670882225037, "correct_loss_per_char": 0.8667622804641724, "incorrect_loss_per_char": 0.7989835441112518, "correct_loss_per_token": 1.7335245609283447, "incorrect_loss_per_token": 1.5979670882225037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3999049663543701, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3999049663543701, "logits_per_char": -0.6999524831771851, "num_chars": 2}, {"sum_logits": -1.523416519165039, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.523416519165039, "logits_per_char": -0.7617082595825195, "num_chars": 2}, {"sum_logits": -1.6442556381225586, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6442556381225586, "logits_per_char": -0.8221278190612793, "num_chars": 2}, {"sum_logits": -1.7335245609283447, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7335245609283447, "logits_per_char": -0.8667622804641724, "num_chars": 2}, {"sum_logits": -1.8242912292480469, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.8242912292480469, "logits_per_char": -0.9121456146240234, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 436, "native_id": "75ac594b4fdbfba006e61315d1b2c815", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3835643529891968, "incorrect_loss_raw": 1.7214464247226715, "correct_loss_per_char": 0.6917821764945984, "incorrect_loss_per_char": 0.8607232123613358, "correct_loss_per_token": 1.3835643529891968, "incorrect_loss_per_token": 1.7214464247226715, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3835643529891968, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3835643529891968, "logits_per_char": -0.6917821764945984, "num_chars": 2}, {"sum_logits": -1.4251230955123901, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4251230955123901, "logits_per_char": -0.7125615477561951, "num_chars": 2}, {"sum_logits": -1.5120466947555542, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5120466947555542, "logits_per_char": -0.7560233473777771, "num_chars": 2}, {"sum_logits": -1.7678674459457397, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7678674459457397, "logits_per_char": -0.8839337229728699, "num_chars": 2}, {"sum_logits": -2.180748462677002, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.180748462677002, "logits_per_char": -1.090374231338501, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 437, "native_id": "5a8e7d2f97f76adb23fbd59a009d16f0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2903772592544556, "incorrect_loss_raw": 1.7200046479701996, "correct_loss_per_char": 0.6451886296272278, "incorrect_loss_per_char": 0.8600023239850998, "correct_loss_per_token": 1.2903772592544556, "incorrect_loss_per_token": 1.7200046479701996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2903772592544556, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.2903772592544556, "logits_per_char": -0.6451886296272278, "num_chars": 2}, {"sum_logits": -1.6442248821258545, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.6442248821258545, "logits_per_char": -0.8221124410629272, "num_chars": 2}, {"sum_logits": -1.7855734825134277, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.7855734825134277, "logits_per_char": -0.8927867412567139, "num_chars": 2}, {"sum_logits": -1.6827514171600342, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.6827514171600342, "logits_per_char": -0.8413757085800171, "num_chars": 2}, {"sum_logits": -1.767468810081482, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.767468810081482, "logits_per_char": -0.883734405040741, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 438, "native_id": "178cb8153123716aa94f286b615149d4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5830117464065552, "incorrect_loss_raw": 1.6605059504508972, "correct_loss_per_char": 0.7915058732032776, "incorrect_loss_per_char": 0.8302529752254486, "correct_loss_per_token": 1.5830117464065552, "incorrect_loss_per_token": 1.6605059504508972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4162707328796387, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4162707328796387, "logits_per_char": -0.7081353664398193, "num_chars": 2}, {"sum_logits": -1.5735033750534058, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5735033750534058, "logits_per_char": -0.7867516875267029, "num_chars": 2}, {"sum_logits": -1.4955500364303589, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4955500364303589, "logits_per_char": -0.7477750182151794, "num_chars": 2}, {"sum_logits": -1.5830117464065552, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5830117464065552, "logits_per_char": -0.7915058732032776, "num_chars": 2}, {"sum_logits": -2.1566996574401855, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.1566996574401855, "logits_per_char": -1.0783498287200928, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 439, "native_id": "cc917ca0e03c91a5141920f5a902a36c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4490751028060913, "incorrect_loss_raw": 1.6724084913730621, "correct_loss_per_char": 0.7245375514030457, "incorrect_loss_per_char": 0.8362042456865311, "correct_loss_per_token": 1.4490751028060913, "incorrect_loss_per_token": 1.6724084913730621, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4490751028060913, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4490751028060913, "logits_per_char": -0.7245375514030457, "num_chars": 2}, {"sum_logits": -1.5372951030731201, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5372951030731201, "logits_per_char": -0.7686475515365601, "num_chars": 2}, {"sum_logits": -1.6958181858062744, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6958181858062744, "logits_per_char": -0.8479090929031372, "num_chars": 2}, {"sum_logits": -1.611520767211914, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.611520767211914, "logits_per_char": -0.805760383605957, "num_chars": 2}, {"sum_logits": -1.84499990940094, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.84499990940094, "logits_per_char": -0.92249995470047, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 440, "native_id": "a7d51b753c2113d8b2dbd0ebb5375855_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7155053615570068, "incorrect_loss_raw": 1.609940767288208, "correct_loss_per_char": 0.8577526807785034, "incorrect_loss_per_char": 0.804970383644104, "correct_loss_per_token": 1.7155053615570068, "incorrect_loss_per_token": 1.609940767288208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4509146213531494, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.4509146213531494, "logits_per_char": -0.7254573106765747, "num_chars": 2}, {"sum_logits": -1.4562788009643555, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.4562788009643555, "logits_per_char": -0.7281394004821777, "num_chars": 2}, {"sum_logits": -1.5799083709716797, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5799083709716797, "logits_per_char": -0.7899541854858398, "num_chars": 2}, {"sum_logits": -1.7155053615570068, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.7155053615570068, "logits_per_char": -0.8577526807785034, "num_chars": 2}, {"sum_logits": -1.9526612758636475, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.9526612758636475, "logits_per_char": -0.9763306379318237, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 441, "native_id": "e71da9e95b321763c86e879a47bbd327", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1171634197235107, "incorrect_loss_raw": 1.5248459577560425, "correct_loss_per_char": 1.0585817098617554, "incorrect_loss_per_char": 0.7624229788780212, "correct_loss_per_token": 2.1171634197235107, "incorrect_loss_per_token": 1.5248459577560425, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.387488842010498, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.387488842010498, "logits_per_char": -0.693744421005249, "num_chars": 2}, {"sum_logits": -1.4709060192108154, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.4709060192108154, "logits_per_char": -0.7354530096054077, "num_chars": 2}, {"sum_logits": -1.6380736827850342, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6380736827850342, "logits_per_char": -0.8190368413925171, "num_chars": 2}, {"sum_logits": -1.6029152870178223, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6029152870178223, "logits_per_char": -0.8014576435089111, "num_chars": 2}, {"sum_logits": -2.1171634197235107, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -2.1171634197235107, "logits_per_char": -1.0585817098617554, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 442, "native_id": "ec86900559a0faf2aef066e511a4cfa6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.599197506904602, "incorrect_loss_raw": 1.6709841787815094, "correct_loss_per_char": 0.799598753452301, "incorrect_loss_per_char": 0.8354920893907547, "correct_loss_per_token": 1.599197506904602, "incorrect_loss_per_token": 1.6709841787815094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2481695413589478, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2481695413589478, "logits_per_char": -0.6240847706794739, "num_chars": 2}, {"sum_logits": -1.599197506904602, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.599197506904602, "logits_per_char": -0.799598753452301, "num_chars": 2}, {"sum_logits": -1.5836187601089478, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5836187601089478, "logits_per_char": -0.7918093800544739, "num_chars": 2}, {"sum_logits": -1.7179092168807983, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7179092168807983, "logits_per_char": -0.8589546084403992, "num_chars": 2}, {"sum_logits": -2.1342391967773438, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.1342391967773438, "logits_per_char": -1.0671195983886719, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 443, "native_id": "d312741df1b14bcbe358f4f30aff3994", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.076580047607422, "incorrect_loss_raw": 1.539204180240631, "correct_loss_per_char": 1.038290023803711, "incorrect_loss_per_char": 0.7696020901203156, "correct_loss_per_token": 2.076580047607422, "incorrect_loss_per_token": 1.539204180240631, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3063102960586548, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3063102960586548, "logits_per_char": -0.6531551480293274, "num_chars": 2}, {"sum_logits": -1.4700161218643188, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4700161218643188, "logits_per_char": -0.7350080609321594, "num_chars": 2}, {"sum_logits": -1.678958773612976, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.678958773612976, "logits_per_char": -0.839479386806488, "num_chars": 2}, {"sum_logits": -1.7015315294265747, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7015315294265747, "logits_per_char": -0.8507657647132874, "num_chars": 2}, {"sum_logits": -2.076580047607422, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.076580047607422, "logits_per_char": -1.038290023803711, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 444, "native_id": "0df3f58645b4bc306093845fb297a50e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.10377836227417, "incorrect_loss_raw": 1.5382096767425537, "correct_loss_per_char": 1.051889181137085, "incorrect_loss_per_char": 0.7691048383712769, "correct_loss_per_token": 2.10377836227417, "incorrect_loss_per_token": 1.5382096767425537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2925838232040405, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.2925838232040405, "logits_per_char": -0.6462919116020203, "num_chars": 2}, {"sum_logits": -1.4364575147628784, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.4364575147628784, "logits_per_char": -0.7182287573814392, "num_chars": 2}, {"sum_logits": -1.7009705305099487, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7009705305099487, "logits_per_char": -0.8504852652549744, "num_chars": 2}, {"sum_logits": -1.7228268384933472, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7228268384933472, "logits_per_char": -0.8614134192466736, "num_chars": 2}, {"sum_logits": -2.10377836227417, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.10377836227417, "logits_per_char": -1.051889181137085, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 445, "native_id": "27d9b4df2ca50112d282331df4923e96", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6698418855667114, "incorrect_loss_raw": 1.6043736934661865, "correct_loss_per_char": 0.8349209427833557, "incorrect_loss_per_char": 0.8021868467330933, "correct_loss_per_token": 1.6698418855667114, "incorrect_loss_per_token": 1.6043736934661865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4972630739212036, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.4972630739212036, "logits_per_char": -0.7486315369606018, "num_chars": 2}, {"sum_logits": -1.5856269598007202, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5856269598007202, "logits_per_char": -0.7928134799003601, "num_chars": 2}, {"sum_logits": -1.7006481885910034, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.7006481885910034, "logits_per_char": -0.8503240942955017, "num_chars": 2}, {"sum_logits": -1.6339565515518188, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6339565515518188, "logits_per_char": -0.8169782757759094, "num_chars": 2}, {"sum_logits": -1.6698418855667114, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6698418855667114, "logits_per_char": -0.8349209427833557, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 446, "native_id": "ab755203f41a2e241f0ee8a53c54f287", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6342891454696655, "incorrect_loss_raw": 1.643509417772293, "correct_loss_per_char": 0.8171445727348328, "incorrect_loss_per_char": 0.8217547088861465, "correct_loss_per_token": 1.6342891454696655, "incorrect_loss_per_token": 1.643509417772293, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4799679517745972, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4799679517745972, "logits_per_char": -0.7399839758872986, "num_chars": 2}, {"sum_logits": -1.4323558807373047, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4323558807373047, "logits_per_char": -0.7161779403686523, "num_chars": 2}, {"sum_logits": -1.5363142490386963, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5363142490386963, "logits_per_char": -0.7681571245193481, "num_chars": 2}, {"sum_logits": -1.6342891454696655, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6342891454696655, "logits_per_char": -0.8171445727348328, "num_chars": 2}, {"sum_logits": -2.125399589538574, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.125399589538574, "logits_per_char": -1.062699794769287, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 447, "native_id": "f13efb91090dd28fd2b3c1f4dde680fd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3809869289398193, "incorrect_loss_raw": 1.7144842743873596, "correct_loss_per_char": 0.6904934644699097, "incorrect_loss_per_char": 0.8572421371936798, "correct_loss_per_token": 1.3809869289398193, "incorrect_loss_per_token": 1.7144842743873596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3809869289398193, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3809869289398193, "logits_per_char": -0.6904934644699097, "num_chars": 2}, {"sum_logits": -1.4663851261138916, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4663851261138916, "logits_per_char": -0.7331925630569458, "num_chars": 2}, {"sum_logits": -1.6228023767471313, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6228023767471313, "logits_per_char": -0.8114011883735657, "num_chars": 2}, {"sum_logits": -1.615350604057312, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.615350604057312, "logits_per_char": -0.807675302028656, "num_chars": 2}, {"sum_logits": -2.1533989906311035, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.1533989906311035, "logits_per_char": -1.0766994953155518, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 448, "native_id": "e98031901c815e55040d9fe28c4d9387", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.681710124015808, "incorrect_loss_raw": 1.6288565695285797, "correct_loss_per_char": 0.840855062007904, "incorrect_loss_per_char": 0.8144282847642899, "correct_loss_per_token": 1.681710124015808, "incorrect_loss_per_token": 1.6288565695285797, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4090176820755005, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4090176820755005, "logits_per_char": -0.7045088410377502, "num_chars": 2}, {"sum_logits": -1.5848175287246704, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5848175287246704, "logits_per_char": -0.7924087643623352, "num_chars": 2}, {"sum_logits": -1.4612020254135132, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4612020254135132, "logits_per_char": -0.7306010127067566, "num_chars": 2}, {"sum_logits": -1.681710124015808, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.681710124015808, "logits_per_char": -0.840855062007904, "num_chars": 2}, {"sum_logits": -2.0603890419006348, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0603890419006348, "logits_per_char": -1.0301945209503174, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 449, "native_id": "fb64149cf01c5b496d986f56852273e9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6257460117340088, "incorrect_loss_raw": 1.6274204850196838, "correct_loss_per_char": 0.8128730058670044, "incorrect_loss_per_char": 0.8137102425098419, "correct_loss_per_token": 1.6257460117340088, "incorrect_loss_per_token": 1.6274204850196838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4889962673187256, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4889962673187256, "logits_per_char": -0.7444981336593628, "num_chars": 2}, {"sum_logits": -1.5632905960083008, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5632905960083008, "logits_per_char": -0.7816452980041504, "num_chars": 2}, {"sum_logits": -1.5145478248596191, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5145478248596191, "logits_per_char": -0.7572739124298096, "num_chars": 2}, {"sum_logits": -1.6257460117340088, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6257460117340088, "logits_per_char": -0.8128730058670044, "num_chars": 2}, {"sum_logits": -1.9428472518920898, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.9428472518920898, "logits_per_char": -0.9714236259460449, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 450, "native_id": "2ac72eaf30a633c410b1bd658bbef0ba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6671808958053589, "incorrect_loss_raw": 1.6329298317432404, "correct_loss_per_char": 0.8335904479026794, "incorrect_loss_per_char": 0.8164649158716202, "correct_loss_per_token": 1.6671808958053589, "incorrect_loss_per_token": 1.6329298317432404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4108079671859741, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4108079671859741, "logits_per_char": -0.7054039835929871, "num_chars": 2}, {"sum_logits": -1.5591202974319458, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5591202974319458, "logits_per_char": -0.7795601487159729, "num_chars": 2}, {"sum_logits": -1.6671808958053589, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6671808958053589, "logits_per_char": -0.8335904479026794, "num_chars": 2}, {"sum_logits": -1.4816175699234009, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4816175699234009, "logits_per_char": -0.7408087849617004, "num_chars": 2}, {"sum_logits": -2.0801734924316406, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0801734924316406, "logits_per_char": -1.0400867462158203, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 451, "native_id": "22fc45d9e6d0baea4a5b0526504225b8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5035176277160645, "incorrect_loss_raw": 1.6783691048622131, "correct_loss_per_char": 0.7517588138580322, "incorrect_loss_per_char": 0.8391845524311066, "correct_loss_per_token": 1.5035176277160645, "incorrect_loss_per_token": 1.6783691048622131, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3266706466674805, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3266706466674805, "logits_per_char": -0.6633353233337402, "num_chars": 2}, {"sum_logits": -1.5035176277160645, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5035176277160645, "logits_per_char": -0.7517588138580322, "num_chars": 2}, {"sum_logits": -1.5841528177261353, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5841528177261353, "logits_per_char": -0.7920764088630676, "num_chars": 2}, {"sum_logits": -1.739881157875061, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.739881157875061, "logits_per_char": -0.8699405789375305, "num_chars": 2}, {"sum_logits": -2.062771797180176, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.062771797180176, "logits_per_char": -1.031385898590088, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 452, "native_id": "4ef3d70648ee3cea028bc5ed0fdfda28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7000311613082886, "incorrect_loss_raw": 1.619138926267624, "correct_loss_per_char": 0.8500155806541443, "incorrect_loss_per_char": 0.809569463133812, "correct_loss_per_token": 1.7000311613082886, "incorrect_loss_per_token": 1.619138926267624, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.351231336593628, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.351231336593628, "logits_per_char": -0.675615668296814, "num_chars": 2}, {"sum_logits": -1.509010910987854, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.509010910987854, "logits_per_char": -0.754505455493927, "num_chars": 2}, {"sum_logits": -1.7000311613082886, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7000311613082886, "logits_per_char": -0.8500155806541443, "num_chars": 2}, {"sum_logits": -1.6313396692276, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6313396692276, "logits_per_char": -0.8156698346138, "num_chars": 2}, {"sum_logits": -1.9849737882614136, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.9849737882614136, "logits_per_char": -0.9924868941307068, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 453, "native_id": "059155c50d1b04da7373e309868e67d2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6057440042495728, "incorrect_loss_raw": 1.6306480765342712, "correct_loss_per_char": 0.8028720021247864, "incorrect_loss_per_char": 0.8153240382671356, "correct_loss_per_token": 1.6057440042495728, "incorrect_loss_per_token": 1.6306480765342712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4803155660629272, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4803155660629272, "logits_per_char": -0.7401577830314636, "num_chars": 2}, {"sum_logits": -1.5021735429763794, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5021735429763794, "logits_per_char": -0.7510867714881897, "num_chars": 2}, {"sum_logits": -1.6057440042495728, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6057440042495728, "logits_per_char": -0.8028720021247864, "num_chars": 2}, {"sum_logits": -1.6222552061080933, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6222552061080933, "logits_per_char": -0.8111276030540466, "num_chars": 2}, {"sum_logits": -1.917847990989685, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.917847990989685, "logits_per_char": -0.9589239954948425, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 454, "native_id": "33d023a6806390eb8195380331e17404_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6212098598480225, "incorrect_loss_raw": 1.6337443590164185, "correct_loss_per_char": 0.8106049299240112, "incorrect_loss_per_char": 0.8168721795082092, "correct_loss_per_token": 1.6212098598480225, "incorrect_loss_per_token": 1.6337443590164185, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3541126251220703, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3541126251220703, "logits_per_char": -0.6770563125610352, "num_chars": 2}, {"sum_logits": -1.5350689888000488, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5350689888000488, "logits_per_char": -0.7675344944000244, "num_chars": 2}, {"sum_logits": -1.6212098598480225, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6212098598480225, "logits_per_char": -0.8106049299240112, "num_chars": 2}, {"sum_logits": -1.7387781143188477, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7387781143188477, "logits_per_char": -0.8693890571594238, "num_chars": 2}, {"sum_logits": -1.907017707824707, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.907017707824707, "logits_per_char": -0.9535088539123535, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 455, "native_id": "63f7ad481a63fc8c6dffe00519d4a167", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2325572967529297, "incorrect_loss_raw": 1.7605310082435608, "correct_loss_per_char": 0.6162786483764648, "incorrect_loss_per_char": 0.8802655041217804, "correct_loss_per_token": 1.2325572967529297, "incorrect_loss_per_token": 1.7605310082435608, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2325572967529297, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.2325572967529297, "logits_per_char": -0.6162786483764648, "num_chars": 2}, {"sum_logits": -1.5179047584533691, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5179047584533691, "logits_per_char": -0.7589523792266846, "num_chars": 2}, {"sum_logits": -1.648963212966919, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.648963212966919, "logits_per_char": -0.8244816064834595, "num_chars": 2}, {"sum_logits": -1.7656044960021973, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7656044960021973, "logits_per_char": -0.8828022480010986, "num_chars": 2}, {"sum_logits": -2.109651565551758, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.109651565551758, "logits_per_char": -1.054825782775879, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 456, "native_id": "a2daf73d33541af0846673afd8e49abe", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5231930017471313, "incorrect_loss_raw": 1.6485038995742798, "correct_loss_per_char": 0.7615965008735657, "incorrect_loss_per_char": 0.8242519497871399, "correct_loss_per_token": 1.5231930017471313, "incorrect_loss_per_token": 1.6485038995742798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5231930017471313, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5231930017471313, "logits_per_char": -0.7615965008735657, "num_chars": 2}, {"sum_logits": -1.5035332441329956, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.5035332441329956, "logits_per_char": -0.7517666220664978, "num_chars": 2}, {"sum_logits": -1.68346107006073, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.68346107006073, "logits_per_char": -0.841730535030365, "num_chars": 2}, {"sum_logits": -1.539698839187622, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.539698839187622, "logits_per_char": -0.769849419593811, "num_chars": 2}, {"sum_logits": -1.8673224449157715, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.8673224449157715, "logits_per_char": -0.9336612224578857, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 457, "native_id": "7d70208061ae3185bcfc9e912ee9e141", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3217358589172363, "incorrect_loss_raw": 1.7200179398059845, "correct_loss_per_char": 0.6608679294586182, "incorrect_loss_per_char": 0.8600089699029922, "correct_loss_per_token": 1.3217358589172363, "incorrect_loss_per_token": 1.7200179398059845, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3217358589172363, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.3217358589172363, "logits_per_char": -0.6608679294586182, "num_chars": 2}, {"sum_logits": -1.545986533164978, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.545986533164978, "logits_per_char": -0.772993266582489, "num_chars": 2}, {"sum_logits": -1.5834394693374634, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5834394693374634, "logits_per_char": -0.7917197346687317, "num_chars": 2}, {"sum_logits": -1.7305175065994263, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7305175065994263, "logits_per_char": -0.8652587532997131, "num_chars": 2}, {"sum_logits": -2.0201282501220703, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.0201282501220703, "logits_per_char": -1.0100641250610352, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 458, "native_id": "9003c4748b08d5a734747e499599ff20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6526626348495483, "incorrect_loss_raw": 1.6783212721347809, "correct_loss_per_char": 0.8263313174247742, "incorrect_loss_per_char": 0.8391606360673904, "correct_loss_per_token": 1.6526626348495483, "incorrect_loss_per_token": 1.6783212721347809, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2071112394332886, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2071112394332886, "logits_per_char": -0.6035556197166443, "num_chars": 2}, {"sum_logits": -1.4784692525863647, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4784692525863647, "logits_per_char": -0.7392346262931824, "num_chars": 2}, {"sum_logits": -1.6526626348495483, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6526626348495483, "logits_per_char": -0.8263313174247742, "num_chars": 2}, {"sum_logits": -1.7473820447921753, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7473820447921753, "logits_per_char": -0.8736910223960876, "num_chars": 2}, {"sum_logits": -2.280322551727295, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.280322551727295, "logits_per_char": -1.1401612758636475, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 459, "native_id": "28aac6d39cdd270d2a6a28e1985484cb", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5668243169784546, "incorrect_loss_raw": 1.6538422107696533, "correct_loss_per_char": 0.7834121584892273, "incorrect_loss_per_char": 0.8269211053848267, "correct_loss_per_token": 1.5668243169784546, "incorrect_loss_per_token": 1.6538422107696533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4952507019042969, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4952507019042969, "logits_per_char": -0.7476253509521484, "num_chars": 2}, {"sum_logits": -1.4753358364105225, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4753358364105225, "logits_per_char": -0.7376679182052612, "num_chars": 2}, {"sum_logits": -1.5668243169784546, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5668243169784546, "logits_per_char": -0.7834121584892273, "num_chars": 2}, {"sum_logits": -1.5712487697601318, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5712487697601318, "logits_per_char": -0.7856243848800659, "num_chars": 2}, {"sum_logits": -2.073533535003662, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.073533535003662, "logits_per_char": -1.036766767501831, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 460, "native_id": "8bdbb8caefcc607a9ec7579aa0c87cba", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5535736083984375, "incorrect_loss_raw": 1.6350183486938477, "correct_loss_per_char": 0.7767868041992188, "incorrect_loss_per_char": 0.8175091743469238, "correct_loss_per_token": 1.5535736083984375, "incorrect_loss_per_token": 1.6350183486938477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5535736083984375, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5535736083984375, "logits_per_char": -0.7767868041992188, "num_chars": 2}, {"sum_logits": -1.5131137371063232, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.5131137371063232, "logits_per_char": -0.7565568685531616, "num_chars": 2}, {"sum_logits": -1.636713981628418, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.636713981628418, "logits_per_char": -0.818356990814209, "num_chars": 2}, {"sum_logits": -1.6361753940582275, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6361753940582275, "logits_per_char": -0.8180876970291138, "num_chars": 2}, {"sum_logits": -1.7540702819824219, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7540702819824219, "logits_per_char": -0.8770351409912109, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 461, "native_id": "95a85df48902d23eb3fda25a99fca1a0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6672476530075073, "incorrect_loss_raw": 1.6524496674537659, "correct_loss_per_char": 0.8336238265037537, "incorrect_loss_per_char": 0.8262248337268829, "correct_loss_per_token": 1.6672476530075073, "incorrect_loss_per_token": 1.6524496674537659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1601049900054932, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.1601049900054932, "logits_per_char": -0.5800524950027466, "num_chars": 2}, {"sum_logits": -1.6672476530075073, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6672476530075073, "logits_per_char": -0.8336238265037537, "num_chars": 2}, {"sum_logits": -1.6789637804031372, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6789637804031372, "logits_per_char": -0.8394818902015686, "num_chars": 2}, {"sum_logits": -1.746376872062683, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.746376872062683, "logits_per_char": -0.8731884360313416, "num_chars": 2}, {"sum_logits": -2.02435302734375, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.02435302734375, "logits_per_char": -1.012176513671875, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 462, "native_id": "79c3378b7660d328902d7c0ad442a37f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.228947162628174, "incorrect_loss_raw": 1.5139403641223907, "correct_loss_per_char": 1.114473581314087, "incorrect_loss_per_char": 0.7569701820611954, "correct_loss_per_token": 2.228947162628174, "incorrect_loss_per_token": 1.5139403641223907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3164883852005005, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3164883852005005, "logits_per_char": -0.6582441926002502, "num_chars": 2}, {"sum_logits": -1.5075106620788574, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5075106620788574, "logits_per_char": -0.7537553310394287, "num_chars": 2}, {"sum_logits": -1.5485503673553467, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5485503673553467, "logits_per_char": -0.7742751836776733, "num_chars": 2}, {"sum_logits": -1.6832120418548584, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6832120418548584, "logits_per_char": -0.8416060209274292, "num_chars": 2}, {"sum_logits": -2.228947162628174, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.228947162628174, "logits_per_char": -1.114473581314087, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 463, "native_id": "8c12e5864463cfcd03f4d0ab67949d01", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.476257085800171, "incorrect_loss_raw": 1.682786464691162, "correct_loss_per_char": 0.7381285429000854, "incorrect_loss_per_char": 0.841393232345581, "correct_loss_per_token": 1.476257085800171, "incorrect_loss_per_token": 1.682786464691162, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3229758739471436, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.3229758739471436, "logits_per_char": -0.6614879369735718, "num_chars": 2}, {"sum_logits": -1.476257085800171, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.476257085800171, "logits_per_char": -0.7381285429000854, "num_chars": 2}, {"sum_logits": -1.666574239730835, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.666574239730835, "logits_per_char": -0.8332871198654175, "num_chars": 2}, {"sum_logits": -1.7143423557281494, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.7143423557281494, "logits_per_char": -0.8571711778640747, "num_chars": 2}, {"sum_logits": -2.0272533893585205, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -2.0272533893585205, "logits_per_char": -1.0136266946792603, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 464, "native_id": "e145618c2062eb9ea8928fdb0d42185e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6155692338943481, "incorrect_loss_raw": 1.6210295855998993, "correct_loss_per_char": 0.8077846169471741, "incorrect_loss_per_char": 0.8105147927999496, "correct_loss_per_token": 1.6155692338943481, "incorrect_loss_per_token": 1.6210295855998993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6155692338943481, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6155692338943481, "logits_per_char": -0.8077846169471741, "num_chars": 2}, {"sum_logits": -1.5312526226043701, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.5312526226043701, "logits_per_char": -0.7656263113021851, "num_chars": 2}, {"sum_logits": -1.5418974161148071, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5418974161148071, "logits_per_char": -0.7709487080574036, "num_chars": 2}, {"sum_logits": -1.605307936668396, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.605307936668396, "logits_per_char": -0.802653968334198, "num_chars": 2}, {"sum_logits": -1.805660367012024, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.805660367012024, "logits_per_char": -0.902830183506012, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 465, "native_id": "35872be88df5f6c4a6600020266a5458", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5877405405044556, "incorrect_loss_raw": 1.6365796327590942, "correct_loss_per_char": 0.7938702702522278, "incorrect_loss_per_char": 0.8182898163795471, "correct_loss_per_token": 1.5877405405044556, "incorrect_loss_per_token": 1.6365796327590942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4122966527938843, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4122966527938843, "logits_per_char": -0.7061483263969421, "num_chars": 2}, {"sum_logits": -1.7146154642105103, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7146154642105103, "logits_per_char": -0.8573077321052551, "num_chars": 2}, {"sum_logits": -1.5877405405044556, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5877405405044556, "logits_per_char": -0.7938702702522278, "num_chars": 2}, {"sum_logits": -1.5374764204025269, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5374764204025269, "logits_per_char": -0.7687382102012634, "num_chars": 2}, {"sum_logits": -1.8819299936294556, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8819299936294556, "logits_per_char": -0.9409649968147278, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 466, "native_id": "055817d8d703d3c2802545e3fccdcde3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5016443729400635, "incorrect_loss_raw": 1.698465257883072, "correct_loss_per_char": 0.7508221864700317, "incorrect_loss_per_char": 0.849232628941536, "correct_loss_per_token": 1.5016443729400635, "incorrect_loss_per_token": 1.698465257883072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2228916883468628, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.2228916883468628, "logits_per_char": -0.6114458441734314, "num_chars": 2}, {"sum_logits": -1.5016443729400635, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5016443729400635, "logits_per_char": -0.7508221864700317, "num_chars": 2}, {"sum_logits": -1.6442776918411255, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6442776918411255, "logits_per_char": -0.8221388459205627, "num_chars": 2}, {"sum_logits": -1.810623288154602, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.810623288154602, "logits_per_char": -0.905311644077301, "num_chars": 2}, {"sum_logits": -2.1160683631896973, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.1160683631896973, "logits_per_char": -1.0580341815948486, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 467, "native_id": "5ef6cdb85468df482e3aa6fa339d6e41", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.398047924041748, "incorrect_loss_raw": 1.6890403032302856, "correct_loss_per_char": 0.699023962020874, "incorrect_loss_per_char": 0.8445201516151428, "correct_loss_per_token": 1.398047924041748, "incorrect_loss_per_token": 1.6890403032302856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.398047924041748, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.398047924041748, "logits_per_char": -0.699023962020874, "num_chars": 2}, {"sum_logits": -1.508832335472107, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.508832335472107, "logits_per_char": -0.7544161677360535, "num_chars": 2}, {"sum_logits": -1.6654280424118042, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6654280424118042, "logits_per_char": -0.8327140212059021, "num_chars": 2}, {"sum_logits": -1.65764319896698, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.65764319896698, "logits_per_char": -0.82882159948349, "num_chars": 2}, {"sum_logits": -1.9242576360702515, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9242576360702515, "logits_per_char": -0.9621288180351257, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 468, "native_id": "1e939cc6fef999953d692b57caab254b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5713125467300415, "incorrect_loss_raw": 1.695660561323166, "correct_loss_per_char": 0.7856562733650208, "incorrect_loss_per_char": 0.847830280661583, "correct_loss_per_token": 1.5713125467300415, "incorrect_loss_per_token": 1.695660561323166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2125409841537476, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2125409841537476, "logits_per_char": -0.6062704920768738, "num_chars": 2}, {"sum_logits": -1.5713125467300415, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5713125467300415, "logits_per_char": -0.7856562733650208, "num_chars": 2}, {"sum_logits": -1.539944052696228, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.539944052696228, "logits_per_char": -0.769972026348114, "num_chars": 2}, {"sum_logits": -1.7594753503799438, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7594753503799438, "logits_per_char": -0.8797376751899719, "num_chars": 2}, {"sum_logits": -2.270681858062744, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.270681858062744, "logits_per_char": -1.135340929031372, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 469, "native_id": "3a3b5d4a517ef70d25eb558f1a622937", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8585567474365234, "incorrect_loss_raw": 1.5678046345710754, "correct_loss_per_char": 0.9292783737182617, "incorrect_loss_per_char": 0.7839023172855377, "correct_loss_per_token": 1.8585567474365234, "incorrect_loss_per_token": 1.5678046345710754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.442277431488037, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.442277431488037, "logits_per_char": -0.7211387157440186, "num_chars": 2}, {"sum_logits": -1.4972469806671143, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.4972469806671143, "logits_per_char": -0.7486234903335571, "num_chars": 2}, {"sum_logits": -1.6485674381256104, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6485674381256104, "logits_per_char": -0.8242837190628052, "num_chars": 2}, {"sum_logits": -1.68312668800354, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.68312668800354, "logits_per_char": -0.84156334400177, "num_chars": 2}, {"sum_logits": -1.8585567474365234, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.8585567474365234, "logits_per_char": -0.9292783737182617, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 470, "native_id": "a943522f7d407cef369d5d3f1bf48589", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.605931043624878, "incorrect_loss_raw": 1.6449988186359406, "correct_loss_per_char": 0.802965521812439, "incorrect_loss_per_char": 0.8224994093179703, "correct_loss_per_token": 1.605931043624878, "incorrect_loss_per_token": 1.6449988186359406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3712995052337646, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3712995052337646, "logits_per_char": -0.6856497526168823, "num_chars": 2}, {"sum_logits": -1.4970275163650513, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.4970275163650513, "logits_per_char": -0.7485137581825256, "num_chars": 2}, {"sum_logits": -1.605931043624878, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.605931043624878, "logits_per_char": -0.802965521812439, "num_chars": 2}, {"sum_logits": -1.6808526515960693, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6808526515960693, "logits_per_char": -0.8404263257980347, "num_chars": 2}, {"sum_logits": -2.030815601348877, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.030815601348877, "logits_per_char": -1.0154078006744385, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 471, "native_id": "57a343d72031b668e5eb91868420e915", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.368035912513733, "incorrect_loss_raw": 1.7011514902114868, "correct_loss_per_char": 0.6840179562568665, "incorrect_loss_per_char": 0.8505757451057434, "correct_loss_per_token": 1.368035912513733, "incorrect_loss_per_token": 1.7011514902114868, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.368035912513733, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.368035912513733, "logits_per_char": -0.6840179562568665, "num_chars": 2}, {"sum_logits": -1.5099447965621948, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5099447965621948, "logits_per_char": -0.7549723982810974, "num_chars": 2}, {"sum_logits": -1.6302028894424438, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6302028894424438, "logits_per_char": -0.8151014447212219, "num_chars": 2}, {"sum_logits": -1.6766104698181152, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6766104698181152, "logits_per_char": -0.8383052349090576, "num_chars": 2}, {"sum_logits": -1.9878478050231934, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9878478050231934, "logits_per_char": -0.9939239025115967, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 472, "native_id": "c4b1a57e7880b9cb367f9c67abf5605f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.578622817993164, "incorrect_loss_raw": 1.6384204626083374, "correct_loss_per_char": 0.789311408996582, "incorrect_loss_per_char": 0.8192102313041687, "correct_loss_per_token": 1.578622817993164, "incorrect_loss_per_token": 1.6384204626083374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.404040813446045, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.404040813446045, "logits_per_char": -0.7020204067230225, "num_chars": 2}, {"sum_logits": -1.578622817993164, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.578622817993164, "logits_per_char": -0.789311408996582, "num_chars": 2}, {"sum_logits": -1.595478892326355, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.595478892326355, "logits_per_char": -0.7977394461631775, "num_chars": 2}, {"sum_logits": -1.6724375486373901, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6724375486373901, "logits_per_char": -0.8362187743186951, "num_chars": 2}, {"sum_logits": -1.8817245960235596, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.8817245960235596, "logits_per_char": -0.9408622980117798, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 473, "native_id": "e313d7967f72c2b880213daaaf4b7181", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6910464763641357, "incorrect_loss_raw": 1.6791401505470276, "correct_loss_per_char": 0.8455232381820679, "incorrect_loss_per_char": 0.8395700752735138, "correct_loss_per_token": 1.6910464763641357, "incorrect_loss_per_token": 1.6791401505470276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1388064622879028, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.1388064622879028, "logits_per_char": -0.5694032311439514, "num_chars": 2}, {"sum_logits": -1.5068155527114868, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5068155527114868, "logits_per_char": -0.7534077763557434, "num_chars": 2}, {"sum_logits": -1.6910464763641357, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6910464763641357, "logits_per_char": -0.8455232381820679, "num_chars": 2}, {"sum_logits": -1.7901499271392822, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7901499271392822, "logits_per_char": -0.8950749635696411, "num_chars": 2}, {"sum_logits": -2.2807886600494385, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.2807886600494385, "logits_per_char": -1.1403943300247192, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 474, "native_id": "3c7992df7fda23bcdeacb1f1f6b73448", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6914509534835815, "incorrect_loss_raw": 1.632501482963562, "correct_loss_per_char": 0.8457254767417908, "incorrect_loss_per_char": 0.816250741481781, "correct_loss_per_token": 1.6914509534835815, "incorrect_loss_per_token": 1.632501482963562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2681999206542969, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.2681999206542969, "logits_per_char": -0.6340999603271484, "num_chars": 2}, {"sum_logits": -1.6071659326553345, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6071659326553345, "logits_per_char": -0.8035829663276672, "num_chars": 2}, {"sum_logits": -1.6914509534835815, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6914509534835815, "logits_per_char": -0.8457254767417908, "num_chars": 2}, {"sum_logits": -1.6482471227645874, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6482471227645874, "logits_per_char": -0.8241235613822937, "num_chars": 2}, {"sum_logits": -2.0063929557800293, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -2.0063929557800293, "logits_per_char": -1.0031964778900146, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 475, "native_id": "d6644eacdb543a60545d2eb1ac7e6dbd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5253883600234985, "incorrect_loss_raw": 1.648499071598053, "correct_loss_per_char": 0.7626941800117493, "incorrect_loss_per_char": 0.8242495357990265, "correct_loss_per_token": 1.5253883600234985, "incorrect_loss_per_token": 1.648499071598053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5253883600234985, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5253883600234985, "logits_per_char": -0.7626941800117493, "num_chars": 2}, {"sum_logits": -1.6023861169815063, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6023861169815063, "logits_per_char": -0.8011930584907532, "num_chars": 2}, {"sum_logits": -1.643248200416565, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.643248200416565, "logits_per_char": -0.8216241002082825, "num_chars": 2}, {"sum_logits": -1.4914506673812866, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4914506673812866, "logits_per_char": -0.7457253336906433, "num_chars": 2}, {"sum_logits": -1.856911301612854, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.856911301612854, "logits_per_char": -0.928455650806427, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 476, "native_id": "d1ad9b79f54205b6b9ac19a27f9c2be5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.867143154144287, "incorrect_loss_raw": 1.563730537891388, "correct_loss_per_char": 0.9335715770721436, "incorrect_loss_per_char": 0.781865268945694, "correct_loss_per_token": 1.867143154144287, "incorrect_loss_per_token": 1.563730537891388, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5411696434020996, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5411696434020996, "logits_per_char": -0.7705848217010498, "num_chars": 2}, {"sum_logits": -1.7025353908538818, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7025353908538818, "logits_per_char": -0.8512676954269409, "num_chars": 2}, {"sum_logits": -1.4564971923828125, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4564971923828125, "logits_per_char": -0.7282485961914062, "num_chars": 2}, {"sum_logits": -1.5547199249267578, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5547199249267578, "logits_per_char": -0.7773599624633789, "num_chars": 2}, {"sum_logits": -1.867143154144287, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.867143154144287, "logits_per_char": -0.9335715770721436, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 477, "native_id": "f116ee6620c0f171e5db54bc03a5f2e2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6486711502075195, "incorrect_loss_raw": 1.6565802991390228, "correct_loss_per_char": 0.8243355751037598, "incorrect_loss_per_char": 0.8282901495695114, "correct_loss_per_token": 1.6486711502075195, "incorrect_loss_per_token": 1.6565802991390228, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2541838884353638, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2541838884353638, "logits_per_char": -0.6270919442176819, "num_chars": 2}, {"sum_logits": -1.5616933107376099, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5616933107376099, "logits_per_char": -0.7808466553688049, "num_chars": 2}, {"sum_logits": -1.6486711502075195, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6486711502075195, "logits_per_char": -0.8243355751037598, "num_chars": 2}, {"sum_logits": -1.6675101518630981, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6675101518630981, "logits_per_char": -0.8337550759315491, "num_chars": 2}, {"sum_logits": -2.1429338455200195, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.1429338455200195, "logits_per_char": -1.0714669227600098, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 478, "native_id": "ea82f9e938cbfce85fb498ce46264253", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6345791816711426, "incorrect_loss_raw": 1.6800590455532074, "correct_loss_per_char": 0.8172895908355713, "incorrect_loss_per_char": 0.8400295227766037, "correct_loss_per_token": 1.6345791816711426, "incorrect_loss_per_token": 1.6800590455532074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2073825597763062, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2073825597763062, "logits_per_char": -0.6036912798881531, "num_chars": 2}, {"sum_logits": -1.5172512531280518, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5172512531280518, "logits_per_char": -0.7586256265640259, "num_chars": 2}, {"sum_logits": -1.6345791816711426, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6345791816711426, "logits_per_char": -0.8172895908355713, "num_chars": 2}, {"sum_logits": -1.7214593887329102, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7214593887329102, "logits_per_char": -0.8607296943664551, "num_chars": 2}, {"sum_logits": -2.2741429805755615, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.2741429805755615, "logits_per_char": -1.1370714902877808, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 479, "native_id": "edbb57ac2f476679ae547f75ec2bef3e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6731597185134888, "incorrect_loss_raw": 1.6037117540836334, "correct_loss_per_char": 0.8365798592567444, "incorrect_loss_per_char": 0.8018558770418167, "correct_loss_per_token": 1.6731597185134888, "incorrect_loss_per_token": 1.6037117540836334, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6509132385253906, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6509132385253906, "logits_per_char": -0.8254566192626953, "num_chars": 2}, {"sum_logits": -1.6731597185134888, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6731597185134888, "logits_per_char": -0.8365798592567444, "num_chars": 2}, {"sum_logits": -1.6465359926223755, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6465359926223755, "logits_per_char": -0.8232679963111877, "num_chars": 2}, {"sum_logits": -1.4774435758590698, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.4774435758590698, "logits_per_char": -0.7387217879295349, "num_chars": 2}, {"sum_logits": -1.6399542093276978, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6399542093276978, "logits_per_char": -0.8199771046638489, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 480, "native_id": "07a99d5f2ca7028febeb9f09604b36c8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8783763647079468, "incorrect_loss_raw": 1.5735015571117401, "correct_loss_per_char": 0.9391881823539734, "incorrect_loss_per_char": 0.7867507785558701, "correct_loss_per_token": 1.8783763647079468, "incorrect_loss_per_token": 1.5735015571117401, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4447665214538574, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4447665214538574, "logits_per_char": -0.7223832607269287, "num_chars": 2}, {"sum_logits": -1.8783763647079468, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8783763647079468, "logits_per_char": -0.9391881823539734, "num_chars": 2}, {"sum_logits": -1.5941559076309204, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5941559076309204, "logits_per_char": -0.7970779538154602, "num_chars": 2}, {"sum_logits": -1.4188517332077026, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4188517332077026, "logits_per_char": -0.7094258666038513, "num_chars": 2}, {"sum_logits": -1.83623206615448, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.83623206615448, "logits_per_char": -0.91811603307724, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 481, "native_id": "b42ef8be1748c19fa5938de5396f8fad", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4329339265823364, "incorrect_loss_raw": 1.7039331495761871, "correct_loss_per_char": 0.7164669632911682, "incorrect_loss_per_char": 0.8519665747880936, "correct_loss_per_token": 1.4329339265823364, "incorrect_loss_per_token": 1.7039331495761871, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3756715059280396, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3756715059280396, "logits_per_char": -0.6878357529640198, "num_chars": 2}, {"sum_logits": -1.4329339265823364, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4329339265823364, "logits_per_char": -0.7164669632911682, "num_chars": 2}, {"sum_logits": -1.5624266862869263, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5624266862869263, "logits_per_char": -0.7812133431434631, "num_chars": 2}, {"sum_logits": -1.710490107536316, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.710490107536316, "logits_per_char": -0.855245053768158, "num_chars": 2}, {"sum_logits": -2.167144298553467, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.167144298553467, "logits_per_char": -1.0835721492767334, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 482, "native_id": "236691d38665d7bcdd0c9b9834252a51", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6445595026016235, "incorrect_loss_raw": 1.6137284636497498, "correct_loss_per_char": 0.8222797513008118, "incorrect_loss_per_char": 0.8068642318248749, "correct_loss_per_token": 1.6445595026016235, "incorrect_loss_per_token": 1.6137284636497498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4758411645889282, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.4758411645889282, "logits_per_char": -0.7379205822944641, "num_chars": 2}, {"sum_logits": -1.5744153261184692, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5744153261184692, "logits_per_char": -0.7872076630592346, "num_chars": 2}, {"sum_logits": -1.6596143245697021, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6596143245697021, "logits_per_char": -0.8298071622848511, "num_chars": 2}, {"sum_logits": -1.6445595026016235, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6445595026016235, "logits_per_char": -0.8222797513008118, "num_chars": 2}, {"sum_logits": -1.7450430393218994, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7450430393218994, "logits_per_char": -0.8725215196609497, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 483, "native_id": "8ef78abb86fc282ccb02bbc495f13030", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7492815256118774, "incorrect_loss_raw": 1.6013992130756378, "correct_loss_per_char": 0.8746407628059387, "incorrect_loss_per_char": 0.8006996065378189, "correct_loss_per_token": 1.7492815256118774, "incorrect_loss_per_token": 1.6013992130756378, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3816094398498535, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3816094398498535, "logits_per_char": -0.6908047199249268, "num_chars": 2}, {"sum_logits": -1.5280746221542358, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5280746221542358, "logits_per_char": -0.7640373110771179, "num_chars": 2}, {"sum_logits": -1.6028684377670288, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6028684377670288, "logits_per_char": -0.8014342188835144, "num_chars": 2}, {"sum_logits": -1.7492815256118774, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.7492815256118774, "logits_per_char": -0.8746407628059387, "num_chars": 2}, {"sum_logits": -1.893044352531433, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.893044352531433, "logits_per_char": -0.9465221762657166, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 484, "native_id": "313d033c33ec475e04e628f87c5686bd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6009788513183594, "incorrect_loss_raw": 1.6388919949531555, "correct_loss_per_char": 0.8004894256591797, "incorrect_loss_per_char": 0.8194459974765778, "correct_loss_per_token": 1.6009788513183594, "incorrect_loss_per_token": 1.6388919949531555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3765203952789307, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.3765203952789307, "logits_per_char": -0.6882601976394653, "num_chars": 2}, {"sum_logits": -1.5795905590057373, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5795905590057373, "logits_per_char": -0.7897952795028687, "num_chars": 2}, {"sum_logits": -1.6009788513183594, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6009788513183594, "logits_per_char": -0.8004894256591797, "num_chars": 2}, {"sum_logits": -1.6314103603363037, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6314103603363037, "logits_per_char": -0.8157051801681519, "num_chars": 2}, {"sum_logits": -1.9680466651916504, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.9680466651916504, "logits_per_char": -0.9840233325958252, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 485, "native_id": "d581e0ad6a4c89465dc1a527bd2d3f77", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.017850399017334, "incorrect_loss_raw": 1.5488397777080536, "correct_loss_per_char": 1.008925199508667, "incorrect_loss_per_char": 0.7744198888540268, "correct_loss_per_token": 2.017850399017334, "incorrect_loss_per_token": 1.5488397777080536, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3479396104812622, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3479396104812622, "logits_per_char": -0.6739698052406311, "num_chars": 2}, {"sum_logits": -1.411529541015625, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.411529541015625, "logits_per_char": -0.7057647705078125, "num_chars": 2}, {"sum_logits": -1.6716123819351196, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6716123819351196, "logits_per_char": -0.8358061909675598, "num_chars": 2}, {"sum_logits": -1.7642775774002075, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7642775774002075, "logits_per_char": -0.8821387887001038, "num_chars": 2}, {"sum_logits": -2.017850399017334, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.017850399017334, "logits_per_char": -1.008925199508667, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 486, "native_id": "f232bfea2a7611999688a252e476c040", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4747107028961182, "incorrect_loss_raw": 1.673039197921753, "correct_loss_per_char": 0.7373553514480591, "incorrect_loss_per_char": 0.8365195989608765, "correct_loss_per_token": 1.4747107028961182, "incorrect_loss_per_token": 1.673039197921753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4331520795822144, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.4331520795822144, "logits_per_char": -0.7165760397911072, "num_chars": 2}, {"sum_logits": -1.4747107028961182, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.4747107028961182, "logits_per_char": -0.7373553514480591, "num_chars": 2}, {"sum_logits": -1.6244220733642578, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6244220733642578, "logits_per_char": -0.8122110366821289, "num_chars": 2}, {"sum_logits": -1.6295253038406372, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6295253038406372, "logits_per_char": -0.8147626519203186, "num_chars": 2}, {"sum_logits": -2.0050573348999023, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.0050573348999023, "logits_per_char": -1.0025286674499512, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 487, "native_id": "91756d8e475d8d59fa0a4e35f408e366", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6253256797790527, "incorrect_loss_raw": 1.6905890703201294, "correct_loss_per_char": 0.8126628398895264, "incorrect_loss_per_char": 0.8452945351600647, "correct_loss_per_token": 1.6253256797790527, "incorrect_loss_per_token": 1.6905890703201294, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2423852682113647, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2423852682113647, "logits_per_char": -0.6211926341056824, "num_chars": 2}, {"sum_logits": -1.4734854698181152, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4734854698181152, "logits_per_char": -0.7367427349090576, "num_chars": 2}, {"sum_logits": -1.6671892404556274, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6671892404556274, "logits_per_char": -0.8335946202278137, "num_chars": 2}, {"sum_logits": -1.6253256797790527, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6253256797790527, "logits_per_char": -0.8126628398895264, "num_chars": 2}, {"sum_logits": -2.37929630279541, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.37929630279541, "logits_per_char": -1.189648151397705, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 488, "native_id": "866ea9c668c0b42df19fa20865e31f77", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6834454536437988, "incorrect_loss_raw": 1.6347788870334625, "correct_loss_per_char": 0.8417227268218994, "incorrect_loss_per_char": 0.8173894435167313, "correct_loss_per_token": 1.6834454536437988, "incorrect_loss_per_token": 1.6347788870334625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.365127682685852, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.365127682685852, "logits_per_char": -0.682563841342926, "num_chars": 2}, {"sum_logits": -1.460191011428833, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.460191011428833, "logits_per_char": -0.7300955057144165, "num_chars": 2}, {"sum_logits": -1.6155521869659424, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6155521869659424, "logits_per_char": -0.8077760934829712, "num_chars": 2}, {"sum_logits": -1.6834454536437988, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6834454536437988, "logits_per_char": -0.8417227268218994, "num_chars": 2}, {"sum_logits": -2.0982446670532227, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.0982446670532227, "logits_per_char": -1.0491223335266113, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 489, "native_id": "22015315e7ff79386877828b4fa27799", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5751806497573853, "incorrect_loss_raw": 1.6548928320407867, "correct_loss_per_char": 0.7875903248786926, "incorrect_loss_per_char": 0.8274464160203934, "correct_loss_per_token": 1.5751806497573853, "incorrect_loss_per_token": 1.6548928320407867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4722660779953003, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4722660779953003, "logits_per_char": -0.7361330389976501, "num_chars": 2}, {"sum_logits": -1.5427998304367065, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5427998304367065, "logits_per_char": -0.7713999152183533, "num_chars": 2}, {"sum_logits": -1.4968451261520386, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4968451261520386, "logits_per_char": -0.7484225630760193, "num_chars": 2}, {"sum_logits": -1.5751806497573853, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5751806497573853, "logits_per_char": -0.7875903248786926, "num_chars": 2}, {"sum_logits": -2.1076602935791016, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.1076602935791016, "logits_per_char": -1.0538301467895508, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 490, "native_id": "484f6e4fb8e6431b010c299490b72e3c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4714525938034058, "incorrect_loss_raw": 1.6851942837238312, "correct_loss_per_char": 0.7357262969017029, "incorrect_loss_per_char": 0.8425971418619156, "correct_loss_per_token": 1.4714525938034058, "incorrect_loss_per_token": 1.6851942837238312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4296447038650513, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4296447038650513, "logits_per_char": -0.7148223519325256, "num_chars": 2}, {"sum_logits": -1.4714525938034058, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4714525938034058, "logits_per_char": -0.7357262969017029, "num_chars": 2}, {"sum_logits": -1.5535115003585815, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5535115003585815, "logits_per_char": -0.7767557501792908, "num_chars": 2}, {"sum_logits": -1.6340137720108032, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6340137720108032, "logits_per_char": -0.8170068860054016, "num_chars": 2}, {"sum_logits": -2.1236071586608887, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.1236071586608887, "logits_per_char": -1.0618035793304443, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 491, "native_id": "7322d0dcf2e27c7032626a3639f5696b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2749204635620117, "incorrect_loss_raw": 1.5165001451969147, "correct_loss_per_char": 1.1374602317810059, "incorrect_loss_per_char": 0.7582500725984573, "correct_loss_per_token": 2.2749204635620117, "incorrect_loss_per_token": 1.5165001451969147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2525198459625244, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2525198459625244, "logits_per_char": -0.6262599229812622, "num_chars": 2}, {"sum_logits": -1.5235908031463623, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5235908031463623, "logits_per_char": -0.7617954015731812, "num_chars": 2}, {"sum_logits": -1.5206794738769531, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5206794738769531, "logits_per_char": -0.7603397369384766, "num_chars": 2}, {"sum_logits": -1.7692104578018188, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7692104578018188, "logits_per_char": -0.8846052289009094, "num_chars": 2}, {"sum_logits": -2.2749204635620117, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.2749204635620117, "logits_per_char": -1.1374602317810059, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 492, "native_id": "0519b0b0869681c2884f53dbfa43e538", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6903064250946045, "incorrect_loss_raw": 1.6339434385299683, "correct_loss_per_char": 0.8451532125473022, "incorrect_loss_per_char": 0.8169717192649841, "correct_loss_per_token": 1.6903064250946045, "incorrect_loss_per_token": 1.6339434385299683, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.266096591949463, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.266096591949463, "logits_per_char": -0.6330482959747314, "num_chars": 2}, {"sum_logits": -1.5686378479003906, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5686378479003906, "logits_per_char": -0.7843189239501953, "num_chars": 2}, {"sum_logits": -1.6903064250946045, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6903064250946045, "logits_per_char": -0.8451532125473022, "num_chars": 2}, {"sum_logits": -1.6508376598358154, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6508376598358154, "logits_per_char": -0.8254188299179077, "num_chars": 2}, {"sum_logits": -2.050201654434204, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -2.050201654434204, "logits_per_char": -1.025100827217102, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 493, "native_id": "1ab04c0501b815b2a48f2581f04215a8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5920804738998413, "incorrect_loss_raw": 1.6636982858181, "correct_loss_per_char": 0.7960402369499207, "incorrect_loss_per_char": 0.83184914290905, "correct_loss_per_token": 1.5920804738998413, "incorrect_loss_per_token": 1.6636982858181, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3420240879058838, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3420240879058838, "logits_per_char": -0.6710120439529419, "num_chars": 2}, {"sum_logits": -1.4463832378387451, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4463832378387451, "logits_per_char": -0.7231916189193726, "num_chars": 2}, {"sum_logits": -1.5920804738998413, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5920804738998413, "logits_per_char": -0.7960402369499207, "num_chars": 2}, {"sum_logits": -1.7276800870895386, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7276800870895386, "logits_per_char": -0.8638400435447693, "num_chars": 2}, {"sum_logits": -2.1387057304382324, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.1387057304382324, "logits_per_char": -1.0693528652191162, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 494, "native_id": "7776b10c7bb96f3fe5e026678673634d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5237329006195068, "incorrect_loss_raw": 1.7127971947193146, "correct_loss_per_char": 0.7618664503097534, "incorrect_loss_per_char": 0.8563985973596573, "correct_loss_per_token": 1.5237329006195068, "incorrect_loss_per_token": 1.7127971947193146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1434742212295532, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.1434742212295532, "logits_per_char": -0.5717371106147766, "num_chars": 2}, {"sum_logits": -1.5237329006195068, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5237329006195068, "logits_per_char": -0.7618664503097534, "num_chars": 2}, {"sum_logits": -1.7400319576263428, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7400319576263428, "logits_per_char": -0.8700159788131714, "num_chars": 2}, {"sum_logits": -1.748492956161499, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.748492956161499, "logits_per_char": -0.8742464780807495, "num_chars": 2}, {"sum_logits": -2.2191896438598633, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.2191896438598633, "logits_per_char": -1.1095948219299316, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 495, "native_id": "f7c005244d406b9bde48dc8c22003af1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3655505180358887, "incorrect_loss_raw": 1.7035391628742218, "correct_loss_per_char": 0.6827752590179443, "incorrect_loss_per_char": 0.8517695814371109, "correct_loss_per_token": 1.3655505180358887, "incorrect_loss_per_token": 1.7035391628742218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3655505180358887, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3655505180358887, "logits_per_char": -0.6827752590179443, "num_chars": 2}, {"sum_logits": -1.5107089281082153, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5107089281082153, "logits_per_char": -0.7553544640541077, "num_chars": 2}, {"sum_logits": -1.6535720825195312, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6535720825195312, "logits_per_char": -0.8267860412597656, "num_chars": 2}, {"sum_logits": -1.6567821502685547, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6567821502685547, "logits_per_char": -0.8283910751342773, "num_chars": 2}, {"sum_logits": -1.993093490600586, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.993093490600586, "logits_per_char": -0.996546745300293, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 496, "native_id": "88501d528c855e2b533b3fea2f86183d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6492527723312378, "incorrect_loss_raw": 1.6620656251907349, "correct_loss_per_char": 0.8246263861656189, "incorrect_loss_per_char": 0.8310328125953674, "correct_loss_per_token": 1.6492527723312378, "incorrect_loss_per_token": 1.6620656251907349, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.290820598602295, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.290820598602295, "logits_per_char": -0.6454102993011475, "num_chars": 2}, {"sum_logits": -1.5214653015136719, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5214653015136719, "logits_per_char": -0.7607326507568359, "num_chars": 2}, {"sum_logits": -1.6048345565795898, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6048345565795898, "logits_per_char": -0.8024172782897949, "num_chars": 2}, {"sum_logits": -1.6492527723312378, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6492527723312378, "logits_per_char": -0.8246263861656189, "num_chars": 2}, {"sum_logits": -2.231142044067383, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.231142044067383, "logits_per_char": -1.1155710220336914, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 497, "native_id": "3d9c3253e24fb108cea9083e8a853cf2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3993576765060425, "incorrect_loss_raw": 1.6796835660934448, "correct_loss_per_char": 0.6996788382530212, "incorrect_loss_per_char": 0.8398417830467224, "correct_loss_per_token": 1.3993576765060425, "incorrect_loss_per_token": 1.6796835660934448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6952522993087769, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6952522993087769, "logits_per_char": -0.8476261496543884, "num_chars": 2}, {"sum_logits": -1.6571004390716553, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6571004390716553, "logits_per_char": -0.8285502195358276, "num_chars": 2}, {"sum_logits": -1.5634173154830933, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5634173154830933, "logits_per_char": -0.7817086577415466, "num_chars": 2}, {"sum_logits": -1.3993576765060425, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.3993576765060425, "logits_per_char": -0.6996788382530212, "num_chars": 2}, {"sum_logits": -1.802964210510254, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.802964210510254, "logits_per_char": -0.901482105255127, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 498, "native_id": "9808782b2e2e1bfbfa27c41e605bfffe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5257821083068848, "incorrect_loss_raw": 1.6700833439826965, "correct_loss_per_char": 0.7628910541534424, "incorrect_loss_per_char": 0.8350416719913483, "correct_loss_per_token": 1.5257821083068848, "incorrect_loss_per_token": 1.6700833439826965, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4450700283050537, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4450700283050537, "logits_per_char": -0.7225350141525269, "num_chars": 2}, {"sum_logits": -1.465573787689209, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.465573787689209, "logits_per_char": -0.7327868938446045, "num_chars": 2}, {"sum_logits": -1.5257821083068848, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5257821083068848, "logits_per_char": -0.7628910541534424, "num_chars": 2}, {"sum_logits": -1.664257287979126, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.664257287979126, "logits_per_char": -0.832128643989563, "num_chars": 2}, {"sum_logits": -2.1054322719573975, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.1054322719573975, "logits_per_char": -1.0527161359786987, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 499, "native_id": "c432b860fcd7297751ff5254ec4a7956", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4051953554153442, "incorrect_loss_raw": 1.6924099922180176, "correct_loss_per_char": 0.7025976777076721, "incorrect_loss_per_char": 0.8462049961090088, "correct_loss_per_token": 1.4051953554153442, "incorrect_loss_per_token": 1.6924099922180176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4051953554153442, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4051953554153442, "logits_per_char": -0.7025976777076721, "num_chars": 2}, {"sum_logits": -1.4947384595870972, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4947384595870972, "logits_per_char": -0.7473692297935486, "num_chars": 2}, {"sum_logits": -1.5501331090927124, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5501331090927124, "logits_per_char": -0.7750665545463562, "num_chars": 2}, {"sum_logits": -1.7303386926651, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.7303386926651, "logits_per_char": -0.86516934633255, "num_chars": 2}, {"sum_logits": -1.9944297075271606, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.9944297075271606, "logits_per_char": -0.9972148537635803, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 500, "native_id": "732af155f677a51d05d0c9e080d598b6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6794798374176025, "incorrect_loss_raw": 1.634115844964981, "correct_loss_per_char": 0.8397399187088013, "incorrect_loss_per_char": 0.8170579224824905, "correct_loss_per_token": 1.6794798374176025, "incorrect_loss_per_token": 1.634115844964981, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2808524370193481, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.2808524370193481, "logits_per_char": -0.6404262185096741, "num_chars": 2}, {"sum_logits": -1.5904362201690674, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5904362201690674, "logits_per_char": -0.7952181100845337, "num_chars": 2}, {"sum_logits": -1.6223320960998535, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.6223320960998535, "logits_per_char": -0.8111660480499268, "num_chars": 2}, {"sum_logits": -1.6794798374176025, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.6794798374176025, "logits_per_char": -0.8397399187088013, "num_chars": 2}, {"sum_logits": -2.0428426265716553, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -2.0428426265716553, "logits_per_char": -1.0214213132858276, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 501, "native_id": "48abc2c113623fd72f758502529f93a5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6165649890899658, "incorrect_loss_raw": 1.6581282317638397, "correct_loss_per_char": 0.8082824945449829, "incorrect_loss_per_char": 0.8290641158819199, "correct_loss_per_token": 1.6165649890899658, "incorrect_loss_per_token": 1.6581282317638397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3331184387207031, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3331184387207031, "logits_per_char": -0.6665592193603516, "num_chars": 2}, {"sum_logits": -1.4239743947982788, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4239743947982788, "logits_per_char": -0.7119871973991394, "num_chars": 2}, {"sum_logits": -1.6165649890899658, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6165649890899658, "logits_per_char": -0.8082824945449829, "num_chars": 2}, {"sum_logits": -1.7590947151184082, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7590947151184082, "logits_per_char": -0.8795473575592041, "num_chars": 2}, {"sum_logits": -2.1163253784179688, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.1163253784179688, "logits_per_char": -1.0581626892089844, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 502, "native_id": "03f06f77aaf80b5f5e296ffbd11e9d82", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4962066411972046, "incorrect_loss_raw": 1.6839163303375244, "correct_loss_per_char": 0.7481033205986023, "incorrect_loss_per_char": 0.8419581651687622, "correct_loss_per_token": 1.4962066411972046, "incorrect_loss_per_token": 1.6839163303375244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.445326805114746, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.445326805114746, "logits_per_char": -0.722663402557373, "num_chars": 2}, {"sum_logits": -1.4962066411972046, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4962066411972046, "logits_per_char": -0.7481033205986023, "num_chars": 2}, {"sum_logits": -1.5245275497436523, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5245275497436523, "logits_per_char": -0.7622637748718262, "num_chars": 2}, {"sum_logits": -1.5798282623291016, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5798282623291016, "logits_per_char": -0.7899141311645508, "num_chars": 2}, {"sum_logits": -2.1859827041625977, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.1859827041625977, "logits_per_char": -1.0929913520812988, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 503, "native_id": "e7084c166ec67d0f983a26e055e845c6", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4598369598388672, "incorrect_loss_raw": 1.6873125433921814, "correct_loss_per_char": 0.7299184799194336, "incorrect_loss_per_char": 0.8436562716960907, "correct_loss_per_token": 1.4598369598388672, "incorrect_loss_per_token": 1.6873125433921814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4931424856185913, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4931424856185913, "logits_per_char": -0.7465712428092957, "num_chars": 2}, {"sum_logits": -1.6314691305160522, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.6314691305160522, "logits_per_char": -0.8157345652580261, "num_chars": 2}, {"sum_logits": -1.4598369598388672, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.4598369598388672, "logits_per_char": -0.7299184799194336, "num_chars": 2}, {"sum_logits": -1.5188531875610352, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.5188531875610352, "logits_per_char": -0.7594265937805176, "num_chars": 2}, {"sum_logits": -2.105785369873047, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -2.105785369873047, "logits_per_char": -1.0528926849365234, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 504, "native_id": "c55c31b5a2aa996f3b75ad88c017a6b9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.618537425994873, "incorrect_loss_raw": 1.6386721730232239, "correct_loss_per_char": 0.8092687129974365, "incorrect_loss_per_char": 0.8193360865116119, "correct_loss_per_token": 1.618537425994873, "incorrect_loss_per_token": 1.6386721730232239, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5581661462783813, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5581661462783813, "logits_per_char": -0.7790830731391907, "num_chars": 2}, {"sum_logits": -1.618537425994873, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.618537425994873, "logits_per_char": -0.8092687129974365, "num_chars": 2}, {"sum_logits": -1.5413652658462524, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5413652658462524, "logits_per_char": -0.7706826329231262, "num_chars": 2}, {"sum_logits": -1.4780468940734863, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.4780468940734863, "logits_per_char": -0.7390234470367432, "num_chars": 2}, {"sum_logits": -1.9771103858947754, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.9771103858947754, "logits_per_char": -0.9885551929473877, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 505, "native_id": "463521a93ae71e93bea8b97cdf7a6792", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5614609718322754, "incorrect_loss_raw": 1.6299358308315277, "correct_loss_per_char": 0.7807304859161377, "incorrect_loss_per_char": 0.8149679154157639, "correct_loss_per_token": 1.5614609718322754, "incorrect_loss_per_token": 1.6299358308315277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.548742413520813, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": true, "logits_per_token": -1.548742413520813, "logits_per_char": -0.7743712067604065, "num_chars": 2}, {"sum_logits": -1.6245758533477783, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6245758533477783, "logits_per_char": -0.8122879266738892, "num_chars": 2}, {"sum_logits": -1.6797218322753906, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6797218322753906, "logits_per_char": -0.8398609161376953, "num_chars": 2}, {"sum_logits": -1.5614609718322754, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.5614609718322754, "logits_per_char": -0.7807304859161377, "num_chars": 2}, {"sum_logits": -1.666703224182129, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.666703224182129, "logits_per_char": -0.8333516120910645, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 506, "native_id": "c036ce033bc429ac1aba0a6ac8d057e1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5514757633209229, "incorrect_loss_raw": 1.6413764953613281, "correct_loss_per_char": 0.7757378816604614, "incorrect_loss_per_char": 0.8206882476806641, "correct_loss_per_token": 1.5514757633209229, "incorrect_loss_per_token": 1.6413764953613281, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4964354038238525, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4964354038238525, "logits_per_char": -0.7482177019119263, "num_chars": 2}, {"sum_logits": -1.5514757633209229, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5514757633209229, "logits_per_char": -0.7757378816604614, "num_chars": 2}, {"sum_logits": -1.5522799491882324, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5522799491882324, "logits_per_char": -0.7761399745941162, "num_chars": 2}, {"sum_logits": -1.672971487045288, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.672971487045288, "logits_per_char": -0.836485743522644, "num_chars": 2}, {"sum_logits": -1.8438191413879395, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.8438191413879395, "logits_per_char": -0.9219095706939697, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 507, "native_id": "db7f2bfdabcf53d6778fd7af80b603d2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.407850742340088, "incorrect_loss_raw": 1.6975871324539185, "correct_loss_per_char": 0.703925371170044, "incorrect_loss_per_char": 0.8487935662269592, "correct_loss_per_token": 1.407850742340088, "incorrect_loss_per_token": 1.6975871324539185, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.407850742340088, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.407850742340088, "logits_per_char": -0.703925371170044, "num_chars": 2}, {"sum_logits": -1.5287631750106812, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5287631750106812, "logits_per_char": -0.7643815875053406, "num_chars": 2}, {"sum_logits": -1.5633405447006226, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5633405447006226, "logits_per_char": -0.7816702723503113, "num_chars": 2}, {"sum_logits": -1.5951414108276367, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5951414108276367, "logits_per_char": -0.7975707054138184, "num_chars": 2}, {"sum_logits": -2.1031033992767334, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.1031033992767334, "logits_per_char": -1.0515516996383667, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 508, "native_id": "8605fd2affc796d79073d0f3ef0761c9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4616763591766357, "incorrect_loss_raw": 1.6807630062103271, "correct_loss_per_char": 0.7308381795883179, "incorrect_loss_per_char": 0.8403815031051636, "correct_loss_per_token": 1.4616763591766357, "incorrect_loss_per_token": 1.6807630062103271, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3739268779754639, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3739268779754639, "logits_per_char": -0.6869634389877319, "num_chars": 2}, {"sum_logits": -1.4616763591766357, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4616763591766357, "logits_per_char": -0.7308381795883179, "num_chars": 2}, {"sum_logits": -1.6798267364501953, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6798267364501953, "logits_per_char": -0.8399133682250977, "num_chars": 2}, {"sum_logits": -1.6508049964904785, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6508049964904785, "logits_per_char": -0.8254024982452393, "num_chars": 2}, {"sum_logits": -2.018493413925171, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.018493413925171, "logits_per_char": -1.0092467069625854, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 509, "native_id": "ad37795fd9e3a65553683ff305b5113d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.638547658920288, "incorrect_loss_raw": 1.6282995641231537, "correct_loss_per_char": 0.819273829460144, "incorrect_loss_per_char": 0.8141497820615768, "correct_loss_per_token": 1.638547658920288, "incorrect_loss_per_token": 1.6282995641231537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4327871799468994, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4327871799468994, "logits_per_char": -0.7163935899734497, "num_chars": 2}, {"sum_logits": -1.79102623462677, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.79102623462677, "logits_per_char": -0.895513117313385, "num_chars": 2}, {"sum_logits": -1.4407137632369995, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4407137632369995, "logits_per_char": -0.7203568816184998, "num_chars": 2}, {"sum_logits": -1.638547658920288, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.638547658920288, "logits_per_char": -0.819273829460144, "num_chars": 2}, {"sum_logits": -1.8486710786819458, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.8486710786819458, "logits_per_char": -0.9243355393409729, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 510, "native_id": "bcd51af35d691f5c3b6b548096ab1559", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5985699892044067, "incorrect_loss_raw": 1.639747679233551, "correct_loss_per_char": 0.7992849946022034, "incorrect_loss_per_char": 0.8198738396167755, "correct_loss_per_token": 1.5985699892044067, "incorrect_loss_per_token": 1.639747679233551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5174864530563354, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5174864530563354, "logits_per_char": -0.7587432265281677, "num_chars": 2}, {"sum_logits": -1.5896984338760376, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5896984338760376, "logits_per_char": -0.7948492169380188, "num_chars": 2}, {"sum_logits": -1.4507510662078857, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.4507510662078857, "logits_per_char": -0.7253755331039429, "num_chars": 2}, {"sum_logits": -1.5985699892044067, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5985699892044067, "logits_per_char": -0.7992849946022034, "num_chars": 2}, {"sum_logits": -2.0010547637939453, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.0010547637939453, "logits_per_char": -1.0005273818969727, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 511, "native_id": "b5345f15d5b451562ab9e0851e7f394f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6523391008377075, "incorrect_loss_raw": 1.634129673242569, "correct_loss_per_char": 0.8261695504188538, "incorrect_loss_per_char": 0.8170648366212845, "correct_loss_per_token": 1.6523391008377075, "incorrect_loss_per_token": 1.634129673242569, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.320349931716919, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.320349931716919, "logits_per_char": -0.6601749658584595, "num_chars": 2}, {"sum_logits": -1.5761955976486206, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5761955976486206, "logits_per_char": -0.7880977988243103, "num_chars": 2}, {"sum_logits": -1.6523391008377075, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6523391008377075, "logits_per_char": -0.8261695504188538, "num_chars": 2}, {"sum_logits": -1.6505085229873657, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6505085229873657, "logits_per_char": -0.8252542614936829, "num_chars": 2}, {"sum_logits": -1.9894646406173706, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9894646406173706, "logits_per_char": -0.9947323203086853, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 512, "native_id": "6a884d5d8febfdd86fcf68ff1a904d9b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3785929679870605, "incorrect_loss_raw": 1.6915559768676758, "correct_loss_per_char": 0.6892964839935303, "incorrect_loss_per_char": 0.8457779884338379, "correct_loss_per_token": 1.3785929679870605, "incorrect_loss_per_token": 1.6915559768676758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3785929679870605, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3785929679870605, "logits_per_char": -0.6892964839935303, "num_chars": 2}, {"sum_logits": -1.5383073091506958, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5383073091506958, "logits_per_char": -0.7691536545753479, "num_chars": 2}, {"sum_logits": -1.6641813516616821, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6641813516616821, "logits_per_char": -0.8320906758308411, "num_chars": 2}, {"sum_logits": -1.6644130945205688, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6644130945205688, "logits_per_char": -0.8322065472602844, "num_chars": 2}, {"sum_logits": -1.8993221521377563, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.8993221521377563, "logits_per_char": -0.9496610760688782, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 513, "native_id": "a1303b5177df0a5b653c9abd7d5f5e08", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.67715322971344, "incorrect_loss_raw": 1.6408756077289581, "correct_loss_per_char": 0.83857661485672, "incorrect_loss_per_char": 0.8204378038644791, "correct_loss_per_token": 1.67715322971344, "incorrect_loss_per_token": 1.6408756077289581, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3485586643218994, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3485586643218994, "logits_per_char": -0.6742793321609497, "num_chars": 2}, {"sum_logits": -1.5348742008209229, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5348742008209229, "logits_per_char": -0.7674371004104614, "num_chars": 2}, {"sum_logits": -1.5238229036331177, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5238229036331177, "logits_per_char": -0.7619114518165588, "num_chars": 2}, {"sum_logits": -1.67715322971344, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.67715322971344, "logits_per_char": -0.83857661485672, "num_chars": 2}, {"sum_logits": -2.1562466621398926, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.1562466621398926, "logits_per_char": -1.0781233310699463, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 514, "native_id": "315baf79f8dd3673f67a90de0758240e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.772064447402954, "incorrect_loss_raw": 1.5793084502220154, "correct_loss_per_char": 0.886032223701477, "incorrect_loss_per_char": 0.7896542251110077, "correct_loss_per_token": 1.772064447402954, "incorrect_loss_per_token": 1.5793084502220154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5750148296356201, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5750148296356201, "logits_per_char": -0.7875074148178101, "num_chars": 2}, {"sum_logits": -1.5911614894866943, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5911614894866943, "logits_per_char": -0.7955807447433472, "num_chars": 2}, {"sum_logits": -1.526120901107788, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.526120901107788, "logits_per_char": -0.763060450553894, "num_chars": 2}, {"sum_logits": -1.624936580657959, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.624936580657959, "logits_per_char": -0.8124682903289795, "num_chars": 2}, {"sum_logits": -1.772064447402954, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.772064447402954, "logits_per_char": -0.886032223701477, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 515, "native_id": "01f01cc3ad152773ef42b30e926912bf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3667781352996826, "incorrect_loss_raw": 1.7198744118213654, "correct_loss_per_char": 0.6833890676498413, "incorrect_loss_per_char": 0.8599372059106827, "correct_loss_per_token": 1.3667781352996826, "incorrect_loss_per_token": 1.7198744118213654, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3667781352996826, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3667781352996826, "logits_per_char": -0.6833890676498413, "num_chars": 2}, {"sum_logits": -1.45354425907135, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.45354425907135, "logits_per_char": -0.726772129535675, "num_chars": 2}, {"sum_logits": -1.5678907632827759, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5678907632827759, "logits_per_char": -0.7839453816413879, "num_chars": 2}, {"sum_logits": -1.7102583646774292, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7102583646774292, "logits_per_char": -0.8551291823387146, "num_chars": 2}, {"sum_logits": -2.1478042602539062, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.1478042602539062, "logits_per_char": -1.0739021301269531, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 516, "native_id": "f192cfacbaa2f7e0e879f673c8e076a7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.393419861793518, "incorrect_loss_raw": 1.6842171847820282, "correct_loss_per_char": 0.696709930896759, "incorrect_loss_per_char": 0.8421085923910141, "correct_loss_per_token": 1.393419861793518, "incorrect_loss_per_token": 1.6842171847820282, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.393419861793518, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.393419861793518, "logits_per_char": -0.696709930896759, "num_chars": 2}, {"sum_logits": -1.6159051656723022, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6159051656723022, "logits_per_char": -0.8079525828361511, "num_chars": 2}, {"sum_logits": -1.66645348072052, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.66645348072052, "logits_per_char": -0.83322674036026, "num_chars": 2}, {"sum_logits": -1.6828713417053223, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6828713417053223, "logits_per_char": -0.8414356708526611, "num_chars": 2}, {"sum_logits": -1.7716387510299683, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7716387510299683, "logits_per_char": -0.8858193755149841, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 517, "native_id": "ab8d5e21a2cf34b60a04768b01f1f8e9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.677226185798645, "incorrect_loss_raw": 1.6068567037582397, "correct_loss_per_char": 0.8386130928993225, "incorrect_loss_per_char": 0.8034283518791199, "correct_loss_per_token": 1.677226185798645, "incorrect_loss_per_token": 1.6068567037582397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.432982087135315, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.432982087135315, "logits_per_char": -0.7164910435676575, "num_chars": 2}, {"sum_logits": -1.677226185798645, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.677226185798645, "logits_per_char": -0.8386130928993225, "num_chars": 2}, {"sum_logits": -1.6228848695755005, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6228848695755005, "logits_per_char": -0.8114424347877502, "num_chars": 2}, {"sum_logits": -1.6595114469528198, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6595114469528198, "logits_per_char": -0.8297557234764099, "num_chars": 2}, {"sum_logits": -1.7120484113693237, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7120484113693237, "logits_per_char": -0.8560242056846619, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 518, "native_id": "5d1df1daa886efb78db2103ddc1398eb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4019815921783447, "incorrect_loss_raw": 1.6946603953838348, "correct_loss_per_char": 0.7009907960891724, "incorrect_loss_per_char": 0.8473301976919174, "correct_loss_per_token": 1.4019815921783447, "incorrect_loss_per_token": 1.6946603953838348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4019815921783447, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4019815921783447, "logits_per_char": -0.7009907960891724, "num_chars": 2}, {"sum_logits": -1.4677857160568237, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4677857160568237, "logits_per_char": -0.7338928580284119, "num_chars": 2}, {"sum_logits": -1.6039279699325562, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6039279699325562, "logits_per_char": -0.8019639849662781, "num_chars": 2}, {"sum_logits": -1.6765280961990356, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6765280961990356, "logits_per_char": -0.8382640480995178, "num_chars": 2}, {"sum_logits": -2.030399799346924, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.030399799346924, "logits_per_char": -1.015199899673462, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 519, "native_id": "2f8b35d352097cc9277599be49fab0b3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9558753967285156, "incorrect_loss_raw": 1.5493758022785187, "correct_loss_per_char": 0.9779376983642578, "incorrect_loss_per_char": 0.7746879011392593, "correct_loss_per_token": 1.9558753967285156, "incorrect_loss_per_token": 1.5493758022785187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.441805362701416, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.441805362701416, "logits_per_char": -0.720902681350708, "num_chars": 2}, {"sum_logits": -1.4763160943984985, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4763160943984985, "logits_per_char": -0.7381580471992493, "num_chars": 2}, {"sum_logits": -1.6541026830673218, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6541026830673218, "logits_per_char": -0.8270513415336609, "num_chars": 2}, {"sum_logits": -1.6252790689468384, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6252790689468384, "logits_per_char": -0.8126395344734192, "num_chars": 2}, {"sum_logits": -1.9558753967285156, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9558753967285156, "logits_per_char": -0.9779376983642578, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 520, "native_id": "18eb6a3b54ccf4989e268cfb9ea90f9c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4376044273376465, "incorrect_loss_raw": 1.793275147676468, "correct_loss_per_char": 0.7188022136688232, "incorrect_loss_per_char": 0.896637573838234, "correct_loss_per_token": 1.4376044273376465, "incorrect_loss_per_token": 1.793275147676468, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1089640855789185, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.1089640855789185, "logits_per_char": -0.5544820427894592, "num_chars": 2}, {"sum_logits": -1.4376044273376465, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4376044273376465, "logits_per_char": -0.7188022136688232, "num_chars": 2}, {"sum_logits": -1.67892587184906, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.67892587184906, "logits_per_char": -0.83946293592453, "num_chars": 2}, {"sum_logits": -1.7855321168899536, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7855321168899536, "logits_per_char": -0.8927660584449768, "num_chars": 2}, {"sum_logits": -2.5996785163879395, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.5996785163879395, "logits_per_char": -1.2998392581939697, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 521, "native_id": "3e12400bc5a2038a747edf2605787fe8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6800332069396973, "incorrect_loss_raw": 1.649026781320572, "correct_loss_per_char": 0.8400166034698486, "incorrect_loss_per_char": 0.824513390660286, "correct_loss_per_token": 1.6800332069396973, "incorrect_loss_per_token": 1.649026781320572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2497327327728271, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.2497327327728271, "logits_per_char": -0.6248663663864136, "num_chars": 2}, {"sum_logits": -1.4595829248428345, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4595829248428345, "logits_per_char": -0.7297914624214172, "num_chars": 2}, {"sum_logits": -1.6800332069396973, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6800332069396973, "logits_per_char": -0.8400166034698486, "num_chars": 2}, {"sum_logits": -1.7846348285675049, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7846348285675049, "logits_per_char": -0.8923174142837524, "num_chars": 2}, {"sum_logits": -2.102156639099121, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.102156639099121, "logits_per_char": -1.0510783195495605, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 522, "native_id": "72baf6ca5c4daa01c2cc7fda22183db8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5057373046875, "incorrect_loss_raw": 1.6467425525188446, "correct_loss_per_char": 0.75286865234375, "incorrect_loss_per_char": 0.8233712762594223, "correct_loss_per_token": 1.5057373046875, "incorrect_loss_per_token": 1.6467425525188446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5057373046875, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.5057373046875, "logits_per_char": -0.75286865234375, "num_chars": 2}, {"sum_logits": -1.6060731410980225, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6060731410980225, "logits_per_char": -0.8030365705490112, "num_chars": 2}, {"sum_logits": -1.6249308586120605, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6249308586120605, "logits_per_char": -0.8124654293060303, "num_chars": 2}, {"sum_logits": -1.570355772972107, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.570355772972107, "logits_per_char": -0.7851778864860535, "num_chars": 2}, {"sum_logits": -1.7856104373931885, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7856104373931885, "logits_per_char": -0.8928052186965942, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 523, "native_id": "9bac07574c966cae34c85e9f25538cba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.466632604598999, "incorrect_loss_raw": 1.6709502041339874, "correct_loss_per_char": 0.7333163022994995, "incorrect_loss_per_char": 0.8354751020669937, "correct_loss_per_token": 1.466632604598999, "incorrect_loss_per_token": 1.6709502041339874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3917845487594604, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.3917845487594604, "logits_per_char": -0.6958922743797302, "num_chars": 2}, {"sum_logits": -1.466632604598999, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.466632604598999, "logits_per_char": -0.7333163022994995, "num_chars": 2}, {"sum_logits": -1.7543396949768066, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.7543396949768066, "logits_per_char": -0.8771698474884033, "num_chars": 2}, {"sum_logits": -1.716601848602295, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.716601848602295, "logits_per_char": -0.8583009243011475, "num_chars": 2}, {"sum_logits": -1.8210747241973877, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.8210747241973877, "logits_per_char": -0.9105373620986938, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 524, "native_id": "fe2a21ddb1bde76025a961126044a9a3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7580971717834473, "incorrect_loss_raw": 1.6220379769802094, "correct_loss_per_char": 0.8790485858917236, "incorrect_loss_per_char": 0.8110189884901047, "correct_loss_per_token": 1.7580971717834473, "incorrect_loss_per_token": 1.6220379769802094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3019205331802368, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3019205331802368, "logits_per_char": -0.6509602665901184, "num_chars": 2}, {"sum_logits": -1.4814012050628662, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4814012050628662, "logits_per_char": -0.7407006025314331, "num_chars": 2}, {"sum_logits": -1.6032800674438477, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6032800674438477, "logits_per_char": -0.8016400337219238, "num_chars": 2}, {"sum_logits": -1.7580971717834473, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.7580971717834473, "logits_per_char": -0.8790485858917236, "num_chars": 2}, {"sum_logits": -2.1015501022338867, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.1015501022338867, "logits_per_char": -1.0507750511169434, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 525, "native_id": "d03e09b22927542d6b0d5ebe233e467c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0615222454071045, "incorrect_loss_raw": 1.536100596189499, "correct_loss_per_char": 1.0307611227035522, "incorrect_loss_per_char": 0.7680502980947495, "correct_loss_per_token": 2.0615222454071045, "incorrect_loss_per_token": 1.536100596189499, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3260167837142944, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.3260167837142944, "logits_per_char": -0.6630083918571472, "num_chars": 2}, {"sum_logits": -1.539698839187622, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.539698839187622, "logits_per_char": -0.769849419593811, "num_chars": 2}, {"sum_logits": -1.6320264339447021, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6320264339447021, "logits_per_char": -0.8160132169723511, "num_chars": 2}, {"sum_logits": -1.646660327911377, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.646660327911377, "logits_per_char": -0.8233301639556885, "num_chars": 2}, {"sum_logits": -2.0615222454071045, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -2.0615222454071045, "logits_per_char": -1.0307611227035522, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 526, "native_id": "e63a210053cf7f961ca0b5a7e6eb355d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5141136646270752, "incorrect_loss_raw": 1.6538272500038147, "correct_loss_per_char": 0.7570568323135376, "incorrect_loss_per_char": 0.8269136250019073, "correct_loss_per_token": 1.5141136646270752, "incorrect_loss_per_token": 1.6538272500038147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.567335844039917, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.567335844039917, "logits_per_char": -0.7836679220199585, "num_chars": 2}, {"sum_logits": -1.6268563270568848, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6268563270568848, "logits_per_char": -0.8134281635284424, "num_chars": 2}, {"sum_logits": -1.5141136646270752, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5141136646270752, "logits_per_char": -0.7570568323135376, "num_chars": 2}, {"sum_logits": -1.4991296529769897, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4991296529769897, "logits_per_char": -0.7495648264884949, "num_chars": 2}, {"sum_logits": -1.9219871759414673, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9219871759414673, "logits_per_char": -0.9609935879707336, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 527, "native_id": "a4b4242fab25e86a9d7ffedcaecdcdbe", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.546407699584961, "incorrect_loss_raw": 1.638649821281433, "correct_loss_per_char": 0.7732038497924805, "incorrect_loss_per_char": 0.8193249106407166, "correct_loss_per_token": 1.546407699584961, "incorrect_loss_per_token": 1.638649821281433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.586126685142517, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.586126685142517, "logits_per_char": -0.7930633425712585, "num_chars": 2}, {"sum_logits": -1.569332242012024, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.569332242012024, "logits_per_char": -0.784666121006012, "num_chars": 2}, {"sum_logits": -1.5657614469528198, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5657614469528198, "logits_per_char": -0.7828807234764099, "num_chars": 2}, {"sum_logits": -1.546407699584961, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.546407699584961, "logits_per_char": -0.7732038497924805, "num_chars": 2}, {"sum_logits": -1.8333789110183716, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.8333789110183716, "logits_per_char": -0.9166894555091858, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 528, "native_id": "ec8797b12e3c6666ebe70b2a7680b66f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.681252360343933, "incorrect_loss_raw": 1.6136814057826996, "correct_loss_per_char": 0.8406261801719666, "incorrect_loss_per_char": 0.8068407028913498, "correct_loss_per_token": 1.681252360343933, "incorrect_loss_per_token": 1.6136814057826996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3945828676223755, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.3945828676223755, "logits_per_char": -0.6972914338111877, "num_chars": 2}, {"sum_logits": -1.5456292629241943, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5456292629241943, "logits_per_char": -0.7728146314620972, "num_chars": 2}, {"sum_logits": -1.677209496498108, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.677209496498108, "logits_per_char": -0.838604748249054, "num_chars": 2}, {"sum_logits": -1.681252360343933, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.681252360343933, "logits_per_char": -0.8406261801719666, "num_chars": 2}, {"sum_logits": -1.8373039960861206, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.8373039960861206, "logits_per_char": -0.9186519980430603, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 529, "native_id": "4536489e5d8e02aadc3fcc7a55effe20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.617074728012085, "incorrect_loss_raw": 1.6416683793067932, "correct_loss_per_char": 0.8085373640060425, "incorrect_loss_per_char": 0.8208341896533966, "correct_loss_per_token": 1.617074728012085, "incorrect_loss_per_token": 1.6416683793067932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3997613191604614, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3997613191604614, "logits_per_char": -0.6998806595802307, "num_chars": 2}, {"sum_logits": -1.5313459634780884, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5313459634780884, "logits_per_char": -0.7656729817390442, "num_chars": 2}, {"sum_logits": -1.596564769744873, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.596564769744873, "logits_per_char": -0.7982823848724365, "num_chars": 2}, {"sum_logits": -1.617074728012085, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.617074728012085, "logits_per_char": -0.8085373640060425, "num_chars": 2}, {"sum_logits": -2.03900146484375, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.03900146484375, "logits_per_char": -1.019500732421875, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 530, "native_id": "0854478d174c9127064f0d4b58df7e62", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5894324779510498, "incorrect_loss_raw": 1.6285107731819153, "correct_loss_per_char": 0.7947162389755249, "incorrect_loss_per_char": 0.8142553865909576, "correct_loss_per_token": 1.5894324779510498, "incorrect_loss_per_token": 1.6285107731819153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4581894874572754, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4581894874572754, "logits_per_char": -0.7290947437286377, "num_chars": 2}, {"sum_logits": -1.5682120323181152, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5682120323181152, "logits_per_char": -0.7841060161590576, "num_chars": 2}, {"sum_logits": -1.5894324779510498, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5894324779510498, "logits_per_char": -0.7947162389755249, "num_chars": 2}, {"sum_logits": -1.7314636707305908, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7314636707305908, "logits_per_char": -0.8657318353652954, "num_chars": 2}, {"sum_logits": -1.7561779022216797, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7561779022216797, "logits_per_char": -0.8780889511108398, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 531, "native_id": "4b7d1b70060cd1f1a7321795f62a7325", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5918669700622559, "incorrect_loss_raw": 1.6219947040081024, "correct_loss_per_char": 0.7959334850311279, "incorrect_loss_per_char": 0.8109973520040512, "correct_loss_per_token": 1.5918669700622559, "incorrect_loss_per_token": 1.6219947040081024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6485093832015991, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6485093832015991, "logits_per_char": -0.8242546916007996, "num_chars": 2}, {"sum_logits": -1.5918669700622559, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5918669700622559, "logits_per_char": -0.7959334850311279, "num_chars": 2}, {"sum_logits": -1.5851709842681885, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.5851709842681885, "logits_per_char": -0.7925854921340942, "num_chars": 2}, {"sum_logits": -1.5961642265319824, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5961642265319824, "logits_per_char": -0.7980821132659912, "num_chars": 2}, {"sum_logits": -1.6581342220306396, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6581342220306396, "logits_per_char": -0.8290671110153198, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 532, "native_id": "0e6a005eec5e6746f3facf4d608bfd8b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5369118452072144, "incorrect_loss_raw": 1.6532542407512665, "correct_loss_per_char": 0.7684559226036072, "incorrect_loss_per_char": 0.8266271203756332, "correct_loss_per_token": 1.5369118452072144, "incorrect_loss_per_token": 1.6532542407512665, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.445749044418335, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.445749044418335, "logits_per_char": -0.7228745222091675, "num_chars": 2}, {"sum_logits": -1.5617610216140747, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5617610216140747, "logits_per_char": -0.7808805108070374, "num_chars": 2}, {"sum_logits": -1.6474007368087769, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6474007368087769, "logits_per_char": -0.8237003684043884, "num_chars": 2}, {"sum_logits": -1.5369118452072144, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5369118452072144, "logits_per_char": -0.7684559226036072, "num_chars": 2}, {"sum_logits": -1.9581061601638794, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9581061601638794, "logits_per_char": -0.9790530800819397, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 533, "native_id": "2d2b69ad187b7c40273ab13caab7dc19", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3926570415496826, "incorrect_loss_raw": 1.7073356211185455, "correct_loss_per_char": 0.6963285207748413, "incorrect_loss_per_char": 0.8536678105592728, "correct_loss_per_token": 1.3926570415496826, "incorrect_loss_per_token": 1.7073356211185455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3926570415496826, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3926570415496826, "logits_per_char": -0.6963285207748413, "num_chars": 2}, {"sum_logits": -1.4383348226547241, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4383348226547241, "logits_per_char": -0.7191674113273621, "num_chars": 2}, {"sum_logits": -1.5490138530731201, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5490138530731201, "logits_per_char": -0.7745069265365601, "num_chars": 2}, {"sum_logits": -1.7417733669281006, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7417733669281006, "logits_per_char": -0.8708866834640503, "num_chars": 2}, {"sum_logits": -2.1002204418182373, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.1002204418182373, "logits_per_char": -1.0501102209091187, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 534, "native_id": "fde1f9bfc33da302449c0b950d16c0ea", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6574912071228027, "incorrect_loss_raw": 1.6091816425323486, "correct_loss_per_char": 0.8287456035614014, "incorrect_loss_per_char": 0.8045908212661743, "correct_loss_per_token": 1.6574912071228027, "incorrect_loss_per_token": 1.6091816425323486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4783217906951904, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.4783217906951904, "logits_per_char": -0.7391608953475952, "num_chars": 2}, {"sum_logits": -1.6574912071228027, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6574912071228027, "logits_per_char": -0.8287456035614014, "num_chars": 2}, {"sum_logits": -1.6475555896759033, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6475555896759033, "logits_per_char": -0.8237777948379517, "num_chars": 2}, {"sum_logits": -1.7099196910858154, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7099196910858154, "logits_per_char": -0.8549598455429077, "num_chars": 2}, {"sum_logits": -1.6009294986724854, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6009294986724854, "logits_per_char": -0.8004647493362427, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 535, "native_id": "3c90a632f46aeab11fbb73aa59a33892", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.336373209953308, "incorrect_loss_raw": 1.7294528186321259, "correct_loss_per_char": 0.668186604976654, "incorrect_loss_per_char": 0.8647264093160629, "correct_loss_per_token": 1.336373209953308, "incorrect_loss_per_token": 1.7294528186321259, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.336373209953308, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.336373209953308, "logits_per_char": -0.668186604976654, "num_chars": 2}, {"sum_logits": -1.4461851119995117, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4461851119995117, "logits_per_char": -0.7230925559997559, "num_chars": 2}, {"sum_logits": -1.6293590068817139, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6293590068817139, "logits_per_char": -0.8146795034408569, "num_chars": 2}, {"sum_logits": -1.6814709901809692, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6814709901809692, "logits_per_char": -0.8407354950904846, "num_chars": 2}, {"sum_logits": -2.1607961654663086, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.1607961654663086, "logits_per_char": -1.0803980827331543, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 536, "native_id": "1f3ccb722600da7d862531416934949a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.662255883216858, "incorrect_loss_raw": 1.6154115498065948, "correct_loss_per_char": 0.831127941608429, "incorrect_loss_per_char": 0.8077057749032974, "correct_loss_per_token": 1.662255883216858, "incorrect_loss_per_token": 1.6154115498065948, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4119775295257568, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4119775295257568, "logits_per_char": -0.7059887647628784, "num_chars": 2}, {"sum_logits": -1.5923504829406738, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5923504829406738, "logits_per_char": -0.7961752414703369, "num_chars": 2}, {"sum_logits": -1.662255883216858, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.662255883216858, "logits_per_char": -0.831127941608429, "num_chars": 2}, {"sum_logits": -1.5764731168746948, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5764731168746948, "logits_per_char": -0.7882365584373474, "num_chars": 2}, {"sum_logits": -1.880845069885254, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.880845069885254, "logits_per_char": -0.940422534942627, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 537, "native_id": "46ba5d2b8cfc6708e5e2618568d8730e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4098581075668335, "incorrect_loss_raw": 1.691277414560318, "correct_loss_per_char": 0.7049290537834167, "incorrect_loss_per_char": 0.845638707280159, "correct_loss_per_token": 1.4098581075668335, "incorrect_loss_per_token": 1.691277414560318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4098581075668335, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4098581075668335, "logits_per_char": -0.7049290537834167, "num_chars": 2}, {"sum_logits": -1.4766143560409546, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4766143560409546, "logits_per_char": -0.7383071780204773, "num_chars": 2}, {"sum_logits": -1.6221355199813843, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6221355199813843, "logits_per_char": -0.8110677599906921, "num_chars": 2}, {"sum_logits": -1.6324599981307983, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6324599981307983, "logits_per_char": -0.8162299990653992, "num_chars": 2}, {"sum_logits": -2.0338997840881348, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.0338997840881348, "logits_per_char": -1.0169498920440674, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 538, "native_id": "f8a2cbc7189b92a809ce9cd857030621", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4961684942245483, "incorrect_loss_raw": 1.657261848449707, "correct_loss_per_char": 0.7480842471122742, "incorrect_loss_per_char": 0.8286309242248535, "correct_loss_per_token": 1.4961684942245483, "incorrect_loss_per_token": 1.657261848449707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.51329505443573, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.51329505443573, "logits_per_char": -0.756647527217865, "num_chars": 2}, {"sum_logits": -1.4961684942245483, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4961684942245483, "logits_per_char": -0.7480842471122742, "num_chars": 2}, {"sum_logits": -1.57878839969635, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.57878839969635, "logits_per_char": -0.789394199848175, "num_chars": 2}, {"sum_logits": -1.6112033128738403, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6112033128738403, "logits_per_char": -0.8056016564369202, "num_chars": 2}, {"sum_logits": -1.9257606267929077, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9257606267929077, "logits_per_char": -0.9628803133964539, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 539, "native_id": "225287e06c993feee34e0f06b25f6ba8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.512792944908142, "incorrect_loss_raw": 1.675104558467865, "correct_loss_per_char": 0.756396472454071, "incorrect_loss_per_char": 0.8375522792339325, "correct_loss_per_token": 1.512792944908142, "incorrect_loss_per_token": 1.675104558467865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.310413122177124, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.310413122177124, "logits_per_char": -0.655206561088562, "num_chars": 2}, {"sum_logits": -1.512792944908142, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.512792944908142, "logits_per_char": -0.756396472454071, "num_chars": 2}, {"sum_logits": -1.6202186346054077, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6202186346054077, "logits_per_char": -0.8101093173027039, "num_chars": 2}, {"sum_logits": -1.7303162813186646, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7303162813186646, "logits_per_char": -0.8651581406593323, "num_chars": 2}, {"sum_logits": -2.0394701957702637, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0394701957702637, "logits_per_char": -1.0197350978851318, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 540, "native_id": "e211b1a3f3401d164c8b0bfc10160caa", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6555287837982178, "incorrect_loss_raw": 1.6287722289562225, "correct_loss_per_char": 0.8277643918991089, "incorrect_loss_per_char": 0.8143861144781113, "correct_loss_per_token": 1.6555287837982178, "incorrect_loss_per_token": 1.6287722289562225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3805958032608032, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3805958032608032, "logits_per_char": -0.6902979016304016, "num_chars": 2}, {"sum_logits": -1.4771294593811035, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4771294593811035, "logits_per_char": -0.7385647296905518, "num_chars": 2}, {"sum_logits": -1.6555287837982178, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6555287837982178, "logits_per_char": -0.8277643918991089, "num_chars": 2}, {"sum_logits": -1.6740832328796387, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6740832328796387, "logits_per_char": -0.8370416164398193, "num_chars": 2}, {"sum_logits": -1.9832804203033447, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9832804203033447, "logits_per_char": -0.9916402101516724, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 541, "native_id": "fce1c5d069758aea57a787fc98dcf7a9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.078277111053467, "incorrect_loss_raw": 1.527550756931305, "correct_loss_per_char": 1.0391385555267334, "incorrect_loss_per_char": 0.7637753784656525, "correct_loss_per_token": 2.078277111053467, "incorrect_loss_per_token": 1.527550756931305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5330954790115356, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5330954790115356, "logits_per_char": -0.7665477395057678, "num_chars": 2}, {"sum_logits": -1.614148497581482, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.614148497581482, "logits_per_char": -0.807074248790741, "num_chars": 2}, {"sum_logits": -1.4258941411972046, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4258941411972046, "logits_per_char": -0.7129470705986023, "num_chars": 2}, {"sum_logits": -1.5370649099349976, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5370649099349976, "logits_per_char": -0.7685324549674988, "num_chars": 2}, {"sum_logits": -2.078277111053467, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.078277111053467, "logits_per_char": -1.0391385555267334, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 542, "native_id": "c0d75f9fbf30aa3a612f16edb20d6b8d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5231362581253052, "incorrect_loss_raw": 1.662276715040207, "correct_loss_per_char": 0.7615681290626526, "incorrect_loss_per_char": 0.8311383575201035, "correct_loss_per_token": 1.5231362581253052, "incorrect_loss_per_token": 1.662276715040207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3986235857009888, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3986235857009888, "logits_per_char": -0.6993117928504944, "num_chars": 2}, {"sum_logits": -1.5231362581253052, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5231362581253052, "logits_per_char": -0.7615681290626526, "num_chars": 2}, {"sum_logits": -1.6317898035049438, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6317898035049438, "logits_per_char": -0.8158949017524719, "num_chars": 2}, {"sum_logits": -1.5835715532302856, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5835715532302856, "logits_per_char": -0.7917857766151428, "num_chars": 2}, {"sum_logits": -2.0351219177246094, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.0351219177246094, "logits_per_char": -1.0175609588623047, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 543, "native_id": "d07f149d8d953dcc45dda432194c375e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4824217557907104, "incorrect_loss_raw": 1.6718740165233612, "correct_loss_per_char": 0.7412108778953552, "incorrect_loss_per_char": 0.8359370082616806, "correct_loss_per_token": 1.4824217557907104, "incorrect_loss_per_token": 1.6718740165233612, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3570582866668701, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.3570582866668701, "logits_per_char": -0.6785291433334351, "num_chars": 2}, {"sum_logits": -1.4824217557907104, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.4824217557907104, "logits_per_char": -0.7412108778953552, "num_chars": 2}, {"sum_logits": -1.683026671409607, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.683026671409607, "logits_per_char": -0.8415133357048035, "num_chars": 2}, {"sum_logits": -1.7367454767227173, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.7367454767227173, "logits_per_char": -0.8683727383613586, "num_chars": 2}, {"sum_logits": -1.9106656312942505, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.9106656312942505, "logits_per_char": -0.9553328156471252, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 544, "native_id": "080a9cf2d6447a9a4d98b0af311e10da", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6784428358078003, "incorrect_loss_raw": 1.6521939933300018, "correct_loss_per_char": 0.8392214179039001, "incorrect_loss_per_char": 0.8260969966650009, "correct_loss_per_token": 1.6784428358078003, "incorrect_loss_per_token": 1.6521939933300018, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3119263648986816, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3119263648986816, "logits_per_char": -0.6559631824493408, "num_chars": 2}, {"sum_logits": -1.5140845775604248, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5140845775604248, "logits_per_char": -0.7570422887802124, "num_chars": 2}, {"sum_logits": -1.5510812997817993, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5510812997817993, "logits_per_char": -0.7755406498908997, "num_chars": 2}, {"sum_logits": -1.6784428358078003, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6784428358078003, "logits_per_char": -0.8392214179039001, "num_chars": 2}, {"sum_logits": -2.2316837310791016, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.2316837310791016, "logits_per_char": -1.1158418655395508, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 545, "native_id": "111501a49dd41ceed9c2073eed5d2b72", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4143694639205933, "incorrect_loss_raw": 1.6756391525268555, "correct_loss_per_char": 0.7071847319602966, "incorrect_loss_per_char": 0.8378195762634277, "correct_loss_per_token": 1.4143694639205933, "incorrect_loss_per_token": 1.6756391525268555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4143694639205933, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4143694639205933, "logits_per_char": -0.7071847319602966, "num_chars": 2}, {"sum_logits": -1.6194347143173218, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6194347143173218, "logits_per_char": -0.8097173571586609, "num_chars": 2}, {"sum_logits": -1.6565402746200562, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6565402746200562, "logits_per_char": -0.8282701373100281, "num_chars": 2}, {"sum_logits": -1.6333037614822388, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6333037614822388, "logits_per_char": -0.8166518807411194, "num_chars": 2}, {"sum_logits": -1.7932778596878052, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7932778596878052, "logits_per_char": -0.8966389298439026, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 546, "native_id": "7bb87c6d8eab57d4e983f60025b1f0dc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6793068647384644, "incorrect_loss_raw": 1.6352073848247528, "correct_loss_per_char": 0.8396534323692322, "incorrect_loss_per_char": 0.8176036924123764, "correct_loss_per_token": 1.6793068647384644, "incorrect_loss_per_token": 1.6352073848247528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.300395131111145, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.300395131111145, "logits_per_char": -0.6501975655555725, "num_chars": 2}, {"sum_logits": -1.5429860353469849, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5429860353469849, "logits_per_char": -0.7714930176734924, "num_chars": 2}, {"sum_logits": -1.6338297128677368, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6338297128677368, "logits_per_char": -0.8169148564338684, "num_chars": 2}, {"sum_logits": -1.6793068647384644, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6793068647384644, "logits_per_char": -0.8396534323692322, "num_chars": 2}, {"sum_logits": -2.0636186599731445, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.0636186599731445, "logits_per_char": -1.0318093299865723, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 547, "native_id": "5c2bc4335c8860342ec2d568ceb6ac6b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5721325874328613, "incorrect_loss_raw": 1.6499474048614502, "correct_loss_per_char": 0.7860662937164307, "incorrect_loss_per_char": 0.8249737024307251, "correct_loss_per_token": 1.5721325874328613, "incorrect_loss_per_token": 1.6499474048614502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5669119358062744, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5669119358062744, "logits_per_char": -0.7834559679031372, "num_chars": 2}, {"sum_logits": -1.513247013092041, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.513247013092041, "logits_per_char": -0.7566235065460205, "num_chars": 2}, {"sum_logits": -1.5721325874328613, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5721325874328613, "logits_per_char": -0.7860662937164307, "num_chars": 2}, {"sum_logits": -1.466214895248413, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.466214895248413, "logits_per_char": -0.7331074476242065, "num_chars": 2}, {"sum_logits": -2.0534157752990723, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0534157752990723, "logits_per_char": -1.0267078876495361, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 548, "native_id": "083861fc5ebb9226fff70544f3f83d2b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4760574102401733, "incorrect_loss_raw": 1.6631961464881897, "correct_loss_per_char": 0.7380287051200867, "incorrect_loss_per_char": 0.8315980732440948, "correct_loss_per_token": 1.4760574102401733, "incorrect_loss_per_token": 1.6631961464881897, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4760574102401733, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.4760574102401733, "logits_per_char": -0.7380287051200867, "num_chars": 2}, {"sum_logits": -1.5108321905136108, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5108321905136108, "logits_per_char": -0.7554160952568054, "num_chars": 2}, {"sum_logits": -1.6036015748977661, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6036015748977661, "logits_per_char": -0.8018007874488831, "num_chars": 2}, {"sum_logits": -1.6098769903182983, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6098769903182983, "logits_per_char": -0.8049384951591492, "num_chars": 2}, {"sum_logits": -1.9284738302230835, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.9284738302230835, "logits_per_char": -0.9642369151115417, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 549, "native_id": "520b0eea9148e3cb4d45aa69a55491eb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9105875492095947, "incorrect_loss_raw": 1.5543110966682434, "correct_loss_per_char": 0.9552937746047974, "incorrect_loss_per_char": 0.7771555483341217, "correct_loss_per_token": 1.9105875492095947, "incorrect_loss_per_token": 1.5543110966682434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4697937965393066, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4697937965393066, "logits_per_char": -0.7348968982696533, "num_chars": 2}, {"sum_logits": -1.6170258522033691, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6170258522033691, "logits_per_char": -0.8085129261016846, "num_chars": 2}, {"sum_logits": -1.491835594177246, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.491835594177246, "logits_per_char": -0.745917797088623, "num_chars": 2}, {"sum_logits": -1.6385891437530518, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6385891437530518, "logits_per_char": -0.8192945718765259, "num_chars": 2}, {"sum_logits": -1.9105875492095947, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9105875492095947, "logits_per_char": -0.9552937746047974, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 550, "native_id": "ef6ede0af827ddd1dc7bbeb36a6fdd22", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3326890468597412, "incorrect_loss_raw": 1.7218173742294312, "correct_loss_per_char": 0.6663445234298706, "incorrect_loss_per_char": 0.8609086871147156, "correct_loss_per_token": 1.3326890468597412, "incorrect_loss_per_token": 1.7218173742294312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3326890468597412, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3326890468597412, "logits_per_char": -0.6663445234298706, "num_chars": 2}, {"sum_logits": -1.551254153251648, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.551254153251648, "logits_per_char": -0.775627076625824, "num_chars": 2}, {"sum_logits": -1.640038013458252, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.640038013458252, "logits_per_char": -0.820019006729126, "num_chars": 2}, {"sum_logits": -1.6231008768081665, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6231008768081665, "logits_per_char": -0.8115504384040833, "num_chars": 2}, {"sum_logits": -2.072876453399658, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.072876453399658, "logits_per_char": -1.036438226699829, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 551, "native_id": "d47986deb91d64b2b15d385da3d2f483", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.167479991912842, "incorrect_loss_raw": 1.517584651708603, "correct_loss_per_char": 1.083739995956421, "incorrect_loss_per_char": 0.7587923258543015, "correct_loss_per_token": 2.167479991912842, "incorrect_loss_per_token": 1.517584651708603, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3792062997817993, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3792062997817993, "logits_per_char": -0.6896031498908997, "num_chars": 2}, {"sum_logits": -1.5124893188476562, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5124893188476562, "logits_per_char": -0.7562446594238281, "num_chars": 2}, {"sum_logits": -1.6310969591140747, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6310969591140747, "logits_per_char": -0.8155484795570374, "num_chars": 2}, {"sum_logits": -1.5475460290908813, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5475460290908813, "logits_per_char": -0.7737730145454407, "num_chars": 2}, {"sum_logits": -2.167479991912842, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.167479991912842, "logits_per_char": -1.083739995956421, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 552, "native_id": "c3b7f4196b12714940ac1b9417194df4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.611878752708435, "incorrect_loss_raw": 1.6519520580768585, "correct_loss_per_char": 0.8059393763542175, "incorrect_loss_per_char": 0.8259760290384293, "correct_loss_per_token": 1.611878752708435, "incorrect_loss_per_token": 1.6519520580768585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.419384479522705, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.419384479522705, "logits_per_char": -0.7096922397613525, "num_chars": 2}, {"sum_logits": -1.6163355112075806, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6163355112075806, "logits_per_char": -0.8081677556037903, "num_chars": 2}, {"sum_logits": -1.439711570739746, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.439711570739746, "logits_per_char": -0.719855785369873, "num_chars": 2}, {"sum_logits": -1.611878752708435, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.611878752708435, "logits_per_char": -0.8059393763542175, "num_chars": 2}, {"sum_logits": -2.1323766708374023, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.1323766708374023, "logits_per_char": -1.0661883354187012, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 553, "native_id": "5d03ad171fd661a28da5b6eb79967a6b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5373936891555786, "incorrect_loss_raw": 1.6599359214305878, "correct_loss_per_char": 0.7686968445777893, "incorrect_loss_per_char": 0.8299679607152939, "correct_loss_per_token": 1.5373936891555786, "incorrect_loss_per_token": 1.6599359214305878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3609700202941895, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3609700202941895, "logits_per_char": -0.6804850101470947, "num_chars": 2}, {"sum_logits": -1.5484355688095093, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5484355688095093, "logits_per_char": -0.7742177844047546, "num_chars": 2}, {"sum_logits": -1.5373936891555786, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5373936891555786, "logits_per_char": -0.7686968445777893, "num_chars": 2}, {"sum_logits": -1.7492600679397583, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7492600679397583, "logits_per_char": -0.8746300339698792, "num_chars": 2}, {"sum_logits": -1.981078028678894, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.981078028678894, "logits_per_char": -0.990539014339447, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 554, "native_id": "7c95d753943c58757fe6e1ccff8aea14", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6989856958389282, "incorrect_loss_raw": 1.616184264421463, "correct_loss_per_char": 0.8494928479194641, "incorrect_loss_per_char": 0.8080921322107315, "correct_loss_per_token": 1.6989856958389282, "incorrect_loss_per_token": 1.616184264421463, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.341286540031433, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.341286540031433, "logits_per_char": -0.6706432700157166, "num_chars": 2}, {"sum_logits": -1.5565831661224365, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5565831661224365, "logits_per_char": -0.7782915830612183, "num_chars": 2}, {"sum_logits": -1.6989856958389282, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6989856958389282, "logits_per_char": -0.8494928479194641, "num_chars": 2}, {"sum_logits": -1.6276620626449585, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6276620626449585, "logits_per_char": -0.8138310313224792, "num_chars": 2}, {"sum_logits": -1.939205288887024, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.939205288887024, "logits_per_char": -0.969602644443512, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 555, "native_id": "88d8bfb9dc8e77ef642acbe1a129f3db", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.688459873199463, "incorrect_loss_raw": 1.6189643144607544, "correct_loss_per_char": 0.8442299365997314, "incorrect_loss_per_char": 0.8094821572303772, "correct_loss_per_token": 1.688459873199463, "incorrect_loss_per_token": 1.6189643144607544, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3970606327056885, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.3970606327056885, "logits_per_char": -0.6985303163528442, "num_chars": 2}, {"sum_logits": -1.4753508567810059, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.4753508567810059, "logits_per_char": -0.7376754283905029, "num_chars": 2}, {"sum_logits": -1.6399691104888916, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6399691104888916, "logits_per_char": -0.8199845552444458, "num_chars": 2}, {"sum_logits": -1.688459873199463, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.688459873199463, "logits_per_char": -0.8442299365997314, "num_chars": 2}, {"sum_logits": -1.9634766578674316, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.9634766578674316, "logits_per_char": -0.9817383289337158, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 556, "native_id": "b1a9b20793b46e46e1beedadbf852f84", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4771922826766968, "incorrect_loss_raw": 1.656839668750763, "correct_loss_per_char": 0.7385961413383484, "incorrect_loss_per_char": 0.8284198343753815, "correct_loss_per_token": 1.4771922826766968, "incorrect_loss_per_token": 1.656839668750763, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4771922826766968, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4771922826766968, "logits_per_char": -0.7385961413383484, "num_chars": 2}, {"sum_logits": -1.5548337697982788, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5548337697982788, "logits_per_char": -0.7774168848991394, "num_chars": 2}, {"sum_logits": -1.6188641786575317, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6188641786575317, "logits_per_char": -0.8094320893287659, "num_chars": 2}, {"sum_logits": -1.6215542554855347, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6215542554855347, "logits_per_char": -0.8107771277427673, "num_chars": 2}, {"sum_logits": -1.8321064710617065, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8321064710617065, "logits_per_char": -0.9160532355308533, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 557, "native_id": "81e016974d33fe383c848b6c819791cd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4821500778198242, "incorrect_loss_raw": 1.6579146087169647, "correct_loss_per_char": 0.7410750389099121, "incorrect_loss_per_char": 0.8289573043584824, "correct_loss_per_token": 1.4821500778198242, "incorrect_loss_per_token": 1.6579146087169647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4821500778198242, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4821500778198242, "logits_per_char": -0.7410750389099121, "num_chars": 2}, {"sum_logits": -1.5993337631225586, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5993337631225586, "logits_per_char": -0.7996668815612793, "num_chars": 2}, {"sum_logits": -1.5300434827804565, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5300434827804565, "logits_per_char": -0.7650217413902283, "num_chars": 2}, {"sum_logits": -1.6801419258117676, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6801419258117676, "logits_per_char": -0.8400709629058838, "num_chars": 2}, {"sum_logits": -1.8221392631530762, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.8221392631530762, "logits_per_char": -0.9110696315765381, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 558, "native_id": "7cf54544d54818d53e7088c0749a3eca", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6538783311843872, "incorrect_loss_raw": 1.677825927734375, "correct_loss_per_char": 0.8269391655921936, "incorrect_loss_per_char": 0.8389129638671875, "correct_loss_per_token": 1.6538783311843872, "incorrect_loss_per_token": 1.677825927734375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.21079421043396, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.21079421043396, "logits_per_char": -0.60539710521698, "num_chars": 2}, {"sum_logits": -1.4562803506851196, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4562803506851196, "logits_per_char": -0.7281401753425598, "num_chars": 2}, {"sum_logits": -1.6538783311843872, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6538783311843872, "logits_per_char": -0.8269391655921936, "num_chars": 2}, {"sum_logits": -1.7911025285720825, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7911025285720825, "logits_per_char": -0.8955512642860413, "num_chars": 2}, {"sum_logits": -2.253126621246338, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.253126621246338, "logits_per_char": -1.126563310623169, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 559, "native_id": "6acd88b9b5dd15e23bbcc3fd679100a8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5374374389648438, "incorrect_loss_raw": 1.645665168762207, "correct_loss_per_char": 0.7687187194824219, "incorrect_loss_per_char": 0.8228325843811035, "correct_loss_per_token": 1.5374374389648438, "incorrect_loss_per_token": 1.645665168762207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5374374389648438, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5374374389648438, "logits_per_char": -0.7687187194824219, "num_chars": 2}, {"sum_logits": -1.4842197895050049, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4842197895050049, "logits_per_char": -0.7421098947525024, "num_chars": 2}, {"sum_logits": -1.544425368309021, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.544425368309021, "logits_per_char": -0.7722126841545105, "num_chars": 2}, {"sum_logits": -1.6648592948913574, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6648592948913574, "logits_per_char": -0.8324296474456787, "num_chars": 2}, {"sum_logits": -1.8891562223434448, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8891562223434448, "logits_per_char": -0.9445781111717224, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 560, "native_id": "c96a86957a9ab1d8ca0aeeb7f040d87a_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7151703834533691, "incorrect_loss_raw": 1.5984704494476318, "correct_loss_per_char": 0.8575851917266846, "incorrect_loss_per_char": 0.7992352247238159, "correct_loss_per_token": 1.7151703834533691, "incorrect_loss_per_token": 1.5984704494476318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4337074756622314, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4337074756622314, "logits_per_char": -0.7168537378311157, "num_chars": 2}, {"sum_logits": -1.7151703834533691, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7151703834533691, "logits_per_char": -0.8575851917266846, "num_chars": 2}, {"sum_logits": -1.6370482444763184, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6370482444763184, "logits_per_char": -0.8185241222381592, "num_chars": 2}, {"sum_logits": -1.5402500629425049, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5402500629425049, "logits_per_char": -0.7701250314712524, "num_chars": 2}, {"sum_logits": -1.7828760147094727, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7828760147094727, "logits_per_char": -0.8914380073547363, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 561, "native_id": "6a1bf527af9ed0685ac5e2bf0bd76647", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4799578189849854, "incorrect_loss_raw": 1.6681633591651917, "correct_loss_per_char": 0.7399789094924927, "incorrect_loss_per_char": 0.8340816795825958, "correct_loss_per_token": 1.4799578189849854, "incorrect_loss_per_token": 1.6681633591651917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5253615379333496, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5253615379333496, "logits_per_char": -0.7626807689666748, "num_chars": 2}, {"sum_logits": -1.4799578189849854, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4799578189849854, "logits_per_char": -0.7399789094924927, "num_chars": 2}, {"sum_logits": -1.5059568881988525, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5059568881988525, "logits_per_char": -0.7529784440994263, "num_chars": 2}, {"sum_logits": -1.6386735439300537, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6386735439300537, "logits_per_char": -0.8193367719650269, "num_chars": 2}, {"sum_logits": -2.0026614665985107, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0026614665985107, "logits_per_char": -1.0013307332992554, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 562, "native_id": "094fe91b20b03c647325fa2ee94470b3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6371203660964966, "incorrect_loss_raw": 1.6305983364582062, "correct_loss_per_char": 0.8185601830482483, "incorrect_loss_per_char": 0.8152991682291031, "correct_loss_per_token": 1.6371203660964966, "incorrect_loss_per_token": 1.6305983364582062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3513329029083252, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3513329029083252, "logits_per_char": -0.6756664514541626, "num_chars": 2}, {"sum_logits": -1.6371203660964966, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6371203660964966, "logits_per_char": -0.8185601830482483, "num_chars": 2}, {"sum_logits": -1.5825337171554565, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5825337171554565, "logits_per_char": -0.7912668585777283, "num_chars": 2}, {"sum_logits": -1.6544548273086548, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6544548273086548, "logits_per_char": -0.8272274136543274, "num_chars": 2}, {"sum_logits": -1.9340718984603882, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9340718984603882, "logits_per_char": -0.9670359492301941, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 563, "native_id": "bee2a6eadfaf7a4fa0a214e341ddbe5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.499902606010437, "incorrect_loss_raw": 1.6560907065868378, "correct_loss_per_char": 0.7499513030052185, "incorrect_loss_per_char": 0.8280453532934189, "correct_loss_per_token": 1.499902606010437, "incorrect_loss_per_token": 1.6560907065868378, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.499902606010437, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.499902606010437, "logits_per_char": -0.7499513030052185, "num_chars": 2}, {"sum_logits": -1.5660426616668701, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5660426616668701, "logits_per_char": -0.7830213308334351, "num_chars": 2}, {"sum_logits": -1.5917415618896484, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5917415618896484, "logits_per_char": -0.7958707809448242, "num_chars": 2}, {"sum_logits": -1.5304991006851196, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5304991006851196, "logits_per_char": -0.7652495503425598, "num_chars": 2}, {"sum_logits": -1.936079502105713, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.936079502105713, "logits_per_char": -0.9680397510528564, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 564, "native_id": "2f97a77d155cb99092e8a7c055737b03_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6030585765838623, "incorrect_loss_raw": 1.6544285416603088, "correct_loss_per_char": 0.8015292882919312, "incorrect_loss_per_char": 0.8272142708301544, "correct_loss_per_token": 1.6030585765838623, "incorrect_loss_per_token": 1.6544285416603088, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5016262531280518, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5016262531280518, "logits_per_char": -0.7508131265640259, "num_chars": 2}, {"sum_logits": -1.4594337940216064, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4594337940216064, "logits_per_char": -0.7297168970108032, "num_chars": 2}, {"sum_logits": -1.5307338237762451, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5307338237762451, "logits_per_char": -0.7653669118881226, "num_chars": 2}, {"sum_logits": -1.6030585765838623, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6030585765838623, "logits_per_char": -0.8015292882919312, "num_chars": 2}, {"sum_logits": -2.125920295715332, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.125920295715332, "logits_per_char": -1.062960147857666, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 565, "native_id": "bc268cd19e2c95c78967fd6b9092fb90", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4503002166748047, "incorrect_loss_raw": 1.6651182174682617, "correct_loss_per_char": 0.7251501083374023, "incorrect_loss_per_char": 0.8325591087341309, "correct_loss_per_token": 1.4503002166748047, "incorrect_loss_per_token": 1.6651182174682617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4503002166748047, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4503002166748047, "logits_per_char": -0.7251501083374023, "num_chars": 2}, {"sum_logits": -1.6538949012756348, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6538949012756348, "logits_per_char": -0.8269474506378174, "num_chars": 2}, {"sum_logits": -1.5969593524932861, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5969593524932861, "logits_per_char": -0.7984796762466431, "num_chars": 2}, {"sum_logits": -1.5632238388061523, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5632238388061523, "logits_per_char": -0.7816119194030762, "num_chars": 2}, {"sum_logits": -1.8463947772979736, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8463947772979736, "logits_per_char": -0.9231973886489868, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 566, "native_id": "060cad0d3c007ceb151db9907bfcb214", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5230673551559448, "incorrect_loss_raw": 1.668151468038559, "correct_loss_per_char": 0.7615336775779724, "incorrect_loss_per_char": 0.8340757340192795, "correct_loss_per_token": 1.5230673551559448, "incorrect_loss_per_token": 1.668151468038559, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5136609077453613, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5136609077453613, "logits_per_char": -0.7568304538726807, "num_chars": 2}, {"sum_logits": -1.539473056793213, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.539473056793213, "logits_per_char": -0.7697365283966064, "num_chars": 2}, {"sum_logits": -1.502402424812317, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.502402424812317, "logits_per_char": -0.7512012124061584, "num_chars": 2}, {"sum_logits": -1.5230673551559448, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5230673551559448, "logits_per_char": -0.7615336775779724, "num_chars": 2}, {"sum_logits": -2.1170694828033447, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.1170694828033447, "logits_per_char": -1.0585347414016724, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 567, "native_id": "29c2cc0ba85b4afb9c9d29801469a68f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5432134866714478, "incorrect_loss_raw": 1.657448947429657, "correct_loss_per_char": 0.7716067433357239, "incorrect_loss_per_char": 0.8287244737148285, "correct_loss_per_token": 1.5432134866714478, "incorrect_loss_per_token": 1.657448947429657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4339611530303955, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4339611530303955, "logits_per_char": -0.7169805765151978, "num_chars": 2}, {"sum_logits": -1.5150123834609985, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5150123834609985, "logits_per_char": -0.7575061917304993, "num_chars": 2}, {"sum_logits": -1.639166235923767, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.639166235923767, "logits_per_char": -0.8195831179618835, "num_chars": 2}, {"sum_logits": -1.5432134866714478, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5432134866714478, "logits_per_char": -0.7716067433357239, "num_chars": 2}, {"sum_logits": -2.041656017303467, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.041656017303467, "logits_per_char": -1.0208280086517334, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 568, "native_id": "6cb895ce89995f6be422f7c4167c7638", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1390882730484009, "incorrect_loss_raw": 1.8593704104423523, "correct_loss_per_char": 0.5695441365242004, "incorrect_loss_per_char": 0.9296852052211761, "correct_loss_per_token": 1.1390882730484009, "incorrect_loss_per_token": 1.8593704104423523, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1390882730484009, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.1390882730484009, "logits_per_char": -0.5695441365242004, "num_chars": 2}, {"sum_logits": -1.399519443511963, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.399519443511963, "logits_per_char": -0.6997597217559814, "num_chars": 2}, {"sum_logits": -1.682191252708435, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.682191252708435, "logits_per_char": -0.8410956263542175, "num_chars": 2}, {"sum_logits": -1.8186320066452026, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8186320066452026, "logits_per_char": -0.9093160033226013, "num_chars": 2}, {"sum_logits": -2.5371389389038086, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.5371389389038086, "logits_per_char": -1.2685694694519043, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 569, "native_id": "839f3c37622c1ed5eebc9cd0b9d658e8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6860849857330322, "incorrect_loss_raw": 1.6318893134593964, "correct_loss_per_char": 0.8430424928665161, "incorrect_loss_per_char": 0.8159446567296982, "correct_loss_per_token": 1.6860849857330322, "incorrect_loss_per_token": 1.6318893134593964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3102883100509644, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3102883100509644, "logits_per_char": -0.6551441550254822, "num_chars": 2}, {"sum_logits": -1.5211890935897827, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5211890935897827, "logits_per_char": -0.7605945467948914, "num_chars": 2}, {"sum_logits": -1.6506236791610718, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6506236791610718, "logits_per_char": -0.8253118395805359, "num_chars": 2}, {"sum_logits": -1.6860849857330322, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6860849857330322, "logits_per_char": -0.8430424928665161, "num_chars": 2}, {"sum_logits": -2.0454561710357666, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0454561710357666, "logits_per_char": -1.0227280855178833, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 570, "native_id": "3957ac6bab96fc9d4f173ada4692d16b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2910194396972656, "incorrect_loss_raw": 1.7601268291473389, "correct_loss_per_char": 0.6455097198486328, "incorrect_loss_per_char": 0.8800634145736694, "correct_loss_per_token": 1.2910194396972656, "incorrect_loss_per_token": 1.7601268291473389, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2910194396972656, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2910194396972656, "logits_per_char": -0.6455097198486328, "num_chars": 2}, {"sum_logits": -1.435585618019104, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.435585618019104, "logits_per_char": -0.717792809009552, "num_chars": 2}, {"sum_logits": -1.6179907321929932, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6179907321929932, "logits_per_char": -0.8089953660964966, "num_chars": 2}, {"sum_logits": -1.7082511186599731, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7082511186599731, "logits_per_char": -0.8541255593299866, "num_chars": 2}, {"sum_logits": -2.278679847717285, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.278679847717285, "logits_per_char": -1.1393399238586426, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 571, "native_id": "a4f5e5412f0f8ac9190db1730db07a90", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.736615777015686, "incorrect_loss_raw": 1.6219504177570343, "correct_loss_per_char": 0.868307888507843, "incorrect_loss_per_char": 0.8109752088785172, "correct_loss_per_token": 1.736615777015686, "incorrect_loss_per_token": 1.6219504177570343, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.332871675491333, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.332871675491333, "logits_per_char": -0.6664358377456665, "num_chars": 2}, {"sum_logits": -1.4653290510177612, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4653290510177612, "logits_per_char": -0.7326645255088806, "num_chars": 2}, {"sum_logits": -1.6271052360534668, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6271052360534668, "logits_per_char": -0.8135526180267334, "num_chars": 2}, {"sum_logits": -1.736615777015686, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.736615777015686, "logits_per_char": -0.868307888507843, "num_chars": 2}, {"sum_logits": -2.062495708465576, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.062495708465576, "logits_per_char": -1.031247854232788, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 572, "native_id": "cb5b39878be0e05a3ffe783801adbc3b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5239912271499634, "incorrect_loss_raw": 1.6675224006175995, "correct_loss_per_char": 0.7619956135749817, "incorrect_loss_per_char": 0.8337612003087997, "correct_loss_per_token": 1.5239912271499634, "incorrect_loss_per_token": 1.6675224006175995, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.38657546043396, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.38657546043396, "logits_per_char": -0.69328773021698, "num_chars": 2}, {"sum_logits": -1.5239912271499634, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5239912271499634, "logits_per_char": -0.7619956135749817, "num_chars": 2}, {"sum_logits": -1.5424554347991943, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5424554347991943, "logits_per_char": -0.7712277173995972, "num_chars": 2}, {"sum_logits": -1.686076045036316, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.686076045036316, "logits_per_char": -0.843038022518158, "num_chars": 2}, {"sum_logits": -2.0549826622009277, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.0549826622009277, "logits_per_char": -1.0274913311004639, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 573, "native_id": "985a4f1a3f31f1ba6654f4fc48f504df", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6904691457748413, "incorrect_loss_raw": 1.6464878618717194, "correct_loss_per_char": 0.8452345728874207, "incorrect_loss_per_char": 0.8232439309358597, "correct_loss_per_token": 1.6904691457748413, "incorrect_loss_per_token": 1.6464878618717194, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3276101350784302, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3276101350784302, "logits_per_char": -0.6638050675392151, "num_chars": 2}, {"sum_logits": -1.4444037675857544, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4444037675857544, "logits_per_char": -0.7222018837928772, "num_chars": 2}, {"sum_logits": -1.6090201139450073, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6090201139450073, "logits_per_char": -0.8045100569725037, "num_chars": 2}, {"sum_logits": -1.6904691457748413, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6904691457748413, "logits_per_char": -0.8452345728874207, "num_chars": 2}, {"sum_logits": -2.2049174308776855, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.2049174308776855, "logits_per_char": -1.1024587154388428, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 574, "native_id": "5d687fe9c95436ce84230c996d34382d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5825181007385254, "incorrect_loss_raw": 1.6401355862617493, "correct_loss_per_char": 0.7912590503692627, "incorrect_loss_per_char": 0.8200677931308746, "correct_loss_per_token": 1.5825181007385254, "incorrect_loss_per_token": 1.6401355862617493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4733777046203613, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.4733777046203613, "logits_per_char": -0.7366888523101807, "num_chars": 2}, {"sum_logits": -1.5201685428619385, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5201685428619385, "logits_per_char": -0.7600842714309692, "num_chars": 2}, {"sum_logits": -1.5825181007385254, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5825181007385254, "logits_per_char": -0.7912590503692627, "num_chars": 2}, {"sum_logits": -1.5819268226623535, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5819268226623535, "logits_per_char": -0.7909634113311768, "num_chars": 2}, {"sum_logits": -1.9850692749023438, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.9850692749023438, "logits_per_char": -0.9925346374511719, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 575, "native_id": "af11faa29097b71141fe192ad019d1dd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4939532279968262, "incorrect_loss_raw": 1.6483513712882996, "correct_loss_per_char": 0.7469766139984131, "incorrect_loss_per_char": 0.8241756856441498, "correct_loss_per_token": 1.4939532279968262, "incorrect_loss_per_token": 1.6483513712882996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4939532279968262, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.4939532279968262, "logits_per_char": -0.7469766139984131, "num_chars": 2}, {"sum_logits": -1.579903244972229, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.579903244972229, "logits_per_char": -0.7899516224861145, "num_chars": 2}, {"sum_logits": -1.5711063146591187, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5711063146591187, "logits_per_char": -0.7855531573295593, "num_chars": 2}, {"sum_logits": -1.686884880065918, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.686884880065918, "logits_per_char": -0.843442440032959, "num_chars": 2}, {"sum_logits": -1.7555110454559326, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7555110454559326, "logits_per_char": -0.8777555227279663, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 576, "native_id": "07fd8b0aed06406fedb137d11b07a890", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.656951904296875, "incorrect_loss_raw": 1.6169174313545227, "correct_loss_per_char": 0.8284759521484375, "incorrect_loss_per_char": 0.8084587156772614, "correct_loss_per_token": 1.656951904296875, "incorrect_loss_per_token": 1.6169174313545227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4282567501068115, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4282567501068115, "logits_per_char": -0.7141283750534058, "num_chars": 2}, {"sum_logits": -1.572121024131775, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.572121024131775, "logits_per_char": -0.7860605120658875, "num_chars": 2}, {"sum_logits": -1.5770050287246704, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5770050287246704, "logits_per_char": -0.7885025143623352, "num_chars": 2}, {"sum_logits": -1.656951904296875, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.656951904296875, "logits_per_char": -0.8284759521484375, "num_chars": 2}, {"sum_logits": -1.890286922454834, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.890286922454834, "logits_per_char": -0.945143461227417, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 577, "native_id": "7044d82a456d0fa6f0210abb03cbf2c4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6383070945739746, "incorrect_loss_raw": 1.6886139810085297, "correct_loss_per_char": 0.8191535472869873, "incorrect_loss_per_char": 0.8443069905042648, "correct_loss_per_token": 1.6383070945739746, "incorrect_loss_per_token": 1.6886139810085297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2968860864639282, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2968860864639282, "logits_per_char": -0.6484430432319641, "num_chars": 2}, {"sum_logits": -1.3547782897949219, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.3547782897949219, "logits_per_char": -0.6773891448974609, "num_chars": 2}, {"sum_logits": -1.6383070945739746, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6383070945739746, "logits_per_char": -0.8191535472869873, "num_chars": 2}, {"sum_logits": -1.7312512397766113, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7312512397766113, "logits_per_char": -0.8656256198883057, "num_chars": 2}, {"sum_logits": -2.3715403079986572, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.3715403079986572, "logits_per_char": -1.1857701539993286, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 578, "native_id": "e53ba4c7d2a818bdb6001e6924bc8896", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5106618404388428, "incorrect_loss_raw": 1.7483795881271362, "correct_loss_per_char": 0.7553309202194214, "incorrect_loss_per_char": 0.8741897940635681, "correct_loss_per_token": 1.5106618404388428, "incorrect_loss_per_token": 1.7483795881271362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1185965538024902, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.1185965538024902, "logits_per_char": -0.5592982769012451, "num_chars": 2}, {"sum_logits": -1.5106618404388428, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5106618404388428, "logits_per_char": -0.7553309202194214, "num_chars": 2}, {"sum_logits": -1.6389729976654053, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6389729976654053, "logits_per_char": -0.8194864988327026, "num_chars": 2}, {"sum_logits": -1.8007352352142334, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.8007352352142334, "logits_per_char": -0.9003676176071167, "num_chars": 2}, {"sum_logits": -2.435213565826416, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.435213565826416, "logits_per_char": -1.217606782913208, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 579, "native_id": "ecbc1ab06ad1ed6c53e5293d7a90ebd3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6494630575180054, "incorrect_loss_raw": 1.6330612897872925, "correct_loss_per_char": 0.8247315287590027, "incorrect_loss_per_char": 0.8165306448936462, "correct_loss_per_token": 1.6494630575180054, "incorrect_loss_per_token": 1.6330612897872925, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3747024536132812, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3747024536132812, "logits_per_char": -0.6873512268066406, "num_chars": 2}, {"sum_logits": -1.5906742811203003, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5906742811203003, "logits_per_char": -0.7953371405601501, "num_chars": 2}, {"sum_logits": -1.5326277017593384, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5326277017593384, "logits_per_char": -0.7663138508796692, "num_chars": 2}, {"sum_logits": -1.6494630575180054, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6494630575180054, "logits_per_char": -0.8247315287590027, "num_chars": 2}, {"sum_logits": -2.03424072265625, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.03424072265625, "logits_per_char": -1.017120361328125, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 580, "native_id": "9a356ff463c042d04ba45bfd627bac20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5684815645217896, "incorrect_loss_raw": 1.6289436221122742, "correct_loss_per_char": 0.7842407822608948, "incorrect_loss_per_char": 0.8144718110561371, "correct_loss_per_token": 1.5684815645217896, "incorrect_loss_per_token": 1.6289436221122742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5425604581832886, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.5425604581832886, "logits_per_char": -0.7712802290916443, "num_chars": 2}, {"sum_logits": -1.7255940437316895, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.7255940437316895, "logits_per_char": -0.8627970218658447, "num_chars": 2}, {"sum_logits": -1.5717569589614868, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5717569589614868, "logits_per_char": -0.7858784794807434, "num_chars": 2}, {"sum_logits": -1.5684815645217896, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5684815645217896, "logits_per_char": -0.7842407822608948, "num_chars": 2}, {"sum_logits": -1.6758630275726318, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6758630275726318, "logits_per_char": -0.8379315137863159, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 581, "native_id": "0a5c069836784c3d574828d85a20a074", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5614935159683228, "incorrect_loss_raw": 1.6358709335327148, "correct_loss_per_char": 0.7807467579841614, "incorrect_loss_per_char": 0.8179354667663574, "correct_loss_per_token": 1.5614935159683228, "incorrect_loss_per_token": 1.6358709335327148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4971929788589478, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.4971929788589478, "logits_per_char": -0.7485964894294739, "num_chars": 2}, {"sum_logits": -1.5689176321029663, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5689176321029663, "logits_per_char": -0.7844588160514832, "num_chars": 2}, {"sum_logits": -1.6632064580917358, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6632064580917358, "logits_per_char": -0.8316032290458679, "num_chars": 2}, {"sum_logits": -1.5614935159683228, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5614935159683228, "logits_per_char": -0.7807467579841614, "num_chars": 2}, {"sum_logits": -1.8141666650772095, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.8141666650772095, "logits_per_char": -0.9070833325386047, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 582, "native_id": "f996430ce208606452868fd2e739d409", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.592647910118103, "incorrect_loss_raw": 1.6647590100765228, "correct_loss_per_char": 0.7963239550590515, "incorrect_loss_per_char": 0.8323795050382614, "correct_loss_per_token": 1.592647910118103, "incorrect_loss_per_token": 1.6647590100765228, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3079766035079956, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3079766035079956, "logits_per_char": -0.6539883017539978, "num_chars": 2}, {"sum_logits": -1.592647910118103, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.592647910118103, "logits_per_char": -0.7963239550590515, "num_chars": 2}, {"sum_logits": -1.6075397729873657, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6075397729873657, "logits_per_char": -0.8037698864936829, "num_chars": 2}, {"sum_logits": -1.582410216331482, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.582410216331482, "logits_per_char": -0.791205108165741, "num_chars": 2}, {"sum_logits": -2.161109447479248, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.161109447479248, "logits_per_char": -1.080554723739624, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 583, "native_id": "26c854d933d2115e7636fdcde57eb463", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.267110824584961, "incorrect_loss_raw": 1.5078193247318268, "correct_loss_per_char": 1.1335554122924805, "incorrect_loss_per_char": 0.7539096623659134, "correct_loss_per_token": 2.267110824584961, "incorrect_loss_per_token": 1.5078193247318268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4182987213134766, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4182987213134766, "logits_per_char": -0.7091493606567383, "num_chars": 2}, {"sum_logits": -1.3823739290237427, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3823739290237427, "logits_per_char": -0.6911869645118713, "num_chars": 2}, {"sum_logits": -1.5387715101242065, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5387715101242065, "logits_per_char": -0.7693857550621033, "num_chars": 2}, {"sum_logits": -1.6918331384658813, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6918331384658813, "logits_per_char": -0.8459165692329407, "num_chars": 2}, {"sum_logits": -2.267110824584961, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.267110824584961, "logits_per_char": -1.1335554122924805, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 584, "native_id": "83c25b9a5db5f9b3fd1ff6c7453d23d0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6407088041305542, "incorrect_loss_raw": 1.65457883477211, "correct_loss_per_char": 0.8203544020652771, "incorrect_loss_per_char": 0.827289417386055, "correct_loss_per_token": 1.6407088041305542, "incorrect_loss_per_token": 1.65457883477211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.271172046661377, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.271172046661377, "logits_per_char": -0.6355860233306885, "num_chars": 2}, {"sum_logits": -1.5135878324508667, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5135878324508667, "logits_per_char": -0.7567939162254333, "num_chars": 2}, {"sum_logits": -1.6407088041305542, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6407088041305542, "logits_per_char": -0.8203544020652771, "num_chars": 2}, {"sum_logits": -1.7197790145874023, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7197790145874023, "logits_per_char": -0.8598895072937012, "num_chars": 2}, {"sum_logits": -2.113776445388794, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.113776445388794, "logits_per_char": -1.056888222694397, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 585, "native_id": "a0d02fc32878efdf0b0d420972943492", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4325491189956665, "incorrect_loss_raw": 1.7028484046459198, "correct_loss_per_char": 0.7162745594978333, "incorrect_loss_per_char": 0.8514242023229599, "correct_loss_per_token": 1.4325491189956665, "incorrect_loss_per_token": 1.7028484046459198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3382257223129272, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3382257223129272, "logits_per_char": -0.6691128611564636, "num_chars": 2}, {"sum_logits": -1.4325491189956665, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4325491189956665, "logits_per_char": -0.7162745594978333, "num_chars": 2}, {"sum_logits": -1.5886389017105103, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5886389017105103, "logits_per_char": -0.7943194508552551, "num_chars": 2}, {"sum_logits": -1.8037632703781128, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.8037632703781128, "logits_per_char": -0.9018816351890564, "num_chars": 2}, {"sum_logits": -2.080765724182129, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.080765724182129, "logits_per_char": -1.0403828620910645, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 586, "native_id": "73fbd2caac2c3786ca810adfe7030273", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.722395420074463, "incorrect_loss_raw": 1.612690269947052, "correct_loss_per_char": 0.8611977100372314, "incorrect_loss_per_char": 0.806345134973526, "correct_loss_per_token": 1.722395420074463, "incorrect_loss_per_token": 1.612690269947052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3506464958190918, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.3506464958190918, "logits_per_char": -0.6753232479095459, "num_chars": 2}, {"sum_logits": -1.5256874561309814, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5256874561309814, "logits_per_char": -0.7628437280654907, "num_chars": 2}, {"sum_logits": -1.722395420074463, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.722395420074463, "logits_per_char": -0.8611977100372314, "num_chars": 2}, {"sum_logits": -1.612823247909546, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.612823247909546, "logits_per_char": -0.806411623954773, "num_chars": 2}, {"sum_logits": -1.9616038799285889, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.9616038799285889, "logits_per_char": -0.9808019399642944, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 587, "native_id": "6c515b068b4d3aa88a5382224d9b866d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5815097093582153, "incorrect_loss_raw": 1.648891806602478, "correct_loss_per_char": 0.7907548546791077, "incorrect_loss_per_char": 0.824445903301239, "correct_loss_per_token": 1.5815097093582153, "incorrect_loss_per_token": 1.648891806602478, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4007341861724854, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4007341861724854, "logits_per_char": -0.7003670930862427, "num_chars": 2}, {"sum_logits": -1.6095000505447388, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6095000505447388, "logits_per_char": -0.8047500252723694, "num_chars": 2}, {"sum_logits": -1.5815097093582153, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5815097093582153, "logits_per_char": -0.7907548546791077, "num_chars": 2}, {"sum_logits": -1.5346466302871704, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5346466302871704, "logits_per_char": -0.7673233151435852, "num_chars": 2}, {"sum_logits": -2.0506863594055176, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.0506863594055176, "logits_per_char": -1.0253431797027588, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 588, "native_id": "0af371b94fb414860b13eea6009ccc31", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7541638612747192, "incorrect_loss_raw": 1.6146975457668304, "correct_loss_per_char": 0.8770819306373596, "incorrect_loss_per_char": 0.8073487728834152, "correct_loss_per_token": 1.7541638612747192, "incorrect_loss_per_token": 1.6146975457668304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.337708830833435, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.337708830833435, "logits_per_char": -0.6688544154167175, "num_chars": 2}, {"sum_logits": -1.500670313835144, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.500670313835144, "logits_per_char": -0.750335156917572, "num_chars": 2}, {"sum_logits": -1.5667930841445923, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5667930841445923, "logits_per_char": -0.7833965420722961, "num_chars": 2}, {"sum_logits": -1.7541638612747192, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7541638612747192, "logits_per_char": -0.8770819306373596, "num_chars": 2}, {"sum_logits": -2.0536179542541504, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.0536179542541504, "logits_per_char": -1.0268089771270752, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 589, "native_id": "38e61d4be0da46b3cbbd76dc20bce677", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5191261768341064, "incorrect_loss_raw": 1.641657441854477, "correct_loss_per_char": 0.7595630884170532, "incorrect_loss_per_char": 0.8208287209272385, "correct_loss_per_token": 1.5191261768341064, "incorrect_loss_per_token": 1.641657441854477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.644321084022522, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.644321084022522, "logits_per_char": -0.822160542011261, "num_chars": 2}, {"sum_logits": -1.6100198030471802, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6100198030471802, "logits_per_char": -0.8050099015235901, "num_chars": 2}, {"sum_logits": -1.578843355178833, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.578843355178833, "logits_per_char": -0.7894216775894165, "num_chars": 2}, {"sum_logits": -1.5191261768341064, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.5191261768341064, "logits_per_char": -0.7595630884170532, "num_chars": 2}, {"sum_logits": -1.7334455251693726, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7334455251693726, "logits_per_char": -0.8667227625846863, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 590, "native_id": "cebc07bd5080cc72862cb333b10d782d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7140138149261475, "incorrect_loss_raw": 1.6023330390453339, "correct_loss_per_char": 0.8570069074630737, "incorrect_loss_per_char": 0.8011665195226669, "correct_loss_per_token": 1.7140138149261475, "incorrect_loss_per_token": 1.6023330390453339, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.387136697769165, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.387136697769165, "logits_per_char": -0.6935683488845825, "num_chars": 2}, {"sum_logits": -1.6156065464019775, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6156065464019775, "logits_per_char": -0.8078032732009888, "num_chars": 2}, {"sum_logits": -1.7140138149261475, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7140138149261475, "logits_per_char": -0.8570069074630737, "num_chars": 2}, {"sum_logits": -1.6377331018447876, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6377331018447876, "logits_per_char": -0.8188665509223938, "num_chars": 2}, {"sum_logits": -1.7688558101654053, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7688558101654053, "logits_per_char": -0.8844279050827026, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 591, "native_id": "de0386024f32cdf277a785a851b97544", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4921975135803223, "incorrect_loss_raw": 1.6868272721767426, "correct_loss_per_char": 0.7460987567901611, "incorrect_loss_per_char": 0.8434136360883713, "correct_loss_per_token": 1.4921975135803223, "incorrect_loss_per_token": 1.6868272721767426, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.400183916091919, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.400183916091919, "logits_per_char": -0.7000919580459595, "num_chars": 2}, {"sum_logits": -1.5267482995986938, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5267482995986938, "logits_per_char": -0.7633741497993469, "num_chars": 2}, {"sum_logits": -1.4921975135803223, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4921975135803223, "logits_per_char": -0.7460987567901611, "num_chars": 2}, {"sum_logits": -1.6507163047790527, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6507163047790527, "logits_per_char": -0.8253581523895264, "num_chars": 2}, {"sum_logits": -2.1696605682373047, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.1696605682373047, "logits_per_char": -1.0848302841186523, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 592, "native_id": "9b62cd7f89716f393239e6c6ff3e11d5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4187284708023071, "incorrect_loss_raw": 1.6866547167301178, "correct_loss_per_char": 0.7093642354011536, "incorrect_loss_per_char": 0.8433273583650589, "correct_loss_per_token": 1.4187284708023071, "incorrect_loss_per_token": 1.6866547167301178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4187284708023071, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4187284708023071, "logits_per_char": -0.7093642354011536, "num_chars": 2}, {"sum_logits": -1.5236908197402954, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5236908197402954, "logits_per_char": -0.7618454098701477, "num_chars": 2}, {"sum_logits": -1.5860356092453003, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5860356092453003, "logits_per_char": -0.7930178046226501, "num_chars": 2}, {"sum_logits": -1.6256743669509888, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6256743669509888, "logits_per_char": -0.8128371834754944, "num_chars": 2}, {"sum_logits": -2.0112180709838867, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.0112180709838867, "logits_per_char": -1.0056090354919434, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 593, "native_id": "8b25332de2894ab38784235838d38cec", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8975471258163452, "incorrect_loss_raw": 1.5567253530025482, "correct_loss_per_char": 0.9487735629081726, "incorrect_loss_per_char": 0.7783626765012741, "correct_loss_per_token": 1.8975471258163452, "incorrect_loss_per_token": 1.5567253530025482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.491931438446045, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.491931438446045, "logits_per_char": -0.7459657192230225, "num_chars": 2}, {"sum_logits": -1.5194669961929321, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5194669961929321, "logits_per_char": -0.7597334980964661, "num_chars": 2}, {"sum_logits": -1.6288365125656128, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6288365125656128, "logits_per_char": -0.8144182562828064, "num_chars": 2}, {"sum_logits": -1.586666464805603, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.586666464805603, "logits_per_char": -0.7933332324028015, "num_chars": 2}, {"sum_logits": -1.8975471258163452, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.8975471258163452, "logits_per_char": -0.9487735629081726, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 594, "native_id": "dd4a811d18549f1ae1954cf938b28536", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4515535831451416, "incorrect_loss_raw": 1.6702328324317932, "correct_loss_per_char": 0.7257767915725708, "incorrect_loss_per_char": 0.8351164162158966, "correct_loss_per_token": 1.4515535831451416, "incorrect_loss_per_token": 1.6702328324317932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4515535831451416, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4515535831451416, "logits_per_char": -0.7257767915725708, "num_chars": 2}, {"sum_logits": -1.5617512464523315, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5617512464523315, "logits_per_char": -0.7808756232261658, "num_chars": 2}, {"sum_logits": -1.5683289766311646, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5683289766311646, "logits_per_char": -0.7841644883155823, "num_chars": 2}, {"sum_logits": -1.6122907400131226, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6122907400131226, "logits_per_char": -0.8061453700065613, "num_chars": 2}, {"sum_logits": -1.9385603666305542, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9385603666305542, "logits_per_char": -0.9692801833152771, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 595, "native_id": "e2ff952c17faf1c56a970502630d4c86", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9474389553070068, "incorrect_loss_raw": 1.5536359548568726, "correct_loss_per_char": 0.9737194776535034, "incorrect_loss_per_char": 0.7768179774284363, "correct_loss_per_token": 1.9474389553070068, "incorrect_loss_per_token": 1.5536359548568726, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4008941650390625, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4008941650390625, "logits_per_char": -0.7004470825195312, "num_chars": 2}, {"sum_logits": -1.4912853240966797, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.4912853240966797, "logits_per_char": -0.7456426620483398, "num_chars": 2}, {"sum_logits": -1.5756444931030273, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5756444931030273, "logits_per_char": -0.7878222465515137, "num_chars": 2}, {"sum_logits": -1.7467198371887207, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7467198371887207, "logits_per_char": -0.8733599185943604, "num_chars": 2}, {"sum_logits": -1.9474389553070068, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9474389553070068, "logits_per_char": -0.9737194776535034, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 596, "native_id": "3a6140e475cbbd3ee1da5ba9a6953597_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.112722158432007, "incorrect_loss_raw": 1.5261694490909576, "correct_loss_per_char": 1.0563610792160034, "incorrect_loss_per_char": 0.7630847245454788, "correct_loss_per_token": 2.112722158432007, "incorrect_loss_per_token": 1.5261694490909576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3600517511367798, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3600517511367798, "logits_per_char": -0.6800258755683899, "num_chars": 2}, {"sum_logits": -1.5702224969863892, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5702224969863892, "logits_per_char": -0.7851112484931946, "num_chars": 2}, {"sum_logits": -1.6409988403320312, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6409988403320312, "logits_per_char": -0.8204994201660156, "num_chars": 2}, {"sum_logits": -1.5334047079086304, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5334047079086304, "logits_per_char": -0.7667023539543152, "num_chars": 2}, {"sum_logits": -2.112722158432007, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.112722158432007, "logits_per_char": -1.0563610792160034, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 597, "native_id": "e75e0c11e2d5a7b634455a1b4b76856c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5147372484207153, "incorrect_loss_raw": 1.6871011853218079, "correct_loss_per_char": 0.7573686242103577, "incorrect_loss_per_char": 0.8435505926609039, "correct_loss_per_token": 1.5147372484207153, "incorrect_loss_per_token": 1.6871011853218079, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2326061725616455, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2326061725616455, "logits_per_char": -0.6163030862808228, "num_chars": 2}, {"sum_logits": -1.5147372484207153, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5147372484207153, "logits_per_char": -0.7573686242103577, "num_chars": 2}, {"sum_logits": -1.6614525318145752, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6614525318145752, "logits_per_char": -0.8307262659072876, "num_chars": 2}, {"sum_logits": -1.7951161861419678, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7951161861419678, "logits_per_char": -0.8975580930709839, "num_chars": 2}, {"sum_logits": -2.059229850769043, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.059229850769043, "logits_per_char": -1.0296149253845215, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 598, "native_id": "3b9ccdcb1c932c46a38e040d3e6c7f5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4040359258651733, "incorrect_loss_raw": 1.6934119164943695, "correct_loss_per_char": 0.7020179629325867, "incorrect_loss_per_char": 0.8467059582471848, "correct_loss_per_token": 1.4040359258651733, "incorrect_loss_per_token": 1.6934119164943695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4040359258651733, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4040359258651733, "logits_per_char": -0.7020179629325867, "num_chars": 2}, {"sum_logits": -1.5333279371261597, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5333279371261597, "logits_per_char": -0.7666639685630798, "num_chars": 2}, {"sum_logits": -1.5139023065567017, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5139023065567017, "logits_per_char": -0.7569511532783508, "num_chars": 2}, {"sum_logits": -1.6922534704208374, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6922534704208374, "logits_per_char": -0.8461267352104187, "num_chars": 2}, {"sum_logits": -2.0341639518737793, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0341639518737793, "logits_per_char": -1.0170819759368896, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 599, "native_id": "6a29b657b29e1506284d8328dffbbd21", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5239847898483276, "incorrect_loss_raw": 1.6421735286712646, "correct_loss_per_char": 0.7619923949241638, "incorrect_loss_per_char": 0.8210867643356323, "correct_loss_per_token": 1.5239847898483276, "incorrect_loss_per_token": 1.6421735286712646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5505284070968628, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5505284070968628, "logits_per_char": -0.7752642035484314, "num_chars": 2}, {"sum_logits": -1.5239847898483276, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.5239847898483276, "logits_per_char": -0.7619923949241638, "num_chars": 2}, {"sum_logits": -1.6437453031539917, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6437453031539917, "logits_per_char": -0.8218726515769958, "num_chars": 2}, {"sum_logits": -1.5862888097763062, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5862888097763062, "logits_per_char": -0.7931444048881531, "num_chars": 2}, {"sum_logits": -1.788131594657898, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.788131594657898, "logits_per_char": -0.894065797328949, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 600, "native_id": "96cb628fb7ed2f53245598f707ed2b80", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.548599123954773, "incorrect_loss_raw": 1.6350420117378235, "correct_loss_per_char": 0.7742995619773865, "incorrect_loss_per_char": 0.8175210058689117, "correct_loss_per_token": 1.548599123954773, "incorrect_loss_per_token": 1.6350420117378235, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5250312089920044, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.5250312089920044, "logits_per_char": -0.7625156044960022, "num_chars": 2}, {"sum_logits": -1.548599123954773, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.548599123954773, "logits_per_char": -0.7742995619773865, "num_chars": 2}, {"sum_logits": -1.5961366891860962, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5961366891860962, "logits_per_char": -0.7980683445930481, "num_chars": 2}, {"sum_logits": -1.663650393486023, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.663650393486023, "logits_per_char": -0.8318251967430115, "num_chars": 2}, {"sum_logits": -1.7553497552871704, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7553497552871704, "logits_per_char": -0.8776748776435852, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 601, "native_id": "bd4e80fa6642a76c064d0bc924411fb0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6343008279800415, "incorrect_loss_raw": 1.653969019651413, "correct_loss_per_char": 0.8171504139900208, "incorrect_loss_per_char": 0.8269845098257065, "correct_loss_per_token": 1.6343008279800415, "incorrect_loss_per_token": 1.653969019651413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2529031038284302, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.2529031038284302, "logits_per_char": -0.6264515519142151, "num_chars": 2}, {"sum_logits": -1.5692919492721558, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5692919492721558, "logits_per_char": -0.7846459746360779, "num_chars": 2}, {"sum_logits": -1.6343008279800415, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6343008279800415, "logits_per_char": -0.8171504139900208, "num_chars": 2}, {"sum_logits": -1.7035554647445679, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7035554647445679, "logits_per_char": -0.8517777323722839, "num_chars": 2}, {"sum_logits": -2.090125560760498, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.090125560760498, "logits_per_char": -1.045062780380249, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 602, "native_id": "05490e6c191fbc3c2fe0033ed0bd8aa0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6051002740859985, "incorrect_loss_raw": 1.6232191920280457, "correct_loss_per_char": 0.8025501370429993, "incorrect_loss_per_char": 0.8116095960140228, "correct_loss_per_token": 1.6051002740859985, "incorrect_loss_per_token": 1.6232191920280457, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.45771324634552, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.45771324634552, "logits_per_char": -0.72885662317276, "num_chars": 2}, {"sum_logits": -1.7420731782913208, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7420731782913208, "logits_per_char": -0.8710365891456604, "num_chars": 2}, {"sum_logits": -1.5750147104263306, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5750147104263306, "logits_per_char": -0.7875073552131653, "num_chars": 2}, {"sum_logits": -1.6051002740859985, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6051002740859985, "logits_per_char": -0.8025501370429993, "num_chars": 2}, {"sum_logits": -1.7180756330490112, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7180756330490112, "logits_per_char": -0.8590378165245056, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 603, "native_id": "6abd34442438509b4a00c69d6fd24764", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2242085933685303, "incorrect_loss_raw": 1.50790473818779, "correct_loss_per_char": 1.1121042966842651, "incorrect_loss_per_char": 0.753952369093895, "correct_loss_per_token": 2.2242085933685303, "incorrect_loss_per_token": 1.50790473818779, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5065513849258423, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5065513849258423, "logits_per_char": -0.7532756924629211, "num_chars": 2}, {"sum_logits": -1.4325183629989624, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4325183629989624, "logits_per_char": -0.7162591814994812, "num_chars": 2}, {"sum_logits": -1.5092253684997559, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5092253684997559, "logits_per_char": -0.7546126842498779, "num_chars": 2}, {"sum_logits": -1.5833238363265991, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5833238363265991, "logits_per_char": -0.7916619181632996, "num_chars": 2}, {"sum_logits": -2.2242085933685303, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.2242085933685303, "logits_per_char": -1.1121042966842651, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 604, "native_id": "e58eb0ec4197c29e961a7bdd4d67de4e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7204326391220093, "incorrect_loss_raw": 1.6056618690490723, "correct_loss_per_char": 0.8602163195610046, "incorrect_loss_per_char": 0.8028309345245361, "correct_loss_per_token": 1.7204326391220093, "incorrect_loss_per_token": 1.6056618690490723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.42133367061615, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.42133367061615, "logits_per_char": -0.710666835308075, "num_chars": 2}, {"sum_logits": -1.5633612871170044, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5633612871170044, "logits_per_char": -0.7816806435585022, "num_chars": 2}, {"sum_logits": -1.5285896062850952, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5285896062850952, "logits_per_char": -0.7642948031425476, "num_chars": 2}, {"sum_logits": -1.7204326391220093, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7204326391220093, "logits_per_char": -0.8602163195610046, "num_chars": 2}, {"sum_logits": -1.9093629121780396, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9093629121780396, "logits_per_char": -0.9546814560890198, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 605, "native_id": "597d2a1c9df7962218d8b807df1f8212", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.3092873096466064, "incorrect_loss_raw": 1.4996550977230072, "correct_loss_per_char": 1.1546436548233032, "incorrect_loss_per_char": 0.7498275488615036, "correct_loss_per_token": 2.3092873096466064, "incorrect_loss_per_token": 1.4996550977230072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4318424463272095, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4318424463272095, "logits_per_char": -0.7159212231636047, "num_chars": 2}, {"sum_logits": -1.6318917274475098, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.6318917274475098, "logits_per_char": -0.8159458637237549, "num_chars": 2}, {"sum_logits": -1.4110198020935059, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.4110198020935059, "logits_per_char": -0.7055099010467529, "num_chars": 2}, {"sum_logits": -1.5238664150238037, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.5238664150238037, "logits_per_char": -0.7619332075119019, "num_chars": 2}, {"sum_logits": -2.3092873096466064, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -2.3092873096466064, "logits_per_char": -1.1546436548233032, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 606, "native_id": "68f6ac445cc008d93f931b999b44b0ba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0426149368286133, "incorrect_loss_raw": 1.543874353170395, "correct_loss_per_char": 1.0213074684143066, "incorrect_loss_per_char": 0.7719371765851974, "correct_loss_per_token": 2.0426149368286133, "incorrect_loss_per_token": 1.543874353170395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3124582767486572, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3124582767486572, "logits_per_char": -0.6562291383743286, "num_chars": 2}, {"sum_logits": -1.5161534547805786, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5161534547805786, "logits_per_char": -0.7580767273902893, "num_chars": 2}, {"sum_logits": -1.5634289979934692, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5634289979934692, "logits_per_char": -0.7817144989967346, "num_chars": 2}, {"sum_logits": -1.7834566831588745, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7834566831588745, "logits_per_char": -0.8917283415794373, "num_chars": 2}, {"sum_logits": -2.0426149368286133, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.0426149368286133, "logits_per_char": -1.0213074684143066, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 607, "native_id": "aa4c5d2d348796b8d7fa324f27f4c34f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5870985984802246, "incorrect_loss_raw": 1.652931123971939, "correct_loss_per_char": 0.7935492992401123, "incorrect_loss_per_char": 0.8264655619859695, "correct_loss_per_token": 1.5870985984802246, "incorrect_loss_per_token": 1.652931123971939, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4920295476913452, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4920295476913452, "logits_per_char": -0.7460147738456726, "num_chars": 2}, {"sum_logits": -1.5038726329803467, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5038726329803467, "logits_per_char": -0.7519363164901733, "num_chars": 2}, {"sum_logits": -1.498945713043213, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.498945713043213, "logits_per_char": -0.7494728565216064, "num_chars": 2}, {"sum_logits": -1.5870985984802246, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5870985984802246, "logits_per_char": -0.7935492992401123, "num_chars": 2}, {"sum_logits": -2.1168766021728516, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.1168766021728516, "logits_per_char": -1.0584383010864258, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 608, "native_id": "7400e9c4a2c8e600a0f7e2d162a07837", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5965187549591064, "incorrect_loss_raw": 1.6320119500160217, "correct_loss_per_char": 0.7982593774795532, "incorrect_loss_per_char": 0.8160059750080109, "correct_loss_per_token": 1.5965187549591064, "incorrect_loss_per_token": 1.6320119500160217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5166490077972412, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.5166490077972412, "logits_per_char": -0.7583245038986206, "num_chars": 2}, {"sum_logits": -1.4981353282928467, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.4981353282928467, "logits_per_char": -0.7490676641464233, "num_chars": 2}, {"sum_logits": -1.605186939239502, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.605186939239502, "logits_per_char": -0.802593469619751, "num_chars": 2}, {"sum_logits": -1.5965187549591064, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.5965187549591064, "logits_per_char": -0.7982593774795532, "num_chars": 2}, {"sum_logits": -1.908076524734497, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.908076524734497, "logits_per_char": -0.9540382623672485, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 609, "native_id": "fad197409a977126c9587eccd240ceea", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5792548656463623, "incorrect_loss_raw": 1.6352717578411102, "correct_loss_per_char": 0.7896274328231812, "incorrect_loss_per_char": 0.8176358789205551, "correct_loss_per_token": 1.5792548656463623, "incorrect_loss_per_token": 1.6352717578411102, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.493539810180664, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.493539810180664, "logits_per_char": -0.746769905090332, "num_chars": 2}, {"sum_logits": -1.4972952604293823, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4972952604293823, "logits_per_char": -0.7486476302146912, "num_chars": 2}, {"sum_logits": -1.5792548656463623, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5792548656463623, "logits_per_char": -0.7896274328231812, "num_chars": 2}, {"sum_logits": -1.6884853839874268, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6884853839874268, "logits_per_char": -0.8442426919937134, "num_chars": 2}, {"sum_logits": -1.8617665767669678, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8617665767669678, "logits_per_char": -0.9308832883834839, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 610, "native_id": "f09038444aeb1a048f04dedd5b97b769", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5815638303756714, "incorrect_loss_raw": 1.6281837821006775, "correct_loss_per_char": 0.7907819151878357, "incorrect_loss_per_char": 0.8140918910503387, "correct_loss_per_token": 1.5815638303756714, "incorrect_loss_per_token": 1.6281837821006775, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5490100383758545, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5490100383758545, "logits_per_char": -0.7745050191879272, "num_chars": 2}, {"sum_logits": -1.6048531532287598, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6048531532287598, "logits_per_char": -0.8024265766143799, "num_chars": 2}, {"sum_logits": -1.5353025197982788, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.5353025197982788, "logits_per_char": -0.7676512598991394, "num_chars": 2}, {"sum_logits": -1.5815638303756714, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5815638303756714, "logits_per_char": -0.7907819151878357, "num_chars": 2}, {"sum_logits": -1.823569416999817, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.823569416999817, "logits_per_char": -0.9117847084999084, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 611, "native_id": "0aa23ad1ba9f28bc3e0185237a7ce1cc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6297078132629395, "incorrect_loss_raw": 1.616422325372696, "correct_loss_per_char": 0.8148539066314697, "incorrect_loss_per_char": 0.808211162686348, "correct_loss_per_token": 1.6297078132629395, "incorrect_loss_per_token": 1.616422325372696, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4713795185089111, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4713795185089111, "logits_per_char": -0.7356897592544556, "num_chars": 2}, {"sum_logits": -1.6993614435195923, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6993614435195923, "logits_per_char": -0.8496807217597961, "num_chars": 2}, {"sum_logits": -1.6297078132629395, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6297078132629395, "logits_per_char": -0.8148539066314697, "num_chars": 2}, {"sum_logits": -1.5956896543502808, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5956896543502808, "logits_per_char": -0.7978448271751404, "num_chars": 2}, {"sum_logits": -1.6992586851119995, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6992586851119995, "logits_per_char": -0.8496293425559998, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 612, "native_id": "06be29539ad3e1fbd7b53b05243f4bd7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.464333176612854, "incorrect_loss_raw": 1.6667578518390656, "correct_loss_per_char": 0.732166588306427, "incorrect_loss_per_char": 0.8333789259195328, "correct_loss_per_token": 1.464333176612854, "incorrect_loss_per_token": 1.6667578518390656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4719791412353516, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4719791412353516, "logits_per_char": -0.7359895706176758, "num_chars": 2}, {"sum_logits": -1.464333176612854, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.464333176612854, "logits_per_char": -0.732166588306427, "num_chars": 2}, {"sum_logits": -1.6007970571517944, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6007970571517944, "logits_per_char": -0.8003985285758972, "num_chars": 2}, {"sum_logits": -1.7224059104919434, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7224059104919434, "logits_per_char": -0.8612029552459717, "num_chars": 2}, {"sum_logits": -1.8718492984771729, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8718492984771729, "logits_per_char": -0.9359246492385864, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 613, "native_id": "bbe0a1ad733e5699f991ff91b3712a6f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4243780374526978, "incorrect_loss_raw": 1.6907803118228912, "correct_loss_per_char": 0.7121890187263489, "incorrect_loss_per_char": 0.8453901559114456, "correct_loss_per_token": 1.4243780374526978, "incorrect_loss_per_token": 1.6907803118228912, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4243780374526978, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4243780374526978, "logits_per_char": -0.7121890187263489, "num_chars": 2}, {"sum_logits": -1.5465995073318481, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5465995073318481, "logits_per_char": -0.7732997536659241, "num_chars": 2}, {"sum_logits": -1.5784099102020264, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5784099102020264, "logits_per_char": -0.7892049551010132, "num_chars": 2}, {"sum_logits": -1.5662615299224854, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5662615299224854, "logits_per_char": -0.7831307649612427, "num_chars": 2}, {"sum_logits": -2.071850299835205, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.071850299835205, "logits_per_char": -1.0359251499176025, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 614, "native_id": "9e5ce2b7d9eb404cdf8c7317dd0b5a59", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6264764070510864, "incorrect_loss_raw": 1.6513645946979523, "correct_loss_per_char": 0.8132382035255432, "incorrect_loss_per_char": 0.8256822973489761, "correct_loss_per_token": 1.6264764070510864, "incorrect_loss_per_token": 1.6513645946979523, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2913894653320312, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.2913894653320312, "logits_per_char": -0.6456947326660156, "num_chars": 2}, {"sum_logits": -1.4870567321777344, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4870567321777344, "logits_per_char": -0.7435283660888672, "num_chars": 2}, {"sum_logits": -1.6264764070510864, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6264764070510864, "logits_per_char": -0.8132382035255432, "num_chars": 2}, {"sum_logits": -1.7907692193984985, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7907692193984985, "logits_per_char": -0.8953846096992493, "num_chars": 2}, {"sum_logits": -2.036242961883545, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.036242961883545, "logits_per_char": -1.0181214809417725, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 615, "native_id": "ffde211723f55e9744f94cbc14488a23", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8270601034164429, "incorrect_loss_raw": 1.604643702507019, "correct_loss_per_char": 0.9135300517082214, "incorrect_loss_per_char": 0.8023218512535095, "correct_loss_per_token": 1.8270601034164429, "incorrect_loss_per_token": 1.604643702507019, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3567630052566528, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3567630052566528, "logits_per_char": -0.6783815026283264, "num_chars": 2}, {"sum_logits": -1.402604579925537, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.402604579925537, "logits_per_char": -0.7013022899627686, "num_chars": 2}, {"sum_logits": -1.5980714559555054, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5980714559555054, "logits_per_char": -0.7990357279777527, "num_chars": 2}, {"sum_logits": -1.8270601034164429, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8270601034164429, "logits_per_char": -0.9135300517082214, "num_chars": 2}, {"sum_logits": -2.061135768890381, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.061135768890381, "logits_per_char": -1.0305678844451904, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 616, "native_id": "5ff8b0deed53b9ff91d58bd5b6f85bdf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7416198253631592, "incorrect_loss_raw": 1.6246158182621002, "correct_loss_per_char": 0.8708099126815796, "incorrect_loss_per_char": 0.8123079091310501, "correct_loss_per_token": 1.7416198253631592, "incorrect_loss_per_token": 1.6246158182621002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.369443655014038, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.369443655014038, "logits_per_char": -0.684721827507019, "num_chars": 2}, {"sum_logits": -1.459170937538147, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.459170937538147, "logits_per_char": -0.7295854687690735, "num_chars": 2}, {"sum_logits": -1.542691707611084, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.542691707611084, "logits_per_char": -0.771345853805542, "num_chars": 2}, {"sum_logits": -1.7416198253631592, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7416198253631592, "logits_per_char": -0.8708099126815796, "num_chars": 2}, {"sum_logits": -2.127156972885132, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.127156972885132, "logits_per_char": -1.063578486442566, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 617, "native_id": "36f1ceeecde7abf99dab635239e12442", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7484196424484253, "incorrect_loss_raw": 1.5985900461673737, "correct_loss_per_char": 0.8742098212242126, "incorrect_loss_per_char": 0.7992950230836868, "correct_loss_per_token": 1.7484196424484253, "incorrect_loss_per_token": 1.5985900461673737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3621059656143188, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.3621059656143188, "logits_per_char": -0.6810529828071594, "num_chars": 2}, {"sum_logits": -1.5923877954483032, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5923877954483032, "logits_per_char": -0.7961938977241516, "num_chars": 2}, {"sum_logits": -1.7484196424484253, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.7484196424484253, "logits_per_char": -0.8742098212242126, "num_chars": 2}, {"sum_logits": -1.6082227230072021, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6082227230072021, "logits_per_char": -0.8041113615036011, "num_chars": 2}, {"sum_logits": -1.8316437005996704, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.8316437005996704, "logits_per_char": -0.9158218502998352, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 618, "native_id": "e3c9e83c0c62d842de2dfe229f5e6d41", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.3471126556396484, "incorrect_loss_raw": 1.5179077982902527, "correct_loss_per_char": 1.1735563278198242, "incorrect_loss_per_char": 0.7589538991451263, "correct_loss_per_token": 2.3471126556396484, "incorrect_loss_per_token": 1.5179077982902527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1621618270874023, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.1621618270874023, "logits_per_char": -0.5810809135437012, "num_chars": 2}, {"sum_logits": -1.5251109600067139, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5251109600067139, "logits_per_char": -0.7625554800033569, "num_chars": 2}, {"sum_logits": -1.6641871929168701, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6641871929168701, "logits_per_char": -0.8320935964584351, "num_chars": 2}, {"sum_logits": -1.7201712131500244, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7201712131500244, "logits_per_char": -0.8600856065750122, "num_chars": 2}, {"sum_logits": -2.3471126556396484, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.3471126556396484, "logits_per_char": -1.1735563278198242, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 619, "native_id": "c0e4d0118c9cdfe2edc49ef954572b31", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5684109926223755, "incorrect_loss_raw": 1.6369285881519318, "correct_loss_per_char": 0.7842054963111877, "incorrect_loss_per_char": 0.8184642940759659, "correct_loss_per_token": 1.5684109926223755, "incorrect_loss_per_token": 1.6369285881519318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5284717082977295, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.5284717082977295, "logits_per_char": -0.7642358541488647, "num_chars": 2}, {"sum_logits": -1.6084731817245483, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.6084731817245483, "logits_per_char": -0.8042365908622742, "num_chars": 2}, {"sum_logits": -1.5684109926223755, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.5684109926223755, "logits_per_char": -0.7842054963111877, "num_chars": 2}, {"sum_logits": -1.532924771308899, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.532924771308899, "logits_per_char": -0.7664623856544495, "num_chars": 2}, {"sum_logits": -1.8778446912765503, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.8778446912765503, "logits_per_char": -0.9389223456382751, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 620, "native_id": "4423c006f2a43f222d4c4e97360c25d3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3401894569396973, "incorrect_loss_raw": 1.7129748463630676, "correct_loss_per_char": 0.6700947284698486, "incorrect_loss_per_char": 0.8564874231815338, "correct_loss_per_token": 1.3401894569396973, "incorrect_loss_per_token": 1.7129748463630676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3401894569396973, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.3401894569396973, "logits_per_char": -0.6700947284698486, "num_chars": 2}, {"sum_logits": -1.471919298171997, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.471919298171997, "logits_per_char": -0.7359596490859985, "num_chars": 2}, {"sum_logits": -1.6909723281860352, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6909723281860352, "logits_per_char": -0.8454861640930176, "num_chars": 2}, {"sum_logits": -1.6946377754211426, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6946377754211426, "logits_per_char": -0.8473188877105713, "num_chars": 2}, {"sum_logits": -1.9943699836730957, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.9943699836730957, "logits_per_char": -0.9971849918365479, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 621, "native_id": "9382bc51ba092f55a494eff8615899de", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6715723276138306, "incorrect_loss_raw": 1.6055760383605957, "correct_loss_per_char": 0.8357861638069153, "incorrect_loss_per_char": 0.8027880191802979, "correct_loss_per_token": 1.6715723276138306, "incorrect_loss_per_token": 1.6055760383605957, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5164613723754883, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.5164613723754883, "logits_per_char": -0.7582306861877441, "num_chars": 2}, {"sum_logits": -1.6392920017242432, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6392920017242432, "logits_per_char": -0.8196460008621216, "num_chars": 2}, {"sum_logits": -1.6715723276138306, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6715723276138306, "logits_per_char": -0.8357861638069153, "num_chars": 2}, {"sum_logits": -1.6030796766281128, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6030796766281128, "logits_per_char": -0.8015398383140564, "num_chars": 2}, {"sum_logits": -1.6634711027145386, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6634711027145386, "logits_per_char": -0.8317355513572693, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 622, "native_id": "dec1c42628a7448aa364cdada6e82f98", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.448258638381958, "incorrect_loss_raw": 1.6627214550971985, "correct_loss_per_char": 0.724129319190979, "incorrect_loss_per_char": 0.8313607275485992, "correct_loss_per_token": 1.448258638381958, "incorrect_loss_per_token": 1.6627214550971985, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.448258638381958, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.448258638381958, "logits_per_char": -0.724129319190979, "num_chars": 2}, {"sum_logits": -1.591966986656189, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.591966986656189, "logits_per_char": -0.7959834933280945, "num_chars": 2}, {"sum_logits": -1.695654273033142, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.695654273033142, "logits_per_char": -0.847827136516571, "num_chars": 2}, {"sum_logits": -1.6269108057022095, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6269108057022095, "logits_per_char": -0.8134554028511047, "num_chars": 2}, {"sum_logits": -1.7363537549972534, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.7363537549972534, "logits_per_char": -0.8681768774986267, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 623, "native_id": "07ea8ff6ee916f2bf9aceab1e19ff99a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6797826290130615, "incorrect_loss_raw": 1.6240573525428772, "correct_loss_per_char": 0.8398913145065308, "incorrect_loss_per_char": 0.8120286762714386, "correct_loss_per_token": 1.6797826290130615, "incorrect_loss_per_token": 1.6240573525428772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.382869005203247, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.382869005203247, "logits_per_char": -0.6914345026016235, "num_chars": 2}, {"sum_logits": -1.4733848571777344, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4733848571777344, "logits_per_char": -0.7366924285888672, "num_chars": 2}, {"sum_logits": -1.6476187705993652, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6476187705993652, "logits_per_char": -0.8238093852996826, "num_chars": 2}, {"sum_logits": -1.6797826290130615, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6797826290130615, "logits_per_char": -0.8398913145065308, "num_chars": 2}, {"sum_logits": -1.992356777191162, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.992356777191162, "logits_per_char": -0.996178388595581, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 624, "native_id": "a328285c6212c899e335c45db3c49ffd", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8215805292129517, "incorrect_loss_raw": 1.589908629655838, "correct_loss_per_char": 0.9107902646064758, "incorrect_loss_per_char": 0.794954314827919, "correct_loss_per_token": 1.8215805292129517, "incorrect_loss_per_token": 1.589908629655838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4400007724761963, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4400007724761963, "logits_per_char": -0.7200003862380981, "num_chars": 2}, {"sum_logits": -1.4301270246505737, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4301270246505737, "logits_per_char": -0.7150635123252869, "num_chars": 2}, {"sum_logits": -1.5298329591751099, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5298329591751099, "logits_per_char": -0.7649164795875549, "num_chars": 2}, {"sum_logits": -1.8215805292129517, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.8215805292129517, "logits_per_char": -0.9107902646064758, "num_chars": 2}, {"sum_logits": -1.9596737623214722, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9596737623214722, "logits_per_char": -0.9798368811607361, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 625, "native_id": "e248968fec422e1fab0f0561fedff76e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6099430322647095, "incorrect_loss_raw": 1.6325268745422363, "correct_loss_per_char": 0.8049715161323547, "incorrect_loss_per_char": 0.8162634372711182, "correct_loss_per_token": 1.6099430322647095, "incorrect_loss_per_token": 1.6325268745422363, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4704934358596802, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4704934358596802, "logits_per_char": -0.7352467179298401, "num_chars": 2}, {"sum_logits": -1.4598957300186157, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4598957300186157, "logits_per_char": -0.7299478650093079, "num_chars": 2}, {"sum_logits": -1.6518827676773071, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6518827676773071, "logits_per_char": -0.8259413838386536, "num_chars": 2}, {"sum_logits": -1.6099430322647095, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6099430322647095, "logits_per_char": -0.8049715161323547, "num_chars": 2}, {"sum_logits": -1.9478355646133423, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9478355646133423, "logits_per_char": -0.9739177823066711, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 626, "native_id": "2067720531fc03c017af941cec2f6f40", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.584585428237915, "incorrect_loss_raw": 1.6494695246219635, "correct_loss_per_char": 0.7922927141189575, "incorrect_loss_per_char": 0.8247347623109818, "correct_loss_per_token": 1.584585428237915, "incorrect_loss_per_token": 1.6494695246219635, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4329700469970703, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4329700469970703, "logits_per_char": -0.7164850234985352, "num_chars": 2}, {"sum_logits": -1.553457498550415, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.553457498550415, "logits_per_char": -0.7767287492752075, "num_chars": 2}, {"sum_logits": -1.584585428237915, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.584585428237915, "logits_per_char": -0.7922927141189575, "num_chars": 2}, {"sum_logits": -1.5407437086105347, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5407437086105347, "logits_per_char": -0.7703718543052673, "num_chars": 2}, {"sum_logits": -2.070706844329834, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.070706844329834, "logits_per_char": -1.035353422164917, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 627, "native_id": "70d3ebc00b165d9d08f9491a1dd85034", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6114393472671509, "incorrect_loss_raw": 1.6423115134239197, "correct_loss_per_char": 0.8057196736335754, "incorrect_loss_per_char": 0.8211557567119598, "correct_loss_per_token": 1.6114393472671509, "incorrect_loss_per_token": 1.6423115134239197, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3231979608535767, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.3231979608535767, "logits_per_char": -0.6615989804267883, "num_chars": 2}, {"sum_logits": -1.6114393472671509, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6114393472671509, "logits_per_char": -0.8057196736335754, "num_chars": 2}, {"sum_logits": -1.5575789213180542, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5575789213180542, "logits_per_char": -0.7787894606590271, "num_chars": 2}, {"sum_logits": -1.7277733087539673, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7277733087539673, "logits_per_char": -0.8638866543769836, "num_chars": 2}, {"sum_logits": -1.9606958627700806, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.9606958627700806, "logits_per_char": -0.9803479313850403, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 628, "native_id": "41bab71fea3fa04e5a4e10a2f86996df", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6578893661499023, "incorrect_loss_raw": 1.606382131576538, "correct_loss_per_char": 0.8289446830749512, "incorrect_loss_per_char": 0.803191065788269, "correct_loss_per_token": 1.6578893661499023, "incorrect_loss_per_token": 1.606382131576538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6228744983673096, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.6228744983673096, "logits_per_char": -0.8114372491836548, "num_chars": 2}, {"sum_logits": -1.5842111110687256, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.5842111110687256, "logits_per_char": -0.7921055555343628, "num_chars": 2}, {"sum_logits": -1.709366798400879, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.709366798400879, "logits_per_char": -0.8546833992004395, "num_chars": 2}, {"sum_logits": -1.5090761184692383, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.5090761184692383, "logits_per_char": -0.7545380592346191, "num_chars": 2}, {"sum_logits": -1.6578893661499023, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.6578893661499023, "logits_per_char": -0.8289446830749512, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 629, "native_id": "e18dd9ffc7b7934c39f2b5e9dee5a8c2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.042741298675537, "incorrect_loss_raw": 1.5430046319961548, "correct_loss_per_char": 1.0213706493377686, "incorrect_loss_per_char": 0.7715023159980774, "correct_loss_per_token": 2.042741298675537, "incorrect_loss_per_token": 1.5430046319961548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3890516757965088, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3890516757965088, "logits_per_char": -0.6945258378982544, "num_chars": 2}, {"sum_logits": -1.4015417098999023, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4015417098999023, "logits_per_char": -0.7007708549499512, "num_chars": 2}, {"sum_logits": -1.599845051765442, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.599845051765442, "logits_per_char": -0.799922525882721, "num_chars": 2}, {"sum_logits": -1.7815800905227661, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7815800905227661, "logits_per_char": -0.8907900452613831, "num_chars": 2}, {"sum_logits": -2.042741298675537, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.042741298675537, "logits_per_char": -1.0213706493377686, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 630, "native_id": "449de58e919975867255218484a9fc89", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.433537244796753, "incorrect_loss_raw": 1.6658721566200256, "correct_loss_per_char": 0.7167686223983765, "incorrect_loss_per_char": 0.8329360783100128, "correct_loss_per_token": 1.433537244796753, "incorrect_loss_per_token": 1.6658721566200256, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.433537244796753, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.433537244796753, "logits_per_char": -0.7167686223983765, "num_chars": 2}, {"sum_logits": -1.6208677291870117, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6208677291870117, "logits_per_char": -0.8104338645935059, "num_chars": 2}, {"sum_logits": -1.6518471240997314, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6518471240997314, "logits_per_char": -0.8259235620498657, "num_chars": 2}, {"sum_logits": -1.6180808544158936, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6180808544158936, "logits_per_char": -0.8090404272079468, "num_chars": 2}, {"sum_logits": -1.7726929187774658, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.7726929187774658, "logits_per_char": -0.8863464593887329, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 631, "native_id": "9698232e3599157431c9dc8f2fe179cd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.565839171409607, "incorrect_loss_raw": 1.6775644421577454, "correct_loss_per_char": 0.7829195857048035, "incorrect_loss_per_char": 0.8387822210788727, "correct_loss_per_token": 1.565839171409607, "incorrect_loss_per_token": 1.6775644421577454, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2505910396575928, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2505910396575928, "logits_per_char": -0.6252955198287964, "num_chars": 2}, {"sum_logits": -1.565839171409607, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.565839171409607, "logits_per_char": -0.7829195857048035, "num_chars": 2}, {"sum_logits": -1.596434235572815, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.596434235572815, "logits_per_char": -0.7982171177864075, "num_chars": 2}, {"sum_logits": -1.6952918767929077, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6952918767929077, "logits_per_char": -0.8476459383964539, "num_chars": 2}, {"sum_logits": -2.167940616607666, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.167940616607666, "logits_per_char": -1.083970308303833, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 632, "native_id": "0b5d0c3bafbe06dd5334c20cd8ea7fe2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8676260709762573, "incorrect_loss_raw": 1.6131412088871002, "correct_loss_per_char": 0.9338130354881287, "incorrect_loss_per_char": 0.8065706044435501, "correct_loss_per_token": 1.8676260709762573, "incorrect_loss_per_token": 1.6131412088871002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3028717041015625, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3028717041015625, "logits_per_char": -0.6514358520507812, "num_chars": 2}, {"sum_logits": -1.4125161170959473, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4125161170959473, "logits_per_char": -0.7062580585479736, "num_chars": 2}, {"sum_logits": -1.5421630144119263, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5421630144119263, "logits_per_char": -0.7710815072059631, "num_chars": 2}, {"sum_logits": -1.8676260709762573, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.8676260709762573, "logits_per_char": -0.9338130354881287, "num_chars": 2}, {"sum_logits": -2.195013999938965, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.195013999938965, "logits_per_char": -1.0975069999694824, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 633, "native_id": "7fe53bf68ec57a52a508611acf5b279e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.661494255065918, "incorrect_loss_raw": 1.6203792989253998, "correct_loss_per_char": 0.830747127532959, "incorrect_loss_per_char": 0.8101896494626999, "correct_loss_per_token": 1.661494255065918, "incorrect_loss_per_token": 1.6203792989253998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.365836262702942, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.365836262702942, "logits_per_char": -0.682918131351471, "num_chars": 2}, {"sum_logits": -1.7293726205825806, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.7293726205825806, "logits_per_char": -0.8646863102912903, "num_chars": 2}, {"sum_logits": -1.727180004119873, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.727180004119873, "logits_per_char": -0.8635900020599365, "num_chars": 2}, {"sum_logits": -1.6591283082962036, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.6591283082962036, "logits_per_char": -0.8295641541481018, "num_chars": 2}, {"sum_logits": -1.661494255065918, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.661494255065918, "logits_per_char": -0.830747127532959, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 634, "native_id": "68c41ec8415eab50620eb9ecf6f35a6a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5155774354934692, "incorrect_loss_raw": 1.6790450513362885, "correct_loss_per_char": 0.7577887177467346, "incorrect_loss_per_char": 0.8395225256681442, "correct_loss_per_token": 1.5155774354934692, "incorrect_loss_per_token": 1.6790450513362885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3394609689712524, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3394609689712524, "logits_per_char": -0.6697304844856262, "num_chars": 2}, {"sum_logits": -1.5155774354934692, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5155774354934692, "logits_per_char": -0.7577887177467346, "num_chars": 2}, {"sum_logits": -1.628953218460083, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.628953218460083, "logits_per_char": -0.8144766092300415, "num_chars": 2}, {"sum_logits": -1.6075594425201416, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6075594425201416, "logits_per_char": -0.8037797212600708, "num_chars": 2}, {"sum_logits": -2.1402065753936768, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.1402065753936768, "logits_per_char": -1.0701032876968384, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 635, "native_id": "6c4b2c93a4bdafb6cbf2b2ef2439b06f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4832149744033813, "incorrect_loss_raw": 1.6688512861728668, "correct_loss_per_char": 0.7416074872016907, "incorrect_loss_per_char": 0.8344256430864334, "correct_loss_per_token": 1.4832149744033813, "incorrect_loss_per_token": 1.6688512861728668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4783271551132202, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4783271551132202, "logits_per_char": -0.7391635775566101, "num_chars": 2}, {"sum_logits": -1.5458372831344604, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5458372831344604, "logits_per_char": -0.7729186415672302, "num_chars": 2}, {"sum_logits": -1.4832149744033813, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4832149744033813, "logits_per_char": -0.7416074872016907, "num_chars": 2}, {"sum_logits": -1.638297438621521, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.638297438621521, "logits_per_char": -0.8191487193107605, "num_chars": 2}, {"sum_logits": -2.0129432678222656, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0129432678222656, "logits_per_char": -1.0064716339111328, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 636, "native_id": "51e2da7396ab7045533e885dbb98a424", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4614667892456055, "incorrect_loss_raw": 1.6647694110870361, "correct_loss_per_char": 0.7307333946228027, "incorrect_loss_per_char": 0.8323847055435181, "correct_loss_per_token": 1.4614667892456055, "incorrect_loss_per_token": 1.6647694110870361, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4531364440917969, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4531364440917969, "logits_per_char": -0.7265682220458984, "num_chars": 2}, {"sum_logits": -1.4614667892456055, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4614667892456055, "logits_per_char": -0.7307333946228027, "num_chars": 2}, {"sum_logits": -1.6563177108764648, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6563177108764648, "logits_per_char": -0.8281588554382324, "num_chars": 2}, {"sum_logits": -1.7388246059417725, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7388246059417725, "logits_per_char": -0.8694123029708862, "num_chars": 2}, {"sum_logits": -1.8107988834381104, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8107988834381104, "logits_per_char": -0.9053994417190552, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 637, "native_id": "3f6157968fcf50d257ec3d8c729b7443", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3100004196166992, "incorrect_loss_raw": 1.7239779233932495, "correct_loss_per_char": 0.6550002098083496, "incorrect_loss_per_char": 0.8619889616966248, "correct_loss_per_token": 1.3100004196166992, "incorrect_loss_per_token": 1.7239779233932495, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3100004196166992, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3100004196166992, "logits_per_char": -0.6550002098083496, "num_chars": 2}, {"sum_logits": -1.5304547548294067, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5304547548294067, "logits_per_char": -0.7652273774147034, "num_chars": 2}, {"sum_logits": -1.6444545984268188, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6444545984268188, "logits_per_char": -0.8222272992134094, "num_chars": 2}, {"sum_logits": -1.8251243829727173, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.8251243829727173, "logits_per_char": -0.9125621914863586, "num_chars": 2}, {"sum_logits": -1.8958779573440552, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.8958779573440552, "logits_per_char": -0.9479389786720276, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 638, "native_id": "4768aa28fa14569d830f8947565296c1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.592110514640808, "incorrect_loss_raw": 1.6488872170448303, "correct_loss_per_char": 0.796055257320404, "incorrect_loss_per_char": 0.8244436085224152, "correct_loss_per_token": 1.592110514640808, "incorrect_loss_per_token": 1.6488872170448303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3523961305618286, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.3523961305618286, "logits_per_char": -0.6761980652809143, "num_chars": 2}, {"sum_logits": -1.631856918334961, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.631856918334961, "logits_per_char": -0.8159284591674805, "num_chars": 2}, {"sum_logits": -1.5667940378189087, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5667940378189087, "logits_per_char": -0.7833970189094543, "num_chars": 2}, {"sum_logits": -1.592110514640808, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.592110514640808, "logits_per_char": -0.796055257320404, "num_chars": 2}, {"sum_logits": -2.044501781463623, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -2.044501781463623, "logits_per_char": -1.0222508907318115, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 639, "native_id": "5516b1c93f94aaa0bf9a4c7b124788d4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3028827905654907, "incorrect_loss_raw": 1.731054425239563, "correct_loss_per_char": 0.6514413952827454, "incorrect_loss_per_char": 0.8655272126197815, "correct_loss_per_token": 1.3028827905654907, "incorrect_loss_per_token": 1.731054425239563, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3028827905654907, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3028827905654907, "logits_per_char": -0.6514413952827454, "num_chars": 2}, {"sum_logits": -1.485469102859497, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.485469102859497, "logits_per_char": -0.7427345514297485, "num_chars": 2}, {"sum_logits": -1.6641725301742554, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6641725301742554, "logits_per_char": -0.8320862650871277, "num_chars": 2}, {"sum_logits": -1.7309120893478394, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7309120893478394, "logits_per_char": -0.8654560446739197, "num_chars": 2}, {"sum_logits": -2.04366397857666, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.04366397857666, "logits_per_char": -1.02183198928833, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 640, "native_id": "96ea2c3174229c4a6a0e2ffaed2df378", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.892564296722412, "incorrect_loss_raw": 1.5599271953105927, "correct_loss_per_char": 0.946282148361206, "incorrect_loss_per_char": 0.7799635976552963, "correct_loss_per_token": 1.892564296722412, "incorrect_loss_per_token": 1.5599271953105927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5277498960494995, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5277498960494995, "logits_per_char": -0.7638749480247498, "num_chars": 2}, {"sum_logits": -1.610450267791748, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.610450267791748, "logits_per_char": -0.805225133895874, "num_chars": 2}, {"sum_logits": -1.4942967891693115, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4942967891693115, "logits_per_char": -0.7471483945846558, "num_chars": 2}, {"sum_logits": -1.6072118282318115, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6072118282318115, "logits_per_char": -0.8036059141159058, "num_chars": 2}, {"sum_logits": -1.892564296722412, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.892564296722412, "logits_per_char": -0.946282148361206, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 641, "native_id": "7905b9f4ba503b0ce13b576808e99c42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5235222578048706, "incorrect_loss_raw": 1.6448041498661041, "correct_loss_per_char": 0.7617611289024353, "incorrect_loss_per_char": 0.8224020749330521, "correct_loss_per_token": 1.5235222578048706, "incorrect_loss_per_token": 1.6448041498661041, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5235222578048706, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.5235222578048706, "logits_per_char": -0.7617611289024353, "num_chars": 2}, {"sum_logits": -1.5253888368606567, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5253888368606567, "logits_per_char": -0.7626944184303284, "num_chars": 2}, {"sum_logits": -1.5559117794036865, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5559117794036865, "logits_per_char": -0.7779558897018433, "num_chars": 2}, {"sum_logits": -1.671973705291748, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.671973705291748, "logits_per_char": -0.835986852645874, "num_chars": 2}, {"sum_logits": -1.8259422779083252, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.8259422779083252, "logits_per_char": -0.9129711389541626, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 642, "native_id": "e0a7d1df3ce14b27888e785e6636d5f0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5699636936187744, "incorrect_loss_raw": 1.6455521881580353, "correct_loss_per_char": 0.7849818468093872, "incorrect_loss_per_char": 0.8227760940790176, "correct_loss_per_token": 1.5699636936187744, "incorrect_loss_per_token": 1.6455521881580353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4605152606964111, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4605152606964111, "logits_per_char": -0.7302576303482056, "num_chars": 2}, {"sum_logits": -1.490050196647644, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.490050196647644, "logits_per_char": -0.745025098323822, "num_chars": 2}, {"sum_logits": -1.5699636936187744, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5699636936187744, "logits_per_char": -0.7849818468093872, "num_chars": 2}, {"sum_logits": -1.647775411605835, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.647775411605835, "logits_per_char": -0.8238877058029175, "num_chars": 2}, {"sum_logits": -1.983867883682251, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.983867883682251, "logits_per_char": -0.9919339418411255, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 643, "native_id": "3eb397b96b6c3a245c81ab30205943f1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6513707637786865, "incorrect_loss_raw": 1.6312989294528961, "correct_loss_per_char": 0.8256853818893433, "incorrect_loss_per_char": 0.8156494647264481, "correct_loss_per_token": 1.6513707637786865, "incorrect_loss_per_token": 1.6312989294528961, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4407920837402344, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4407920837402344, "logits_per_char": -0.7203960418701172, "num_chars": 2}, {"sum_logits": -1.4292875528335571, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.4292875528335571, "logits_per_char": -0.7146437764167786, "num_chars": 2}, {"sum_logits": -1.6513707637786865, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.6513707637786865, "logits_per_char": -0.8256853818893433, "num_chars": 2}, {"sum_logits": -1.649493932723999, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.649493932723999, "logits_per_char": -0.8247469663619995, "num_chars": 2}, {"sum_logits": -2.005622148513794, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -2.005622148513794, "logits_per_char": -1.002811074256897, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 644, "native_id": "536c9af0fae0aa75b32874dfcac66353", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5207949876785278, "incorrect_loss_raw": 1.646554172039032, "correct_loss_per_char": 0.7603974938392639, "incorrect_loss_per_char": 0.823277086019516, "correct_loss_per_token": 1.5207949876785278, "incorrect_loss_per_token": 1.646554172039032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5207949876785278, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.5207949876785278, "logits_per_char": -0.7603974938392639, "num_chars": 2}, {"sum_logits": -1.5563892126083374, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5563892126083374, "logits_per_char": -0.7781946063041687, "num_chars": 2}, {"sum_logits": -1.6501425504684448, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6501425504684448, "logits_per_char": -0.8250712752342224, "num_chars": 2}, {"sum_logits": -1.5330060720443726, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5330060720443726, "logits_per_char": -0.7665030360221863, "num_chars": 2}, {"sum_logits": -1.8466788530349731, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.8466788530349731, "logits_per_char": -0.9233394265174866, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 645, "native_id": "dc36293f603cf230f8059fc6f2e5660d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4979122877120972, "incorrect_loss_raw": 1.6986523568630219, "correct_loss_per_char": 0.7489561438560486, "incorrect_loss_per_char": 0.8493261784315109, "correct_loss_per_token": 1.4979122877120972, "incorrect_loss_per_token": 1.6986523568630219, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3385533094406128, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.3385533094406128, "logits_per_char": -0.6692766547203064, "num_chars": 2}, {"sum_logits": -1.6038140058517456, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6038140058517456, "logits_per_char": -0.8019070029258728, "num_chars": 2}, {"sum_logits": -1.4979122877120972, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4979122877120972, "logits_per_char": -0.7489561438560486, "num_chars": 2}, {"sum_logits": -1.577724575996399, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.577724575996399, "logits_per_char": -0.7888622879981995, "num_chars": 2}, {"sum_logits": -2.27451753616333, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.27451753616333, "logits_per_char": -1.137258768081665, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 646, "native_id": "1510f5183095466e4fe41b82501a9dd0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6874996423721313, "incorrect_loss_raw": 1.6395300328731537, "correct_loss_per_char": 0.8437498211860657, "incorrect_loss_per_char": 0.8197650164365768, "correct_loss_per_token": 1.6874996423721313, "incorrect_loss_per_token": 1.6395300328731537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.256592035293579, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.256592035293579, "logits_per_char": -0.6282960176467896, "num_chars": 2}, {"sum_logits": -1.489030122756958, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.489030122756958, "logits_per_char": -0.744515061378479, "num_chars": 2}, {"sum_logits": -1.6874996423721313, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6874996423721313, "logits_per_char": -0.8437498211860657, "num_chars": 2}, {"sum_logits": -1.7642244100570679, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7642244100570679, "logits_per_char": -0.8821122050285339, "num_chars": 2}, {"sum_logits": -2.0482735633850098, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0482735633850098, "logits_per_char": -1.0241367816925049, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 647, "native_id": "1fcc547e4e6813afc1a66717248d6c62", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7785409688949585, "incorrect_loss_raw": 1.616059422492981, "correct_loss_per_char": 0.8892704844474792, "incorrect_loss_per_char": 0.8080297112464905, "correct_loss_per_token": 1.7785409688949585, "incorrect_loss_per_token": 1.616059422492981, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.279457688331604, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.279457688331604, "logits_per_char": -0.639728844165802, "num_chars": 2}, {"sum_logits": -1.5117011070251465, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5117011070251465, "logits_per_char": -0.7558505535125732, "num_chars": 2}, {"sum_logits": -1.7785409688949585, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7785409688949585, "logits_per_char": -0.8892704844474792, "num_chars": 2}, {"sum_logits": -1.6063374280929565, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6063374280929565, "logits_per_char": -0.8031687140464783, "num_chars": 2}, {"sum_logits": -2.066741466522217, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.066741466522217, "logits_per_char": -1.0333707332611084, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 648, "native_id": "68a911b64dc943b5f81c0f8dec7faed7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5080387592315674, "incorrect_loss_raw": 1.666050374507904, "correct_loss_per_char": 0.7540193796157837, "incorrect_loss_per_char": 0.833025187253952, "correct_loss_per_token": 1.5080387592315674, "incorrect_loss_per_token": 1.666050374507904, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5094319581985474, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5094319581985474, "logits_per_char": -0.7547159790992737, "num_chars": 2}, {"sum_logits": -1.5080387592315674, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.5080387592315674, "logits_per_char": -0.7540193796157837, "num_chars": 2}, {"sum_logits": -1.5620343685150146, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5620343685150146, "logits_per_char": -0.7810171842575073, "num_chars": 2}, {"sum_logits": -1.534018874168396, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.534018874168396, "logits_per_char": -0.767009437084198, "num_chars": 2}, {"sum_logits": -2.058716297149658, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.058716297149658, "logits_per_char": -1.029358148574829, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 649, "native_id": "92f423de9a556a66c3eb73e9ddf9399a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5245000123977661, "incorrect_loss_raw": 1.6706554889678955, "correct_loss_per_char": 0.7622500061988831, "incorrect_loss_per_char": 0.8353277444839478, "correct_loss_per_token": 1.5245000123977661, "incorrect_loss_per_token": 1.6706554889678955, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.338937759399414, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.338937759399414, "logits_per_char": -0.669468879699707, "num_chars": 2}, {"sum_logits": -1.5245000123977661, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5245000123977661, "logits_per_char": -0.7622500061988831, "num_chars": 2}, {"sum_logits": -1.678821086883545, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.678821086883545, "logits_per_char": -0.8394105434417725, "num_chars": 2}, {"sum_logits": -1.5969574451446533, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5969574451446533, "logits_per_char": -0.7984787225723267, "num_chars": 2}, {"sum_logits": -2.0679056644439697, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0679056644439697, "logits_per_char": -1.0339528322219849, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 650, "native_id": "1cd94405124031e8681cd12bd25e2d61", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.702576756477356, "incorrect_loss_raw": 1.618218183517456, "correct_loss_per_char": 0.851288378238678, "incorrect_loss_per_char": 0.809109091758728, "correct_loss_per_token": 1.702576756477356, "incorrect_loss_per_token": 1.618218183517456, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4374128580093384, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4374128580093384, "logits_per_char": -0.7187064290046692, "num_chars": 2}, {"sum_logits": -1.4757881164550781, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4757881164550781, "logits_per_char": -0.7378940582275391, "num_chars": 2}, {"sum_logits": -1.54008948802948, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.54008948802948, "logits_per_char": -0.77004474401474, "num_chars": 2}, {"sum_logits": -1.702576756477356, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.702576756477356, "logits_per_char": -0.851288378238678, "num_chars": 2}, {"sum_logits": -2.0195822715759277, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.0195822715759277, "logits_per_char": -1.0097911357879639, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 651, "native_id": "64ab884bd870f6f68146636b4cce921c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2658277750015259, "incorrect_loss_raw": 1.7661233246326447, "correct_loss_per_char": 0.6329138875007629, "incorrect_loss_per_char": 0.8830616623163223, "correct_loss_per_token": 1.2658277750015259, "incorrect_loss_per_token": 1.7661233246326447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2658277750015259, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.2658277750015259, "logits_per_char": -0.6329138875007629, "num_chars": 2}, {"sum_logits": -1.5399352312088013, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5399352312088013, "logits_per_char": -0.7699676156044006, "num_chars": 2}, {"sum_logits": -1.6211572885513306, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6211572885513306, "logits_per_char": -0.8105786442756653, "num_chars": 2}, {"sum_logits": -1.6077479124069214, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6077479124069214, "logits_per_char": -0.8038739562034607, "num_chars": 2}, {"sum_logits": -2.2956528663635254, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.2956528663635254, "logits_per_char": -1.1478264331817627, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 652, "native_id": "66275550d64d16339c944e6a6d63eb5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4169318675994873, "incorrect_loss_raw": 1.6803258061408997, "correct_loss_per_char": 0.7084659337997437, "incorrect_loss_per_char": 0.8401629030704498, "correct_loss_per_token": 1.4169318675994873, "incorrect_loss_per_token": 1.6803258061408997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4169318675994873, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4169318675994873, "logits_per_char": -0.7084659337997437, "num_chars": 2}, {"sum_logits": -1.551998496055603, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.551998496055603, "logits_per_char": -0.7759992480278015, "num_chars": 2}, {"sum_logits": -1.6295214891433716, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6295214891433716, "logits_per_char": -0.8147607445716858, "num_chars": 2}, {"sum_logits": -1.6393126249313354, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6393126249313354, "logits_per_char": -0.8196563124656677, "num_chars": 2}, {"sum_logits": -1.9004706144332886, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9004706144332886, "logits_per_char": -0.9502353072166443, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 653, "native_id": "9b26329d74a6159ab9af4f899303de39", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4973199367523193, "incorrect_loss_raw": 1.6473824977874756, "correct_loss_per_char": 0.7486599683761597, "incorrect_loss_per_char": 0.8236912488937378, "correct_loss_per_token": 1.4973199367523193, "incorrect_loss_per_token": 1.6473824977874756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6093943119049072, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6093943119049072, "logits_per_char": -0.8046971559524536, "num_chars": 2}, {"sum_logits": -1.4973199367523193, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4973199367523193, "logits_per_char": -0.7486599683761597, "num_chars": 2}, {"sum_logits": -1.5930049419403076, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5930049419403076, "logits_per_char": -0.7965024709701538, "num_chars": 2}, {"sum_logits": -1.6141748428344727, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6141748428344727, "logits_per_char": -0.8070874214172363, "num_chars": 2}, {"sum_logits": -1.7729558944702148, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7729558944702148, "logits_per_char": -0.8864779472351074, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 654, "native_id": "f74b7f268d3c190a13f99ede6d2359e1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.293463945388794, "incorrect_loss_raw": 1.731466293334961, "correct_loss_per_char": 0.646731972694397, "incorrect_loss_per_char": 0.8657331466674805, "correct_loss_per_token": 1.293463945388794, "incorrect_loss_per_token": 1.731466293334961, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.293463945388794, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.293463945388794, "logits_per_char": -0.646731972694397, "num_chars": 2}, {"sum_logits": -1.5632222890853882, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5632222890853882, "logits_per_char": -0.7816111445426941, "num_chars": 2}, {"sum_logits": -1.6811916828155518, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6811916828155518, "logits_per_char": -0.8405958414077759, "num_chars": 2}, {"sum_logits": -1.6182488203048706, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6182488203048706, "logits_per_char": -0.8091244101524353, "num_chars": 2}, {"sum_logits": -2.063202381134033, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.063202381134033, "logits_per_char": -1.0316011905670166, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 655, "native_id": "22458fdcead20e2def0df0d92d5806f6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2094898223876953, "incorrect_loss_raw": 1.5159801244735718, "correct_loss_per_char": 1.1047449111938477, "incorrect_loss_per_char": 0.7579900622367859, "correct_loss_per_token": 2.2094898223876953, "incorrect_loss_per_token": 1.5159801244735718, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.385014295578003, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.385014295578003, "logits_per_char": -0.6925071477890015, "num_chars": 2}, {"sum_logits": -1.4369131326675415, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4369131326675415, "logits_per_char": -0.7184565663337708, "num_chars": 2}, {"sum_logits": -1.6131441593170166, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6131441593170166, "logits_per_char": -0.8065720796585083, "num_chars": 2}, {"sum_logits": -1.628848910331726, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.628848910331726, "logits_per_char": -0.814424455165863, "num_chars": 2}, {"sum_logits": -2.2094898223876953, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.2094898223876953, "logits_per_char": -1.1047449111938477, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 656, "native_id": "f7b96f195a7adfe0c74924a165cfd055", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5083245038986206, "incorrect_loss_raw": 1.7240366339683533, "correct_loss_per_char": 0.7541622519493103, "incorrect_loss_per_char": 0.8620183169841766, "correct_loss_per_token": 1.5083245038986206, "incorrect_loss_per_token": 1.7240366339683533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1857621669769287, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.1857621669769287, "logits_per_char": -0.5928810834884644, "num_chars": 2}, {"sum_logits": -1.5083245038986206, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5083245038986206, "logits_per_char": -0.7541622519493103, "num_chars": 2}, {"sum_logits": -1.683005690574646, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.683005690574646, "logits_per_char": -0.841502845287323, "num_chars": 2}, {"sum_logits": -1.6729742288589478, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.6729742288589478, "logits_per_char": -0.8364871144294739, "num_chars": 2}, {"sum_logits": -2.3544044494628906, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -2.3544044494628906, "logits_per_char": -1.1772022247314453, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 657, "native_id": "9b631734e72a0e559da153492c1e7894", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5069925785064697, "incorrect_loss_raw": 1.689577966928482, "correct_loss_per_char": 0.7534962892532349, "incorrect_loss_per_char": 0.844788983464241, "correct_loss_per_token": 1.5069925785064697, "incorrect_loss_per_token": 1.689577966928482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2339774370193481, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.2339774370193481, "logits_per_char": -0.6169887185096741, "num_chars": 2}, {"sum_logits": -1.5069925785064697, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5069925785064697, "logits_per_char": -0.7534962892532349, "num_chars": 2}, {"sum_logits": -1.7761356830596924, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.7761356830596924, "logits_per_char": -0.8880678415298462, "num_chars": 2}, {"sum_logits": -1.668020248413086, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.668020248413086, "logits_per_char": -0.834010124206543, "num_chars": 2}, {"sum_logits": -2.0801784992218018, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -2.0801784992218018, "logits_per_char": -1.0400892496109009, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 658, "native_id": "caccaa51ee960a92d44e5b949fc35a66", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.05458402633667, "incorrect_loss_raw": 1.5345337092876434, "correct_loss_per_char": 1.027292013168335, "incorrect_loss_per_char": 0.7672668546438217, "correct_loss_per_token": 2.05458402633667, "incorrect_loss_per_token": 1.5345337092876434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.390436053276062, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.390436053276062, "logits_per_char": -0.695218026638031, "num_chars": 2}, {"sum_logits": -1.4676363468170166, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4676363468170166, "logits_per_char": -0.7338181734085083, "num_chars": 2}, {"sum_logits": -1.6262904405593872, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6262904405593872, "logits_per_char": -0.8131452202796936, "num_chars": 2}, {"sum_logits": -1.653771996498108, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.653771996498108, "logits_per_char": -0.826885998249054, "num_chars": 2}, {"sum_logits": -2.05458402633667, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.05458402633667, "logits_per_char": -1.027292013168335, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 659, "native_id": "def936fda9f6ccee01f57c0f804fabd0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5384135246276855, "incorrect_loss_raw": 1.6428503394126892, "correct_loss_per_char": 0.7692067623138428, "incorrect_loss_per_char": 0.8214251697063446, "correct_loss_per_token": 1.5384135246276855, "incorrect_loss_per_token": 1.6428503394126892, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4289517402648926, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4289517402648926, "logits_per_char": -0.7144758701324463, "num_chars": 2}, {"sum_logits": -1.652984857559204, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.652984857559204, "logits_per_char": -0.826492428779602, "num_chars": 2}, {"sum_logits": -1.694715976715088, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.694715976715088, "logits_per_char": -0.847357988357544, "num_chars": 2}, {"sum_logits": -1.5384135246276855, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5384135246276855, "logits_per_char": -0.7692067623138428, "num_chars": 2}, {"sum_logits": -1.7947487831115723, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7947487831115723, "logits_per_char": -0.8973743915557861, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 660, "native_id": "761b0f6c68b1540949b70f76a9e67c78", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.567698359489441, "incorrect_loss_raw": 1.6490938663482666, "correct_loss_per_char": 0.7838491797447205, "incorrect_loss_per_char": 0.8245469331741333, "correct_loss_per_token": 1.567698359489441, "incorrect_loss_per_token": 1.6490938663482666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4004570245742798, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4004570245742798, "logits_per_char": -0.7002285122871399, "num_chars": 2}, {"sum_logits": -1.5699247121810913, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5699247121810913, "logits_per_char": -0.7849623560905457, "num_chars": 2}, {"sum_logits": -1.567698359489441, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.567698359489441, "logits_per_char": -0.7838491797447205, "num_chars": 2}, {"sum_logits": -1.6405245065689087, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6405245065689087, "logits_per_char": -0.8202622532844543, "num_chars": 2}, {"sum_logits": -1.9854692220687866, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9854692220687866, "logits_per_char": -0.9927346110343933, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 661, "native_id": "8c11546468a2595b29a1297e73334fc4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7151479721069336, "incorrect_loss_raw": 1.6056134700775146, "correct_loss_per_char": 0.8575739860534668, "incorrect_loss_per_char": 0.8028067350387573, "correct_loss_per_token": 1.7151479721069336, "incorrect_loss_per_token": 1.6056134700775146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3214342594146729, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3214342594146729, "logits_per_char": -0.6607171297073364, "num_chars": 2}, {"sum_logits": -1.7151479721069336, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7151479721069336, "logits_per_char": -0.8575739860534668, "num_chars": 2}, {"sum_logits": -1.6947381496429443, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6947381496429443, "logits_per_char": -0.8473690748214722, "num_chars": 2}, {"sum_logits": -1.660442590713501, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.660442590713501, "logits_per_char": -0.8302212953567505, "num_chars": 2}, {"sum_logits": -1.7458388805389404, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7458388805389404, "logits_per_char": -0.8729194402694702, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 662, "native_id": "a5dcac512870e79f5aa2b22dbd662404", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5813214778900146, "incorrect_loss_raw": 1.651055634021759, "correct_loss_per_char": 0.7906607389450073, "incorrect_loss_per_char": 0.8255278170108795, "correct_loss_per_token": 1.5813214778900146, "incorrect_loss_per_token": 1.651055634021759, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3764511346817017, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3764511346817017, "logits_per_char": -0.6882255673408508, "num_chars": 2}, {"sum_logits": -1.5813214778900146, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5813214778900146, "logits_per_char": -0.7906607389450073, "num_chars": 2}, {"sum_logits": -1.563184380531311, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.563184380531311, "logits_per_char": -0.7815921902656555, "num_chars": 2}, {"sum_logits": -1.612734317779541, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.612734317779541, "logits_per_char": -0.8063671588897705, "num_chars": 2}, {"sum_logits": -2.0518527030944824, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.0518527030944824, "logits_per_char": -1.0259263515472412, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 663, "native_id": "870b07a1c5af2e956673a9680da99852", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2127227783203125, "incorrect_loss_raw": 1.5174325704574585, "correct_loss_per_char": 1.1063613891601562, "incorrect_loss_per_char": 0.7587162852287292, "correct_loss_per_token": 2.2127227783203125, "incorrect_loss_per_token": 1.5174325704574585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.315014123916626, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.315014123916626, "logits_per_char": -0.657507061958313, "num_chars": 2}, {"sum_logits": -1.463202714920044, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.463202714920044, "logits_per_char": -0.731601357460022, "num_chars": 2}, {"sum_logits": -1.6097440719604492, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6097440719604492, "logits_per_char": -0.8048720359802246, "num_chars": 2}, {"sum_logits": -1.6817693710327148, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6817693710327148, "logits_per_char": -0.8408846855163574, "num_chars": 2}, {"sum_logits": -2.2127227783203125, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.2127227783203125, "logits_per_char": -1.1063613891601562, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 664, "native_id": "f48528156632b9c5b18af9ce2095509b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5407029390335083, "incorrect_loss_raw": 1.642730712890625, "correct_loss_per_char": 0.7703514695167542, "incorrect_loss_per_char": 0.8213653564453125, "correct_loss_per_token": 1.5407029390335083, "incorrect_loss_per_token": 1.642730712890625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4955140352249146, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4955140352249146, "logits_per_char": -0.7477570176124573, "num_chars": 2}, {"sum_logits": -1.5561085939407349, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5561085939407349, "logits_per_char": -0.7780542969703674, "num_chars": 2}, {"sum_logits": -1.5407029390335083, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5407029390335083, "logits_per_char": -0.7703514695167542, "num_chars": 2}, {"sum_logits": -1.6868616342544556, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6868616342544556, "logits_per_char": -0.8434308171272278, "num_chars": 2}, {"sum_logits": -1.832438588142395, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.832438588142395, "logits_per_char": -0.9162192940711975, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 665, "native_id": "5496c7293f653120e5a5213db2d7b103", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5414739847183228, "incorrect_loss_raw": 1.645501434803009, "correct_loss_per_char": 0.7707369923591614, "incorrect_loss_per_char": 0.8227507174015045, "correct_loss_per_token": 1.5414739847183228, "incorrect_loss_per_token": 1.645501434803009, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5251452922821045, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5251452922821045, "logits_per_char": -0.7625726461410522, "num_chars": 2}, {"sum_logits": -1.67096745967865, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.67096745967865, "logits_per_char": -0.835483729839325, "num_chars": 2}, {"sum_logits": -1.5414739847183228, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5414739847183228, "logits_per_char": -0.7707369923591614, "num_chars": 2}, {"sum_logits": -1.4881551265716553, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4881551265716553, "logits_per_char": -0.7440775632858276, "num_chars": 2}, {"sum_logits": -1.8977378606796265, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.8977378606796265, "logits_per_char": -0.9488689303398132, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 666, "native_id": "9d97e2bb458d93a8bafe4380b08727e3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3071553707122803, "incorrect_loss_raw": 1.724560707807541, "correct_loss_per_char": 0.6535776853561401, "incorrect_loss_per_char": 0.8622803539037704, "correct_loss_per_token": 1.3071553707122803, "incorrect_loss_per_token": 1.724560707807541, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3071553707122803, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3071553707122803, "logits_per_char": -0.6535776853561401, "num_chars": 2}, {"sum_logits": -1.5842379331588745, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5842379331588745, "logits_per_char": -0.7921189665794373, "num_chars": 2}, {"sum_logits": -1.726244330406189, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.726244330406189, "logits_per_char": -0.8631221652030945, "num_chars": 2}, {"sum_logits": -1.5753637552261353, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5753637552261353, "logits_per_char": -0.7876818776130676, "num_chars": 2}, {"sum_logits": -2.012396812438965, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.012396812438965, "logits_per_char": -1.0061984062194824, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 667, "native_id": "26d7d59ef7b9f2e0c2d47419fa5bca91", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5000202655792236, "incorrect_loss_raw": 1.6654757857322693, "correct_loss_per_char": 0.7500101327896118, "incorrect_loss_per_char": 0.8327378928661346, "correct_loss_per_token": 1.5000202655792236, "incorrect_loss_per_token": 1.6654757857322693, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5099732875823975, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5099732875823975, "logits_per_char": -0.7549866437911987, "num_chars": 2}, {"sum_logits": -1.5862483978271484, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5862483978271484, "logits_per_char": -0.7931241989135742, "num_chars": 2}, {"sum_logits": -1.5280604362487793, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5280604362487793, "logits_per_char": -0.7640302181243896, "num_chars": 2}, {"sum_logits": -1.5000202655792236, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.5000202655792236, "logits_per_char": -0.7500101327896118, "num_chars": 2}, {"sum_logits": -2.037621021270752, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.037621021270752, "logits_per_char": -1.018810510635376, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 668, "native_id": "c6f10fd07348bf2cf5488b0d9f38d806", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4866224527359009, "incorrect_loss_raw": 1.7002992033958435, "correct_loss_per_char": 0.7433112263679504, "incorrect_loss_per_char": 0.8501496016979218, "correct_loss_per_token": 1.4866224527359009, "incorrect_loss_per_token": 1.7002992033958435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2567930221557617, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.2567930221557617, "logits_per_char": -0.6283965110778809, "num_chars": 2}, {"sum_logits": -1.4866224527359009, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4866224527359009, "logits_per_char": -0.7433112263679504, "num_chars": 2}, {"sum_logits": -1.6097911596298218, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6097911596298218, "logits_per_char": -0.8048955798149109, "num_chars": 2}, {"sum_logits": -1.7814916372299194, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7814916372299194, "logits_per_char": -0.8907458186149597, "num_chars": 2}, {"sum_logits": -2.153120994567871, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.153120994567871, "logits_per_char": -1.0765604972839355, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 669, "native_id": "8ebf9d24719649a0b041aea02a6e46af", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5955569744110107, "incorrect_loss_raw": 1.634692907333374, "correct_loss_per_char": 0.7977784872055054, "incorrect_loss_per_char": 0.817346453666687, "correct_loss_per_token": 1.5955569744110107, "incorrect_loss_per_token": 1.634692907333374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4098515510559082, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4098515510559082, "logits_per_char": -0.7049257755279541, "num_chars": 2}, {"sum_logits": -1.6025128364562988, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6025128364562988, "logits_per_char": -0.8012564182281494, "num_chars": 2}, {"sum_logits": -1.5955569744110107, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5955569744110107, "logits_per_char": -0.7977784872055054, "num_chars": 2}, {"sum_logits": -1.6234979629516602, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6234979629516602, "logits_per_char": -0.8117489814758301, "num_chars": 2}, {"sum_logits": -1.902909278869629, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.902909278869629, "logits_per_char": -0.9514546394348145, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 670, "native_id": "c961578f4c5768b67b843e5d2ce18452", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4001295566558838, "incorrect_loss_raw": 1.7212335765361786, "correct_loss_per_char": 0.7000647783279419, "incorrect_loss_per_char": 0.8606167882680893, "correct_loss_per_token": 1.4001295566558838, "incorrect_loss_per_token": 1.7212335765361786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4001295566558838, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4001295566558838, "logits_per_char": -0.7000647783279419, "num_chars": 2}, {"sum_logits": -1.4180008172988892, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4180008172988892, "logits_per_char": -0.7090004086494446, "num_chars": 2}, {"sum_logits": -1.5071214437484741, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5071214437484741, "logits_per_char": -0.7535607218742371, "num_chars": 2}, {"sum_logits": -1.7214993238449097, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7214993238449097, "logits_per_char": -0.8607496619224548, "num_chars": 2}, {"sum_logits": -2.2383127212524414, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.2383127212524414, "logits_per_char": -1.1191563606262207, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 671, "native_id": "cce1b59f7c4f540a84a1a7d6d88548c4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.513332486152649, "incorrect_loss_raw": 1.6677748560905457, "correct_loss_per_char": 0.7566662430763245, "incorrect_loss_per_char": 0.8338874280452728, "correct_loss_per_token": 1.513332486152649, "incorrect_loss_per_token": 1.6677748560905457, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.441443920135498, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.441443920135498, "logits_per_char": -0.720721960067749, "num_chars": 2}, {"sum_logits": -1.513332486152649, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.513332486152649, "logits_per_char": -0.7566662430763245, "num_chars": 2}, {"sum_logits": -1.5731556415557861, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5731556415557861, "logits_per_char": -0.7865778207778931, "num_chars": 2}, {"sum_logits": -1.591451644897461, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.591451644897461, "logits_per_char": -0.7957258224487305, "num_chars": 2}, {"sum_logits": -2.0650482177734375, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0650482177734375, "logits_per_char": -1.0325241088867188, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 672, "native_id": "60848ce50295fc745756fbe960e78b88", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3611890077590942, "incorrect_loss_raw": 1.724654346704483, "correct_loss_per_char": 0.6805945038795471, "incorrect_loss_per_char": 0.8623271733522415, "correct_loss_per_token": 1.3611890077590942, "incorrect_loss_per_token": 1.724654346704483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3611890077590942, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3611890077590942, "logits_per_char": -0.6805945038795471, "num_chars": 2}, {"sum_logits": -1.411929965019226, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.411929965019226, "logits_per_char": -0.705964982509613, "num_chars": 2}, {"sum_logits": -1.6496758460998535, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6496758460998535, "logits_per_char": -0.8248379230499268, "num_chars": 2}, {"sum_logits": -1.6707444190979004, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6707444190979004, "logits_per_char": -0.8353722095489502, "num_chars": 2}, {"sum_logits": -2.166267156600952, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.166267156600952, "logits_per_char": -1.083133578300476, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 673, "native_id": "3fdc0c422c524c994b9911a17f1f1834", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6936074495315552, "incorrect_loss_raw": 1.5970338582992554, "correct_loss_per_char": 0.8468037247657776, "incorrect_loss_per_char": 0.7985169291496277, "correct_loss_per_token": 1.6936074495315552, "incorrect_loss_per_token": 1.5970338582992554, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6936074495315552, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6936074495315552, "logits_per_char": -0.8468037247657776, "num_chars": 2}, {"sum_logits": -1.5773855447769165, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5773855447769165, "logits_per_char": -0.7886927723884583, "num_chars": 2}, {"sum_logits": -1.6319661140441895, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6319661140441895, "logits_per_char": -0.8159830570220947, "num_chars": 2}, {"sum_logits": -1.62598717212677, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.62598717212677, "logits_per_char": -0.812993586063385, "num_chars": 2}, {"sum_logits": -1.5527966022491455, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.5527966022491455, "logits_per_char": -0.7763983011245728, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 674, "native_id": "cc8eac9956f645533b8d7b99702e3507", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0530831813812256, "incorrect_loss_raw": 1.539059966802597, "correct_loss_per_char": 1.0265415906906128, "incorrect_loss_per_char": 0.7695299834012985, "correct_loss_per_token": 2.0530831813812256, "incorrect_loss_per_token": 1.539059966802597, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3302052021026611, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.3302052021026611, "logits_per_char": -0.6651026010513306, "num_chars": 2}, {"sum_logits": -1.6358908414840698, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6358908414840698, "logits_per_char": -0.8179454207420349, "num_chars": 2}, {"sum_logits": -1.6321722269058228, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6321722269058228, "logits_per_char": -0.8160861134529114, "num_chars": 2}, {"sum_logits": -1.5579715967178345, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5579715967178345, "logits_per_char": -0.7789857983589172, "num_chars": 2}, {"sum_logits": -2.0530831813812256, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.0530831813812256, "logits_per_char": -1.0265415906906128, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 675, "native_id": "c0e7fa3e39a2d9af2c323416015729dc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5840083360671997, "incorrect_loss_raw": 1.6655777394771576, "correct_loss_per_char": 0.7920041680335999, "incorrect_loss_per_char": 0.8327888697385788, "correct_loss_per_token": 1.5840083360671997, "incorrect_loss_per_token": 1.6655777394771576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3222426176071167, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3222426176071167, "logits_per_char": -0.6611213088035583, "num_chars": 2}, {"sum_logits": -1.5840083360671997, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5840083360671997, "logits_per_char": -0.7920041680335999, "num_chars": 2}, {"sum_logits": -1.5763331651687622, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5763331651687622, "logits_per_char": -0.7881665825843811, "num_chars": 2}, {"sum_logits": -1.6026896238327026, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6026896238327026, "logits_per_char": -0.8013448119163513, "num_chars": 2}, {"sum_logits": -2.161045551300049, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.161045551300049, "logits_per_char": -1.0805227756500244, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 676, "native_id": "335b51bd3a8ada014bbe6754dcbd425f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.585933804512024, "incorrect_loss_raw": 1.6621690690517426, "correct_loss_per_char": 0.792966902256012, "incorrect_loss_per_char": 0.8310845345258713, "correct_loss_per_token": 1.585933804512024, "incorrect_loss_per_token": 1.6621690690517426, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3484212160110474, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3484212160110474, "logits_per_char": -0.6742106080055237, "num_chars": 2}, {"sum_logits": -1.533406376838684, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.533406376838684, "logits_per_char": -0.766703188419342, "num_chars": 2}, {"sum_logits": -1.585933804512024, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.585933804512024, "logits_per_char": -0.792966902256012, "num_chars": 2}, {"sum_logits": -1.6144458055496216, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6144458055496216, "logits_per_char": -0.8072229027748108, "num_chars": 2}, {"sum_logits": -2.152402877807617, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.152402877807617, "logits_per_char": -1.0762014389038086, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 677, "native_id": "c7327a1a7d12b6cc0740fc9446270e02", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.900288701057434, "incorrect_loss_raw": 1.564927488565445, "correct_loss_per_char": 0.950144350528717, "incorrect_loss_per_char": 0.7824637442827225, "correct_loss_per_token": 1.900288701057434, "incorrect_loss_per_token": 1.564927488565445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.345752477645874, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.345752477645874, "logits_per_char": -0.672876238822937, "num_chars": 2}, {"sum_logits": -1.5242043733596802, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5242043733596802, "logits_per_char": -0.7621021866798401, "num_chars": 2}, {"sum_logits": -1.6455951929092407, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6455951929092407, "logits_per_char": -0.8227975964546204, "num_chars": 2}, {"sum_logits": -1.7441579103469849, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.7441579103469849, "logits_per_char": -0.8720789551734924, "num_chars": 2}, {"sum_logits": -1.900288701057434, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.900288701057434, "logits_per_char": -0.950144350528717, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 678, "native_id": "2729d8502208c25d8e9293cd4e8ecbb5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5410959720611572, "incorrect_loss_raw": 1.656205654144287, "correct_loss_per_char": 0.7705479860305786, "incorrect_loss_per_char": 0.8281028270721436, "correct_loss_per_token": 1.5410959720611572, "incorrect_loss_per_token": 1.656205654144287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3792603015899658, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.3792603015899658, "logits_per_char": -0.6896301507949829, "num_chars": 2}, {"sum_logits": -1.5771212577819824, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5771212577819824, "logits_per_char": -0.7885606288909912, "num_chars": 2}, {"sum_logits": -1.5410959720611572, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5410959720611572, "logits_per_char": -0.7705479860305786, "num_chars": 2}, {"sum_logits": -1.6978613138198853, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.6978613138198853, "logits_per_char": -0.8489306569099426, "num_chars": 2}, {"sum_logits": -1.970579743385315, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.970579743385315, "logits_per_char": -0.9852898716926575, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 679, "native_id": "7ea57ee4580042b0a6a40479c8ace3e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5280836820602417, "incorrect_loss_raw": 1.6855417490005493, "correct_loss_per_char": 0.7640418410301208, "incorrect_loss_per_char": 0.8427708745002747, "correct_loss_per_token": 1.5280836820602417, "incorrect_loss_per_token": 1.6855417490005493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.360276699066162, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.360276699066162, "logits_per_char": -0.680138349533081, "num_chars": 2}, {"sum_logits": -1.5018059015274048, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5018059015274048, "logits_per_char": -0.7509029507637024, "num_chars": 2}, {"sum_logits": -1.5280836820602417, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5280836820602417, "logits_per_char": -0.7640418410301208, "num_chars": 2}, {"sum_logits": -1.666296362876892, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.666296362876892, "logits_per_char": -0.833148181438446, "num_chars": 2}, {"sum_logits": -2.2137880325317383, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.2137880325317383, "logits_per_char": -1.1068940162658691, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 680, "native_id": "65432eb6e617514d863a465f38865fde", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4760764837265015, "incorrect_loss_raw": 1.6617191135883331, "correct_loss_per_char": 0.7380382418632507, "incorrect_loss_per_char": 0.8308595567941666, "correct_loss_per_token": 1.4760764837265015, "incorrect_loss_per_token": 1.6617191135883331, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4760764837265015, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4760764837265015, "logits_per_char": -0.7380382418632507, "num_chars": 2}, {"sum_logits": -1.4869197607040405, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4869197607040405, "logits_per_char": -0.7434598803520203, "num_chars": 2}, {"sum_logits": -1.6408638954162598, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6408638954162598, "logits_per_char": -0.8204319477081299, "num_chars": 2}, {"sum_logits": -1.6378207206726074, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6378207206726074, "logits_per_char": -0.8189103603363037, "num_chars": 2}, {"sum_logits": -1.8812720775604248, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8812720775604248, "logits_per_char": -0.9406360387802124, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 681, "native_id": "316a8dee8a4dde7d95cf503a715104be", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6628522872924805, "incorrect_loss_raw": 1.6084195971488953, "correct_loss_per_char": 0.8314261436462402, "incorrect_loss_per_char": 0.8042097985744476, "correct_loss_per_token": 1.6628522872924805, "incorrect_loss_per_token": 1.6084195971488953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5612952709197998, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.5612952709197998, "logits_per_char": -0.7806476354598999, "num_chars": 2}, {"sum_logits": -1.5587999820709229, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.5587999820709229, "logits_per_char": -0.7793999910354614, "num_chars": 2}, {"sum_logits": -1.6628522872924805, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.6628522872924805, "logits_per_char": -0.8314261436462402, "num_chars": 2}, {"sum_logits": -1.5423606634140015, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": true, "logits_per_token": -1.5423606634140015, "logits_per_char": -0.7711803317070007, "num_chars": 2}, {"sum_logits": -1.771222472190857, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.771222472190857, "logits_per_char": -0.8856112360954285, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 682, "native_id": "520972425aed0e532fa28a91c9b55b30", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4648280143737793, "incorrect_loss_raw": 1.7067419290542603, "correct_loss_per_char": 0.7324140071868896, "incorrect_loss_per_char": 0.8533709645271301, "correct_loss_per_token": 1.4648280143737793, "incorrect_loss_per_token": 1.7067419290542603, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.257096529006958, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.257096529006958, "logits_per_char": -0.628548264503479, "num_chars": 2}, {"sum_logits": -1.4648280143737793, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4648280143737793, "logits_per_char": -0.7324140071868896, "num_chars": 2}, {"sum_logits": -1.6703112125396729, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6703112125396729, "logits_per_char": -0.8351556062698364, "num_chars": 2}, {"sum_logits": -1.7373838424682617, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7373838424682617, "logits_per_char": -0.8686919212341309, "num_chars": 2}, {"sum_logits": -2.1621761322021484, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.1621761322021484, "logits_per_char": -1.0810880661010742, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 683, "native_id": "4d67cdb4ba1b0058e383c212303a9f4e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5259026288986206, "incorrect_loss_raw": 1.6751161217689514, "correct_loss_per_char": 0.7629513144493103, "incorrect_loss_per_char": 0.8375580608844757, "correct_loss_per_token": 1.5259026288986206, "incorrect_loss_per_token": 1.6751161217689514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.444658637046814, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.444658637046814, "logits_per_char": -0.722329318523407, "num_chars": 2}, {"sum_logits": -1.5602660179138184, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5602660179138184, "logits_per_char": -0.7801330089569092, "num_chars": 2}, {"sum_logits": -1.5259026288986206, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5259026288986206, "logits_per_char": -0.7629513144493103, "num_chars": 2}, {"sum_logits": -1.5396274328231812, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5396274328231812, "logits_per_char": -0.7698137164115906, "num_chars": 2}, {"sum_logits": -2.155912399291992, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.155912399291992, "logits_per_char": -1.077956199645996, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 684, "native_id": "95d1d968ee66b6054cbb16b58a7c6455", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5896666049957275, "incorrect_loss_raw": 1.6455354690551758, "correct_loss_per_char": 0.7948333024978638, "incorrect_loss_per_char": 0.8227677345275879, "correct_loss_per_token": 1.5896666049957275, "incorrect_loss_per_token": 1.6455354690551758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3991527557373047, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3991527557373047, "logits_per_char": -0.6995763778686523, "num_chars": 2}, {"sum_logits": -1.5243055820465088, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5243055820465088, "logits_per_char": -0.7621527910232544, "num_chars": 2}, {"sum_logits": -1.5896666049957275, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5896666049957275, "logits_per_char": -0.7948333024978638, "num_chars": 2}, {"sum_logits": -1.6414527893066406, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6414527893066406, "logits_per_char": -0.8207263946533203, "num_chars": 2}, {"sum_logits": -2.017230749130249, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.017230749130249, "logits_per_char": -1.0086153745651245, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 685, "native_id": "c43b60be106662de1863097ee3ddb4d2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4850904941558838, "incorrect_loss_raw": 1.6707482933998108, "correct_loss_per_char": 0.7425452470779419, "incorrect_loss_per_char": 0.8353741466999054, "correct_loss_per_token": 1.4850904941558838, "incorrect_loss_per_token": 1.6707482933998108, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4850904941558838, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4850904941558838, "logits_per_char": -0.7425452470779419, "num_chars": 2}, {"sum_logits": -1.4588582515716553, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4588582515716553, "logits_per_char": -0.7294291257858276, "num_chars": 2}, {"sum_logits": -1.5940968990325928, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5940968990325928, "logits_per_char": -0.7970484495162964, "num_chars": 2}, {"sum_logits": -1.6043012142181396, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6043012142181396, "logits_per_char": -0.8021506071090698, "num_chars": 2}, {"sum_logits": -2.0257368087768555, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0257368087768555, "logits_per_char": -1.0128684043884277, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 686, "native_id": "456f2fb41cac8c028dcfe2f48637e473", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3029193878173828, "incorrect_loss_raw": 1.735548049211502, "correct_loss_per_char": 0.6514596939086914, "incorrect_loss_per_char": 0.867774024605751, "correct_loss_per_token": 1.3029193878173828, "incorrect_loss_per_token": 1.735548049211502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3029193878173828, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3029193878173828, "logits_per_char": -0.6514596939086914, "num_chars": 2}, {"sum_logits": -1.4918895959854126, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4918895959854126, "logits_per_char": -0.7459447979927063, "num_chars": 2}, {"sum_logits": -1.613731026649475, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.613731026649475, "logits_per_char": -0.8068655133247375, "num_chars": 2}, {"sum_logits": -1.7227495908737183, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7227495908737183, "logits_per_char": -0.8613747954368591, "num_chars": 2}, {"sum_logits": -2.1138219833374023, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.1138219833374023, "logits_per_char": -1.0569109916687012, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 687, "native_id": "a5d853d1c2fb3ef160218fb91110fbe5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3895039558410645, "incorrect_loss_raw": 1.6953465640544891, "correct_loss_per_char": 0.6947519779205322, "incorrect_loss_per_char": 0.8476732820272446, "correct_loss_per_token": 1.3895039558410645, "incorrect_loss_per_token": 1.6953465640544891, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4779072999954224, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.4779072999954224, "logits_per_char": -0.7389536499977112, "num_chars": 2}, {"sum_logits": -1.3895039558410645, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.3895039558410645, "logits_per_char": -0.6947519779205322, "num_chars": 2}, {"sum_logits": -1.589724063873291, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.589724063873291, "logits_per_char": -0.7948620319366455, "num_chars": 2}, {"sum_logits": -1.811218023300171, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.811218023300171, "logits_per_char": -0.9056090116500854, "num_chars": 2}, {"sum_logits": -1.9025368690490723, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.9025368690490723, "logits_per_char": -0.9512684345245361, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 688, "native_id": "3df1b88da6a90c9526be2c8a6cc736dc", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6204984188079834, "incorrect_loss_raw": 1.614033818244934, "correct_loss_per_char": 0.8102492094039917, "incorrect_loss_per_char": 0.807016909122467, "correct_loss_per_token": 1.6204984188079834, "incorrect_loss_per_token": 1.614033818244934, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.69893479347229, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.69893479347229, "logits_per_char": -0.849467396736145, "num_chars": 2}, {"sum_logits": -1.6204984188079834, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.6204984188079834, "logits_per_char": -0.8102492094039917, "num_chars": 2}, {"sum_logits": -1.6120903491973877, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.6120903491973877, "logits_per_char": -0.8060451745986938, "num_chars": 2}, {"sum_logits": -1.5514881610870361, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.5514881610870361, "logits_per_char": -0.7757440805435181, "num_chars": 2}, {"sum_logits": -1.5936219692230225, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.5936219692230225, "logits_per_char": -0.7968109846115112, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 689, "native_id": "f912bcd7479b76db9b1c57a612b90f00", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4995381832122803, "incorrect_loss_raw": 1.6599068939685822, "correct_loss_per_char": 0.7497690916061401, "incorrect_loss_per_char": 0.8299534469842911, "correct_loss_per_token": 1.4995381832122803, "incorrect_loss_per_token": 1.6599068939685822, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.41592538356781, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.41592538356781, "logits_per_char": -0.707962691783905, "num_chars": 2}, {"sum_logits": -1.4995381832122803, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4995381832122803, "logits_per_char": -0.7497690916061401, "num_chars": 2}, {"sum_logits": -1.672797679901123, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.672797679901123, "logits_per_char": -0.8363988399505615, "num_chars": 2}, {"sum_logits": -1.6756367683410645, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.6756367683410645, "logits_per_char": -0.8378183841705322, "num_chars": 2}, {"sum_logits": -1.875267744064331, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.875267744064331, "logits_per_char": -0.9376338720321655, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 690, "native_id": "94f34cc1e6aa9eefe06563cce8225658", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.503961205482483, "incorrect_loss_raw": 1.6645347774028778, "correct_loss_per_char": 0.7519806027412415, "incorrect_loss_per_char": 0.8322673887014389, "correct_loss_per_token": 1.503961205482483, "incorrect_loss_per_token": 1.6645347774028778, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.503961205482483, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.503961205482483, "logits_per_char": -0.7519806027412415, "num_chars": 2}, {"sum_logits": -1.5556426048278809, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5556426048278809, "logits_per_char": -0.7778213024139404, "num_chars": 2}, {"sum_logits": -1.4986913204193115, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4986913204193115, "logits_per_char": -0.7493456602096558, "num_chars": 2}, {"sum_logits": -1.5883680582046509, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5883680582046509, "logits_per_char": -0.7941840291023254, "num_chars": 2}, {"sum_logits": -2.015437126159668, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.015437126159668, "logits_per_char": -1.007718563079834, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 691, "native_id": "bb503ece4eac41dfe608a1dcb654e6bf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6067897081375122, "incorrect_loss_raw": 1.6488628089427948, "correct_loss_per_char": 0.8033948540687561, "incorrect_loss_per_char": 0.8244314044713974, "correct_loss_per_token": 1.6067897081375122, "incorrect_loss_per_token": 1.6488628089427948, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.431398868560791, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.431398868560791, "logits_per_char": -0.7156994342803955, "num_chars": 2}, {"sum_logits": -1.4805288314819336, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.4805288314819336, "logits_per_char": -0.7402644157409668, "num_chars": 2}, {"sum_logits": -1.6067897081375122, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6067897081375122, "logits_per_char": -0.8033948540687561, "num_chars": 2}, {"sum_logits": -1.5733674764633179, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5733674764633179, "logits_per_char": -0.7866837382316589, "num_chars": 2}, {"sum_logits": -2.1101560592651367, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.1101560592651367, "logits_per_char": -1.0550780296325684, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 692, "native_id": "5502dc807d4921679ae1abd0dc9570d6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7548847198486328, "incorrect_loss_raw": 1.6095450818538666, "correct_loss_per_char": 0.8774423599243164, "incorrect_loss_per_char": 0.8047725409269333, "correct_loss_per_token": 1.7548847198486328, "incorrect_loss_per_token": 1.6095450818538666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3152259588241577, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3152259588241577, "logits_per_char": -0.6576129794120789, "num_chars": 2}, {"sum_logits": -1.5761730670928955, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5761730670928955, "logits_per_char": -0.7880865335464478, "num_chars": 2}, {"sum_logits": -1.5758161544799805, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5758161544799805, "logits_per_char": -0.7879080772399902, "num_chars": 2}, {"sum_logits": -1.7548847198486328, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7548847198486328, "logits_per_char": -0.8774423599243164, "num_chars": 2}, {"sum_logits": -1.9709651470184326, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9709651470184326, "logits_per_char": -0.9854825735092163, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 693, "native_id": "a7e3de0719fe30e7048f67426e29fdd1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4992785453796387, "incorrect_loss_raw": 1.6455658674240112, "correct_loss_per_char": 0.7496392726898193, "incorrect_loss_per_char": 0.8227829337120056, "correct_loss_per_token": 1.4992785453796387, "incorrect_loss_per_token": 1.6455658674240112, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6445813179016113, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6445813179016113, "logits_per_char": -0.8222906589508057, "num_chars": 2}, {"sum_logits": -1.590512752532959, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.590512752532959, "logits_per_char": -0.7952563762664795, "num_chars": 2}, {"sum_logits": -1.6622527837753296, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6622527837753296, "logits_per_char": -0.8311263918876648, "num_chars": 2}, {"sum_logits": -1.4992785453796387, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.4992785453796387, "logits_per_char": -0.7496392726898193, "num_chars": 2}, {"sum_logits": -1.684916615486145, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.684916615486145, "logits_per_char": -0.8424583077430725, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 694, "native_id": "d6107d454181b701ddcaa449a1e422a3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.353224754333496, "incorrect_loss_raw": 1.7256121337413788, "correct_loss_per_char": 0.676612377166748, "incorrect_loss_per_char": 0.8628060668706894, "correct_loss_per_token": 1.353224754333496, "incorrect_loss_per_token": 1.7256121337413788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.353224754333496, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.353224754333496, "logits_per_char": -0.676612377166748, "num_chars": 2}, {"sum_logits": -1.4963568449020386, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4963568449020386, "logits_per_char": -0.7481784224510193, "num_chars": 2}, {"sum_logits": -1.5379687547683716, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5379687547683716, "logits_per_char": -0.7689843773841858, "num_chars": 2}, {"sum_logits": -1.6865791082382202, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6865791082382202, "logits_per_char": -0.8432895541191101, "num_chars": 2}, {"sum_logits": -2.1815438270568848, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.1815438270568848, "logits_per_char": -1.0907719135284424, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 695, "native_id": "ab2eb930b29bb6d5e94a6cd3b04ba01e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2067806720733643, "incorrect_loss_raw": 1.5163850784301758, "correct_loss_per_char": 1.1033903360366821, "incorrect_loss_per_char": 0.7581925392150879, "correct_loss_per_token": 2.2067806720733643, "incorrect_loss_per_token": 1.5163850784301758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3845150470733643, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3845150470733643, "logits_per_char": -0.6922575235366821, "num_chars": 2}, {"sum_logits": -1.4947304725646973, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4947304725646973, "logits_per_char": -0.7473652362823486, "num_chars": 2}, {"sum_logits": -1.444704294204712, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.444704294204712, "logits_per_char": -0.722352147102356, "num_chars": 2}, {"sum_logits": -1.7415904998779297, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7415904998779297, "logits_per_char": -0.8707952499389648, "num_chars": 2}, {"sum_logits": -2.2067806720733643, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.2067806720733643, "logits_per_char": -1.1033903360366821, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 696, "native_id": "92869fc0be5dc45f407700692ffd80a0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.160245180130005, "incorrect_loss_raw": 1.5163702070713043, "correct_loss_per_char": 1.0801225900650024, "incorrect_loss_per_char": 0.7581851035356522, "correct_loss_per_token": 2.160245180130005, "incorrect_loss_per_token": 1.5163702070713043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4165961742401123, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.4165961742401123, "logits_per_char": -0.7082980871200562, "num_chars": 2}, {"sum_logits": -1.4753646850585938, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4753646850585938, "logits_per_char": -0.7376823425292969, "num_chars": 2}, {"sum_logits": -1.5764328241348267, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5764328241348267, "logits_per_char": -0.7882164120674133, "num_chars": 2}, {"sum_logits": -1.5970871448516846, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5970871448516846, "logits_per_char": -0.7985435724258423, "num_chars": 2}, {"sum_logits": -2.160245180130005, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.160245180130005, "logits_per_char": -1.0801225900650024, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 697, "native_id": "6a0177586d506cb7b741f4207b428e42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5503662824630737, "incorrect_loss_raw": 1.6507568359375, "correct_loss_per_char": 0.7751831412315369, "incorrect_loss_per_char": 0.82537841796875, "correct_loss_per_token": 1.5503662824630737, "incorrect_loss_per_token": 1.6507568359375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3693599700927734, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.3693599700927734, "logits_per_char": -0.6846799850463867, "num_chars": 2}, {"sum_logits": -1.5503662824630737, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5503662824630737, "logits_per_char": -0.7751831412315369, "num_chars": 2}, {"sum_logits": -1.6681755781173706, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6681755781173706, "logits_per_char": -0.8340877890586853, "num_chars": 2}, {"sum_logits": -1.647384524345398, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.647384524345398, "logits_per_char": -0.823692262172699, "num_chars": 2}, {"sum_logits": -1.918107271194458, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.918107271194458, "logits_per_char": -0.959053635597229, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 698, "native_id": "584188da9a429f1bc319abda5e5c7a76", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.039274215698242, "incorrect_loss_raw": 1.5320715606212616, "correct_loss_per_char": 1.019637107849121, "incorrect_loss_per_char": 0.7660357803106308, "correct_loss_per_token": 2.039274215698242, "incorrect_loss_per_token": 1.5320715606212616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5594313144683838, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5594313144683838, "logits_per_char": -0.7797156572341919, "num_chars": 2}, {"sum_logits": -1.4932606220245361, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4932606220245361, "logits_per_char": -0.7466303110122681, "num_chars": 2}, {"sum_logits": -1.4850282669067383, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4850282669067383, "logits_per_char": -0.7425141334533691, "num_chars": 2}, {"sum_logits": -1.5905660390853882, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5905660390853882, "logits_per_char": -0.7952830195426941, "num_chars": 2}, {"sum_logits": -2.039274215698242, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.039274215698242, "logits_per_char": -1.019637107849121, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 699, "native_id": "e480d4a672af0194e0a6ccdb8c37499b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2121806144714355, "incorrect_loss_raw": 1.5214090049266815, "correct_loss_per_char": 1.1060903072357178, "incorrect_loss_per_char": 0.7607045024633408, "correct_loss_per_token": 2.2121806144714355, "incorrect_loss_per_token": 1.5214090049266815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3243505954742432, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3243505954742432, "logits_per_char": -0.6621752977371216, "num_chars": 2}, {"sum_logits": -1.381569743156433, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.381569743156433, "logits_per_char": -0.6907848715782166, "num_chars": 2}, {"sum_logits": -1.63782799243927, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.63782799243927, "logits_per_char": -0.818913996219635, "num_chars": 2}, {"sum_logits": -1.7418876886367798, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7418876886367798, "logits_per_char": -0.8709438443183899, "num_chars": 2}, {"sum_logits": -2.2121806144714355, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.2121806144714355, "logits_per_char": -1.1060903072357178, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 700, "native_id": "275c859994f7d3acd3c8863be591ab2c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.255227565765381, "incorrect_loss_raw": 1.5261204838752747, "correct_loss_per_char": 1.1276137828826904, "incorrect_loss_per_char": 0.7630602419376373, "correct_loss_per_token": 2.255227565765381, "incorrect_loss_per_token": 1.5261204838752747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2099483013153076, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.2099483013153076, "logits_per_char": -0.6049741506576538, "num_chars": 2}, {"sum_logits": -1.4485735893249512, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4485735893249512, "logits_per_char": -0.7242867946624756, "num_chars": 2}, {"sum_logits": -1.6508064270019531, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6508064270019531, "logits_per_char": -0.8254032135009766, "num_chars": 2}, {"sum_logits": -1.7951536178588867, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7951536178588867, "logits_per_char": -0.8975768089294434, "num_chars": 2}, {"sum_logits": -2.255227565765381, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.255227565765381, "logits_per_char": -1.1276137828826904, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 701, "native_id": "32758ab86d888be680845b0dfe7de35e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9588701725006104, "incorrect_loss_raw": 1.5486912727355957, "correct_loss_per_char": 0.9794350862503052, "incorrect_loss_per_char": 0.7743456363677979, "correct_loss_per_token": 1.9588701725006104, "incorrect_loss_per_token": 1.5486912727355957, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.417252779006958, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.417252779006958, "logits_per_char": -0.708626389503479, "num_chars": 2}, {"sum_logits": -1.5518436431884766, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5518436431884766, "logits_per_char": -0.7759218215942383, "num_chars": 2}, {"sum_logits": -1.639230489730835, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.639230489730835, "logits_per_char": -0.8196152448654175, "num_chars": 2}, {"sum_logits": -1.5864381790161133, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5864381790161133, "logits_per_char": -0.7932190895080566, "num_chars": 2}, {"sum_logits": -1.9588701725006104, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.9588701725006104, "logits_per_char": -0.9794350862503052, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 702, "native_id": "69335eb9bc5b7b5df840c38a086bf8b2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7566272020339966, "incorrect_loss_raw": 1.6163972914218903, "correct_loss_per_char": 0.8783136010169983, "incorrect_loss_per_char": 0.8081986457109451, "correct_loss_per_token": 1.7566272020339966, "incorrect_loss_per_token": 1.6163972914218903, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2870787382125854, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.2870787382125854, "logits_per_char": -0.6435393691062927, "num_chars": 2}, {"sum_logits": -1.53890860080719, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.53890860080719, "logits_per_char": -0.769454300403595, "num_chars": 2}, {"sum_logits": -1.599373459815979, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.599373459815979, "logits_per_char": -0.7996867299079895, "num_chars": 2}, {"sum_logits": -1.7566272020339966, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.7566272020339966, "logits_per_char": -0.8783136010169983, "num_chars": 2}, {"sum_logits": -2.0402283668518066, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -2.0402283668518066, "logits_per_char": -1.0201141834259033, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 703, "native_id": "4396cb65629672723c7b184424e139bb", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4643648862838745, "incorrect_loss_raw": 1.6664339005947113, "correct_loss_per_char": 0.7321824431419373, "incorrect_loss_per_char": 0.8332169502973557, "correct_loss_per_token": 1.4643648862838745, "incorrect_loss_per_token": 1.6664339005947113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4861966371536255, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4861966371536255, "logits_per_char": -0.7430983185768127, "num_chars": 2}, {"sum_logits": -1.4643648862838745, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4643648862838745, "logits_per_char": -0.7321824431419373, "num_chars": 2}, {"sum_logits": -1.6363928318023682, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6363928318023682, "logits_per_char": -0.8181964159011841, "num_chars": 2}, {"sum_logits": -1.676457405090332, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.676457405090332, "logits_per_char": -0.838228702545166, "num_chars": 2}, {"sum_logits": -1.8666887283325195, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8666887283325195, "logits_per_char": -0.9333443641662598, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 704, "native_id": "2a58e81a9c4ce095d099e0d785fc2da4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8671694993972778, "incorrect_loss_raw": 1.5654090642929077, "correct_loss_per_char": 0.9335847496986389, "incorrect_loss_per_char": 0.7827045321464539, "correct_loss_per_token": 1.8671694993972778, "incorrect_loss_per_token": 1.5654090642929077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4727801084518433, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.4727801084518433, "logits_per_char": -0.7363900542259216, "num_chars": 2}, {"sum_logits": -1.5126396417617798, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.5126396417617798, "logits_per_char": -0.7563198208808899, "num_chars": 2}, {"sum_logits": -1.5503917932510376, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.5503917932510376, "logits_per_char": -0.7751958966255188, "num_chars": 2}, {"sum_logits": -1.7258247137069702, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7258247137069702, "logits_per_char": -0.8629123568534851, "num_chars": 2}, {"sum_logits": -1.8671694993972778, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.8671694993972778, "logits_per_char": -0.9335847496986389, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 705, "native_id": "07f108d5321a66f460685f5c7499ecb2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6252269744873047, "incorrect_loss_raw": 1.6253352165222168, "correct_loss_per_char": 0.8126134872436523, "incorrect_loss_per_char": 0.8126676082611084, "correct_loss_per_token": 1.6252269744873047, "incorrect_loss_per_token": 1.6253352165222168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5179798603057861, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5179798603057861, "logits_per_char": -0.7589899301528931, "num_chars": 2}, {"sum_logits": -1.5696485042572021, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5696485042572021, "logits_per_char": -0.7848242521286011, "num_chars": 2}, {"sum_logits": -1.6252269744873047, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6252269744873047, "logits_per_char": -0.8126134872436523, "num_chars": 2}, {"sum_logits": -1.480492115020752, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.480492115020752, "logits_per_char": -0.740246057510376, "num_chars": 2}, {"sum_logits": -1.933220386505127, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.933220386505127, "logits_per_char": -0.9666101932525635, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 706, "native_id": "69bef3eb55463d040bdf98e2c97bfe1f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4168369770050049, "incorrect_loss_raw": 1.6845405399799347, "correct_loss_per_char": 0.7084184885025024, "incorrect_loss_per_char": 0.8422702699899673, "correct_loss_per_token": 1.4168369770050049, "incorrect_loss_per_token": 1.6845405399799347, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4168369770050049, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.4168369770050049, "logits_per_char": -0.7084184885025024, "num_chars": 2}, {"sum_logits": -1.5058845281600952, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5058845281600952, "logits_per_char": -0.7529422640800476, "num_chars": 2}, {"sum_logits": -1.554673671722412, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.554673671722412, "logits_per_char": -0.777336835861206, "num_chars": 2}, {"sum_logits": -1.7346339225769043, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.7346339225769043, "logits_per_char": -0.8673169612884521, "num_chars": 2}, {"sum_logits": -1.9429700374603271, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.9429700374603271, "logits_per_char": -0.9714850187301636, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 707, "native_id": "912676495cceefadccbbf8c604486f97", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4595578908920288, "incorrect_loss_raw": 1.6655949652194977, "correct_loss_per_char": 0.7297789454460144, "incorrect_loss_per_char": 0.8327974826097488, "correct_loss_per_token": 1.4595578908920288, "incorrect_loss_per_token": 1.6655949652194977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4595578908920288, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4595578908920288, "logits_per_char": -0.7297789454460144, "num_chars": 2}, {"sum_logits": -1.5962952375411987, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5962952375411987, "logits_per_char": -0.7981476187705994, "num_chars": 2}, {"sum_logits": -1.5593348741531372, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5593348741531372, "logits_per_char": -0.7796674370765686, "num_chars": 2}, {"sum_logits": -1.6544749736785889, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6544749736785889, "logits_per_char": -0.8272374868392944, "num_chars": 2}, {"sum_logits": -1.852274775505066, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.852274775505066, "logits_per_char": -0.926137387752533, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 708, "native_id": "bdf92566f14599f1606109677206001f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4608919620513916, "incorrect_loss_raw": 1.6657793521881104, "correct_loss_per_char": 0.7304459810256958, "incorrect_loss_per_char": 0.8328896760940552, "correct_loss_per_token": 1.4608919620513916, "incorrect_loss_per_token": 1.6657793521881104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4608919620513916, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.4608919620513916, "logits_per_char": -0.7304459810256958, "num_chars": 2}, {"sum_logits": -1.550421953201294, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.550421953201294, "logits_per_char": -0.775210976600647, "num_chars": 2}, {"sum_logits": -1.6227939128875732, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.6227939128875732, "logits_per_char": -0.8113969564437866, "num_chars": 2}, {"sum_logits": -1.5808024406433105, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.5808024406433105, "logits_per_char": -0.7904012203216553, "num_chars": 2}, {"sum_logits": -1.9090991020202637, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.9090991020202637, "logits_per_char": -0.9545495510101318, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 709, "native_id": "0df042743128b57e874bd5d79b7aae7a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4752203226089478, "incorrect_loss_raw": 1.7016130685806274, "correct_loss_per_char": 0.7376101613044739, "incorrect_loss_per_char": 0.8508065342903137, "correct_loss_per_token": 1.4752203226089478, "incorrect_loss_per_token": 1.7016130685806274, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3092350959777832, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3092350959777832, "logits_per_char": -0.6546175479888916, "num_chars": 2}, {"sum_logits": -1.4752203226089478, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4752203226089478, "logits_per_char": -0.7376101613044739, "num_chars": 2}, {"sum_logits": -1.5540685653686523, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5540685653686523, "logits_per_char": -0.7770342826843262, "num_chars": 2}, {"sum_logits": -1.7697055339813232, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7697055339813232, "logits_per_char": -0.8848527669906616, "num_chars": 2}, {"sum_logits": -2.173443078994751, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.173443078994751, "logits_per_char": -1.0867215394973755, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 710, "native_id": "866ef7266d34c11e5a1b3df49fab96a4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7194868326187134, "incorrect_loss_raw": 1.597827821969986, "correct_loss_per_char": 0.8597434163093567, "incorrect_loss_per_char": 0.798913910984993, "correct_loss_per_token": 1.7194868326187134, "incorrect_loss_per_token": 1.597827821969986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4770418405532837, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.4770418405532837, "logits_per_char": -0.7385209202766418, "num_chars": 2}, {"sum_logits": -1.4996027946472168, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.4996027946472168, "logits_per_char": -0.7498013973236084, "num_chars": 2}, {"sum_logits": -1.615777611732483, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.615777611732483, "logits_per_char": -0.8078888058662415, "num_chars": 2}, {"sum_logits": -1.7194868326187134, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.7194868326187134, "logits_per_char": -0.8597434163093567, "num_chars": 2}, {"sum_logits": -1.7988890409469604, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.7988890409469604, "logits_per_char": -0.8994445204734802, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 711, "native_id": "67ffcb4c3f2c6a1155e356f8a15ed250", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4763779640197754, "incorrect_loss_raw": 1.6676529049873352, "correct_loss_per_char": 0.7381889820098877, "incorrect_loss_per_char": 0.8338264524936676, "correct_loss_per_token": 1.4763779640197754, "incorrect_loss_per_token": 1.6676529049873352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4763779640197754, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4763779640197754, "logits_per_char": -0.7381889820098877, "num_chars": 2}, {"sum_logits": -1.586482286453247, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.586482286453247, "logits_per_char": -0.7932411432266235, "num_chars": 2}, {"sum_logits": -1.5011053085327148, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5011053085327148, "logits_per_char": -0.7505526542663574, "num_chars": 2}, {"sum_logits": -1.5969455242156982, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5969455242156982, "logits_per_char": -0.7984727621078491, "num_chars": 2}, {"sum_logits": -1.9860785007476807, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9860785007476807, "logits_per_char": -0.9930392503738403, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 712, "native_id": "87a133afae5d9d29d634f3384f28ef24", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1650490760803223, "incorrect_loss_raw": 1.524320363998413, "correct_loss_per_char": 1.0825245380401611, "incorrect_loss_per_char": 0.7621601819992065, "correct_loss_per_token": 2.1650490760803223, "incorrect_loss_per_token": 1.524320363998413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3009159564971924, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3009159564971924, "logits_per_char": -0.6504579782485962, "num_chars": 2}, {"sum_logits": -1.529500961303711, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.529500961303711, "logits_per_char": -0.7647504806518555, "num_chars": 2}, {"sum_logits": -1.5765140056610107, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5765140056610107, "logits_per_char": -0.7882570028305054, "num_chars": 2}, {"sum_logits": -1.6903505325317383, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6903505325317383, "logits_per_char": -0.8451752662658691, "num_chars": 2}, {"sum_logits": -2.1650490760803223, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.1650490760803223, "logits_per_char": -1.0825245380401611, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 713, "native_id": "4779be55f47a301debfc472e4fc2c7b6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.580653429031372, "incorrect_loss_raw": 1.6930555701255798, "correct_loss_per_char": 0.790326714515686, "incorrect_loss_per_char": 0.8465277850627899, "correct_loss_per_token": 1.580653429031372, "incorrect_loss_per_token": 1.6930555701255798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.17262864112854, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.17262864112854, "logits_per_char": -0.58631432056427, "num_chars": 2}, {"sum_logits": -1.580653429031372, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.580653429031372, "logits_per_char": -0.790326714515686, "num_chars": 2}, {"sum_logits": -1.6316514015197754, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6316514015197754, "logits_per_char": -0.8158257007598877, "num_chars": 2}, {"sum_logits": -1.7222199440002441, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7222199440002441, "logits_per_char": -0.8611099720001221, "num_chars": 2}, {"sum_logits": -2.2457222938537598, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.2457222938537598, "logits_per_char": -1.1228611469268799, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 714, "native_id": "7a28d31e66d870370642de3be47b9ef9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7845357656478882, "incorrect_loss_raw": 1.6227958798408508, "correct_loss_per_char": 0.8922678828239441, "incorrect_loss_per_char": 0.8113979399204254, "correct_loss_per_token": 1.7845357656478882, "incorrect_loss_per_token": 1.6227958798408508, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.322489619255066, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.322489619255066, "logits_per_char": -0.661244809627533, "num_chars": 2}, {"sum_logits": -1.4216430187225342, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4216430187225342, "logits_per_char": -0.7108215093612671, "num_chars": 2}, {"sum_logits": -1.588448166847229, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.588448166847229, "logits_per_char": -0.7942240834236145, "num_chars": 2}, {"sum_logits": -1.7845357656478882, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7845357656478882, "logits_per_char": -0.8922678828239441, "num_chars": 2}, {"sum_logits": -2.158602714538574, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.158602714538574, "logits_per_char": -1.079301357269287, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 715, "native_id": "042898e0c71adac5d123aaa6221c9754", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5977281332015991, "incorrect_loss_raw": 1.6256813704967499, "correct_loss_per_char": 0.7988640666007996, "incorrect_loss_per_char": 0.8128406852483749, "correct_loss_per_token": 1.5977281332015991, "incorrect_loss_per_token": 1.6256813704967499, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.47361421585083, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.47361421585083, "logits_per_char": -0.736807107925415, "num_chars": 2}, {"sum_logits": -1.7152717113494873, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7152717113494873, "logits_per_char": -0.8576358556747437, "num_chars": 2}, {"sum_logits": -1.600224256515503, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.600224256515503, "logits_per_char": -0.8001121282577515, "num_chars": 2}, {"sum_logits": -1.5977281332015991, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5977281332015991, "logits_per_char": -0.7988640666007996, "num_chars": 2}, {"sum_logits": -1.7136152982711792, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7136152982711792, "logits_per_char": -0.8568076491355896, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 716, "native_id": "93bbaccb1c46d22124a846b8514de5bc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3871142864227295, "incorrect_loss_raw": 1.6863268315792084, "correct_loss_per_char": 0.6935571432113647, "incorrect_loss_per_char": 0.8431634157896042, "correct_loss_per_token": 1.3871142864227295, "incorrect_loss_per_token": 1.6863268315792084, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3871142864227295, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.3871142864227295, "logits_per_char": -0.6935571432113647, "num_chars": 2}, {"sum_logits": -1.670985221862793, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.670985221862793, "logits_per_char": -0.8354926109313965, "num_chars": 2}, {"sum_logits": -1.642940878868103, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.642940878868103, "logits_per_char": -0.8214704394340515, "num_chars": 2}, {"sum_logits": -1.5943955183029175, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5943955183029175, "logits_per_char": -0.7971977591514587, "num_chars": 2}, {"sum_logits": -1.83698570728302, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.83698570728302, "logits_per_char": -0.91849285364151, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 717, "native_id": "ef889edd1b57d8d0c81e43f73c98c8e9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6081016063690186, "incorrect_loss_raw": 1.6343953907489777, "correct_loss_per_char": 0.8040508031845093, "incorrect_loss_per_char": 0.8171976953744888, "correct_loss_per_token": 1.6081016063690186, "incorrect_loss_per_token": 1.6343953907489777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4004517793655396, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4004517793655396, "logits_per_char": -0.7002258896827698, "num_chars": 2}, {"sum_logits": -1.5271306037902832, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5271306037902832, "logits_per_char": -0.7635653018951416, "num_chars": 2}, {"sum_logits": -1.6081016063690186, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6081016063690186, "logits_per_char": -0.8040508031845093, "num_chars": 2}, {"sum_logits": -1.6812682151794434, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6812682151794434, "logits_per_char": -0.8406341075897217, "num_chars": 2}, {"sum_logits": -1.9287309646606445, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9287309646606445, "logits_per_char": -0.9643654823303223, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 718, "native_id": "f4bb8ecacb9ce89e040f5f76bc79afb3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4557607173919678, "incorrect_loss_raw": 1.6914407014846802, "correct_loss_per_char": 0.7278803586959839, "incorrect_loss_per_char": 0.8457203507423401, "correct_loss_per_token": 1.4557607173919678, "incorrect_loss_per_token": 1.6914407014846802, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3361575603485107, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3361575603485107, "logits_per_char": -0.6680787801742554, "num_chars": 2}, {"sum_logits": -1.4557607173919678, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4557607173919678, "logits_per_char": -0.7278803586959839, "num_chars": 2}, {"sum_logits": -1.593201756477356, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.593201756477356, "logits_per_char": -0.796600878238678, "num_chars": 2}, {"sum_logits": -1.8153473138809204, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8153473138809204, "logits_per_char": -0.9076736569404602, "num_chars": 2}, {"sum_logits": -2.0210561752319336, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.0210561752319336, "logits_per_char": -1.0105280876159668, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 719, "native_id": "ec2e18fd8c18a4ebe5a091e0c8b94462", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.616082787513733, "incorrect_loss_raw": 1.6380795240402222, "correct_loss_per_char": 0.8080413937568665, "incorrect_loss_per_char": 0.8190397620201111, "correct_loss_per_token": 1.616082787513733, "incorrect_loss_per_token": 1.6380795240402222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4387524127960205, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4387524127960205, "logits_per_char": -0.7193762063980103, "num_chars": 2}, {"sum_logits": -1.616082787513733, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.616082787513733, "logits_per_char": -0.8080413937568665, "num_chars": 2}, {"sum_logits": -1.4966787099838257, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4966787099838257, "logits_per_char": -0.7483393549919128, "num_chars": 2}, {"sum_logits": -1.6097625494003296, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6097625494003296, "logits_per_char": -0.8048812747001648, "num_chars": 2}, {"sum_logits": -2.007124423980713, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.007124423980713, "logits_per_char": -1.0035622119903564, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 720, "native_id": "07b51b231a9d6a143d8a73e69121e1b1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5134601593017578, "incorrect_loss_raw": 1.6847951412200928, "correct_loss_per_char": 0.7567300796508789, "incorrect_loss_per_char": 0.8423975706100464, "correct_loss_per_token": 1.5134601593017578, "incorrect_loss_per_token": 1.6847951412200928, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3173284530639648, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3173284530639648, "logits_per_char": -0.6586642265319824, "num_chars": 2}, {"sum_logits": -1.5134601593017578, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5134601593017578, "logits_per_char": -0.7567300796508789, "num_chars": 2}, {"sum_logits": -1.5931010246276855, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5931010246276855, "logits_per_char": -0.7965505123138428, "num_chars": 2}, {"sum_logits": -1.6663260459899902, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6663260459899902, "logits_per_char": -0.8331630229949951, "num_chars": 2}, {"sum_logits": -2.1624250411987305, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.1624250411987305, "logits_per_char": -1.0812125205993652, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 721, "native_id": "e1744fc698cffb574e5cf4b29a81ce76", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.455706238746643, "incorrect_loss_raw": 1.674351453781128, "correct_loss_per_char": 0.7278531193733215, "incorrect_loss_per_char": 0.837175726890564, "correct_loss_per_token": 1.455706238746643, "incorrect_loss_per_token": 1.674351453781128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.455706238746643, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.455706238746643, "logits_per_char": -0.7278531193733215, "num_chars": 2}, {"sum_logits": -1.498437762260437, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.498437762260437, "logits_per_char": -0.7492188811302185, "num_chars": 2}, {"sum_logits": -1.5932668447494507, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5932668447494507, "logits_per_char": -0.7966334223747253, "num_chars": 2}, {"sum_logits": -1.6139520406723022, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6139520406723022, "logits_per_char": -0.8069760203361511, "num_chars": 2}, {"sum_logits": -1.9917491674423218, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9917491674423218, "logits_per_char": -0.9958745837211609, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 722, "native_id": "27604394ccee83e089f9ffae1883cf07", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.414764404296875, "incorrect_loss_raw": 1.703173577785492, "correct_loss_per_char": 0.7073822021484375, "incorrect_loss_per_char": 0.851586788892746, "correct_loss_per_token": 1.414764404296875, "incorrect_loss_per_token": 1.703173577785492, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.414764404296875, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.414764404296875, "logits_per_char": -0.7073822021484375, "num_chars": 2}, {"sum_logits": -1.4228556156158447, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4228556156158447, "logits_per_char": -0.7114278078079224, "num_chars": 2}, {"sum_logits": -1.7171448469161987, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7171448469161987, "logits_per_char": -0.8585724234580994, "num_chars": 2}, {"sum_logits": -1.563271164894104, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.563271164894104, "logits_per_char": -0.781635582447052, "num_chars": 2}, {"sum_logits": -2.1094226837158203, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.1094226837158203, "logits_per_char": -1.0547113418579102, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 723, "native_id": "1272e693cf9152e7ac71095c643676b5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.586666464805603, "incorrect_loss_raw": 1.62868994474411, "correct_loss_per_char": 0.7933332324028015, "incorrect_loss_per_char": 0.814344972372055, "correct_loss_per_token": 1.586666464805603, "incorrect_loss_per_token": 1.62868994474411, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.586666464805603, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.586666464805603, "logits_per_char": -0.7933332324028015, "num_chars": 2}, {"sum_logits": -1.5641801357269287, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.5641801357269287, "logits_per_char": -0.7820900678634644, "num_chars": 2}, {"sum_logits": -1.6739553213119507, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6739553213119507, "logits_per_char": -0.8369776606559753, "num_chars": 2}, {"sum_logits": -1.4833173751831055, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.4833173751831055, "logits_per_char": -0.7416586875915527, "num_chars": 2}, {"sum_logits": -1.7933069467544556, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7933069467544556, "logits_per_char": -0.8966534733772278, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 724, "native_id": "7bff23f6c12e9136f0961514bebb8cd3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.764940857887268, "incorrect_loss_raw": 1.606703281402588, "correct_loss_per_char": 0.882470428943634, "incorrect_loss_per_char": 0.803351640701294, "correct_loss_per_token": 1.764940857887268, "incorrect_loss_per_token": 1.606703281402588, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.42172372341156, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.42172372341156, "logits_per_char": -0.71086186170578, "num_chars": 2}, {"sum_logits": -1.3648711442947388, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3648711442947388, "logits_per_char": -0.6824355721473694, "num_chars": 2}, {"sum_logits": -1.6812587976455688, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6812587976455688, "logits_per_char": -0.8406293988227844, "num_chars": 2}, {"sum_logits": -1.764940857887268, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.764940857887268, "logits_per_char": -0.882470428943634, "num_chars": 2}, {"sum_logits": -1.9589594602584839, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9589594602584839, "logits_per_char": -0.9794797301292419, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 725, "native_id": "20ae70b9b157b298569cd761787833e7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1804757118225098, "incorrect_loss_raw": 1.5222660899162292, "correct_loss_per_char": 1.0902378559112549, "incorrect_loss_per_char": 0.7611330449581146, "correct_loss_per_token": 2.1804757118225098, "incorrect_loss_per_token": 1.5222660899162292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3059394359588623, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3059394359588623, "logits_per_char": -0.6529697179794312, "num_chars": 2}, {"sum_logits": -1.4832749366760254, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4832749366760254, "logits_per_char": -0.7416374683380127, "num_chars": 2}, {"sum_logits": -1.5846271514892578, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5846271514892578, "logits_per_char": -0.7923135757446289, "num_chars": 2}, {"sum_logits": -1.7152228355407715, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7152228355407715, "logits_per_char": -0.8576114177703857, "num_chars": 2}, {"sum_logits": -2.1804757118225098, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.1804757118225098, "logits_per_char": -1.0902378559112549, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 726, "native_id": "bdd29d7c12e3d795b78ffc048631e7e7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6415376663208008, "incorrect_loss_raw": 1.6258285641670227, "correct_loss_per_char": 0.8207688331604004, "incorrect_loss_per_char": 0.8129142820835114, "correct_loss_per_token": 1.6415376663208008, "incorrect_loss_per_token": 1.6258285641670227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3645601272583008, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3645601272583008, "logits_per_char": -0.6822800636291504, "num_chars": 2}, {"sum_logits": -1.7487523555755615, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7487523555755615, "logits_per_char": -0.8743761777877808, "num_chars": 2}, {"sum_logits": -1.5413293838500977, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5413293838500977, "logits_per_char": -0.7706646919250488, "num_chars": 2}, {"sum_logits": -1.6415376663208008, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6415376663208008, "logits_per_char": -0.8207688331604004, "num_chars": 2}, {"sum_logits": -1.8486723899841309, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.8486723899841309, "logits_per_char": -0.9243361949920654, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 727, "native_id": "cc1a547bdfdcc95e4d632453af14bc96", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.505183458328247, "incorrect_loss_raw": 1.6774057447910309, "correct_loss_per_char": 0.7525917291641235, "incorrect_loss_per_char": 0.8387028723955154, "correct_loss_per_token": 1.505183458328247, "incorrect_loss_per_token": 1.6774057447910309, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3822916746139526, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": true, "logits_per_token": -1.3822916746139526, "logits_per_char": -0.6911458373069763, "num_chars": 2}, {"sum_logits": -1.5737425088882446, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.5737425088882446, "logits_per_char": -0.7868712544441223, "num_chars": 2}, {"sum_logits": -1.505183458328247, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.505183458328247, "logits_per_char": -0.7525917291641235, "num_chars": 2}, {"sum_logits": -1.6311942338943481, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.6311942338943481, "logits_per_char": -0.8155971169471741, "num_chars": 2}, {"sum_logits": -2.122394561767578, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -2.122394561767578, "logits_per_char": -1.061197280883789, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 728, "native_id": "896b25dc41f84357add1c798d4a96cd8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3958462476730347, "incorrect_loss_raw": 1.718283861875534, "correct_loss_per_char": 0.6979231238365173, "incorrect_loss_per_char": 0.859141930937767, "correct_loss_per_token": 1.3958462476730347, "incorrect_loss_per_token": 1.718283861875534, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4681657552719116, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4681657552719116, "logits_per_char": -0.7340828776359558, "num_chars": 2}, {"sum_logits": -1.5218063592910767, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5218063592910767, "logits_per_char": -0.7609031796455383, "num_chars": 2}, {"sum_logits": -1.3958462476730347, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3958462476730347, "logits_per_char": -0.6979231238365173, "num_chars": 2}, {"sum_logits": -1.6465972661972046, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6465972661972046, "logits_per_char": -0.8232986330986023, "num_chars": 2}, {"sum_logits": -2.2365660667419434, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.2365660667419434, "logits_per_char": -1.1182830333709717, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 729, "native_id": "1ca3cd9475d7e9da2ddb74911ee2bb68", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.007563352584839, "incorrect_loss_raw": 1.5445386171340942, "correct_loss_per_char": 1.0037816762924194, "incorrect_loss_per_char": 0.7722693085670471, "correct_loss_per_token": 2.007563352584839, "incorrect_loss_per_token": 1.5445386171340942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.488869547843933, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.488869547843933, "logits_per_char": -0.7444347739219666, "num_chars": 2}, {"sum_logits": -1.379095435142517, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.379095435142517, "logits_per_char": -0.6895477175712585, "num_chars": 2}, {"sum_logits": -1.57716703414917, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.57716703414917, "logits_per_char": -0.788583517074585, "num_chars": 2}, {"sum_logits": -1.7330224514007568, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7330224514007568, "logits_per_char": -0.8665112257003784, "num_chars": 2}, {"sum_logits": -2.007563352584839, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.007563352584839, "logits_per_char": -1.0037816762924194, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 730, "native_id": "129ec46cc2541b73198d774ee632c8d7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6029255390167236, "incorrect_loss_raw": 1.6764830946922302, "correct_loss_per_char": 0.8014627695083618, "incorrect_loss_per_char": 0.8382415473461151, "correct_loss_per_token": 1.6029255390167236, "incorrect_loss_per_token": 1.6764830946922302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3450473546981812, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3450473546981812, "logits_per_char": -0.6725236773490906, "num_chars": 2}, {"sum_logits": -1.4377678632736206, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4377678632736206, "logits_per_char": -0.7188839316368103, "num_chars": 2}, {"sum_logits": -1.6029255390167236, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6029255390167236, "logits_per_char": -0.8014627695083618, "num_chars": 2}, {"sum_logits": -1.6379084587097168, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6379084587097168, "logits_per_char": -0.8189542293548584, "num_chars": 2}, {"sum_logits": -2.2852087020874023, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.2852087020874023, "logits_per_char": -1.1426043510437012, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 731, "native_id": "0e5c7c0cec5b693e52f74f5f879d84fb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4770158529281616, "incorrect_loss_raw": 1.674129456281662, "correct_loss_per_char": 0.7385079264640808, "incorrect_loss_per_char": 0.837064728140831, "correct_loss_per_token": 1.4770158529281616, "incorrect_loss_per_token": 1.674129456281662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3717237710952759, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.3717237710952759, "logits_per_char": -0.6858618855476379, "num_chars": 2}, {"sum_logits": -1.4770158529281616, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.4770158529281616, "logits_per_char": -0.7385079264640808, "num_chars": 2}, {"sum_logits": -1.6757233142852783, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6757233142852783, "logits_per_char": -0.8378616571426392, "num_chars": 2}, {"sum_logits": -1.7119066715240479, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.7119066715240479, "logits_per_char": -0.8559533357620239, "num_chars": 2}, {"sum_logits": -1.937164068222046, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.937164068222046, "logits_per_char": -0.968582034111023, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 732, "native_id": "af035b75b6f7a1927b1648963f281c5e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3820871114730835, "incorrect_loss_raw": 1.7394087314605713, "correct_loss_per_char": 0.6910435557365417, "incorrect_loss_per_char": 0.8697043657302856, "correct_loss_per_token": 1.3820871114730835, "incorrect_loss_per_token": 1.7394087314605713, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3430068492889404, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3430068492889404, "logits_per_char": -0.6715034246444702, "num_chars": 2}, {"sum_logits": -1.3820871114730835, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.3820871114730835, "logits_per_char": -0.6910435557365417, "num_chars": 2}, {"sum_logits": -1.6548035144805908, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6548035144805908, "logits_per_char": -0.8274017572402954, "num_chars": 2}, {"sum_logits": -1.6494770050048828, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6494770050048828, "logits_per_char": -0.8247385025024414, "num_chars": 2}, {"sum_logits": -2.310347557067871, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.310347557067871, "logits_per_char": -1.1551737785339355, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 733, "native_id": "32d5b7fcae24f0d4871cfb219c5a4b47", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5100632905960083, "incorrect_loss_raw": 1.6579316556453705, "correct_loss_per_char": 0.7550316452980042, "incorrect_loss_per_char": 0.8289658278226852, "correct_loss_per_token": 1.5100632905960083, "incorrect_loss_per_token": 1.6579316556453705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5675874948501587, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5675874948501587, "logits_per_char": -0.7837937474250793, "num_chars": 2}, {"sum_logits": -1.4831219911575317, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4831219911575317, "logits_per_char": -0.7415609955787659, "num_chars": 2}, {"sum_logits": -1.5100632905960083, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5100632905960083, "logits_per_char": -0.7550316452980042, "num_chars": 2}, {"sum_logits": -1.6079829931259155, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6079829931259155, "logits_per_char": -0.8039914965629578, "num_chars": 2}, {"sum_logits": -1.973034143447876, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.973034143447876, "logits_per_char": -0.986517071723938, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 734, "native_id": "87505da761eaa5c3c4703d02a12d46bc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5705451965332031, "incorrect_loss_raw": 1.6560498178005219, "correct_loss_per_char": 0.7852725982666016, "incorrect_loss_per_char": 0.8280249089002609, "correct_loss_per_token": 1.5705451965332031, "incorrect_loss_per_token": 1.6560498178005219, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.29163658618927, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.29163658618927, "logits_per_char": -0.645818293094635, "num_chars": 2}, {"sum_logits": -1.7596803903579712, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7596803903579712, "logits_per_char": -0.8798401951789856, "num_chars": 2}, {"sum_logits": -1.7700512409210205, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7700512409210205, "logits_per_char": -0.8850256204605103, "num_chars": 2}, {"sum_logits": -1.5705451965332031, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5705451965332031, "logits_per_char": -0.7852725982666016, "num_chars": 2}, {"sum_logits": -1.8028310537338257, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.8028310537338257, "logits_per_char": -0.9014155268669128, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 735, "native_id": "ef3d5d35128678937c36438466e0fc93", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2046351432800293, "incorrect_loss_raw": 1.5245082676410675, "correct_loss_per_char": 1.1023175716400146, "incorrect_loss_per_char": 0.7622541338205338, "correct_loss_per_token": 2.2046351432800293, "incorrect_loss_per_token": 1.5245082676410675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.285681962966919, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.285681962966919, "logits_per_char": -0.6428409814834595, "num_chars": 2}, {"sum_logits": -1.4400821924209595, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4400821924209595, "logits_per_char": -0.7200410962104797, "num_chars": 2}, {"sum_logits": -1.6055883169174194, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6055883169174194, "logits_per_char": -0.8027941584587097, "num_chars": 2}, {"sum_logits": -1.7666805982589722, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7666805982589722, "logits_per_char": -0.8833402991294861, "num_chars": 2}, {"sum_logits": -2.2046351432800293, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.2046351432800293, "logits_per_char": -1.1023175716400146, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 736, "native_id": "4f1d8007b446b0e10f07fd63cbd31b6f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5001685619354248, "incorrect_loss_raw": 1.6553330421447754, "correct_loss_per_char": 0.7500842809677124, "incorrect_loss_per_char": 0.8276665210723877, "correct_loss_per_token": 1.5001685619354248, "incorrect_loss_per_token": 1.6553330421447754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4887182712554932, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.4887182712554932, "logits_per_char": -0.7443591356277466, "num_chars": 2}, {"sum_logits": -1.5001685619354248, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.5001685619354248, "logits_per_char": -0.7500842809677124, "num_chars": 2}, {"sum_logits": -1.624939203262329, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.624939203262329, "logits_per_char": -0.8124696016311646, "num_chars": 2}, {"sum_logits": -1.6338474750518799, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.6338474750518799, "logits_per_char": -0.8169237375259399, "num_chars": 2}, {"sum_logits": -1.8738272190093994, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.8738272190093994, "logits_per_char": -0.9369136095046997, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 737, "native_id": "4c30d5eed4137cba89747510973f37a3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5027300119400024, "incorrect_loss_raw": 1.671879380941391, "correct_loss_per_char": 0.7513650059700012, "incorrect_loss_per_char": 0.8359396904706955, "correct_loss_per_token": 1.5027300119400024, "incorrect_loss_per_token": 1.671879380941391, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3442939519882202, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3442939519882202, "logits_per_char": -0.6721469759941101, "num_chars": 2}, {"sum_logits": -1.5027300119400024, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5027300119400024, "logits_per_char": -0.7513650059700012, "num_chars": 2}, {"sum_logits": -1.6446539163589478, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6446539163589478, "logits_per_char": -0.8223269581794739, "num_chars": 2}, {"sum_logits": -1.6838701963424683, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6838701963424683, "logits_per_char": -0.8419350981712341, "num_chars": 2}, {"sum_logits": -2.0146994590759277, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0146994590759277, "logits_per_char": -1.0073497295379639, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 738, "native_id": "515834727e23e30ab7c8fe5ba7e9a765", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.521127700805664, "incorrect_loss_raw": 1.642617404460907, "correct_loss_per_char": 0.760563850402832, "incorrect_loss_per_char": 0.8213087022304535, "correct_loss_per_token": 1.521127700805664, "incorrect_loss_per_token": 1.642617404460907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5223608016967773, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5223608016967773, "logits_per_char": -0.7611804008483887, "num_chars": 2}, {"sum_logits": -1.521127700805664, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.521127700805664, "logits_per_char": -0.760563850402832, "num_chars": 2}, {"sum_logits": -1.6362535953521729, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6362535953521729, "logits_per_char": -0.8181267976760864, "num_chars": 2}, {"sum_logits": -1.6371188163757324, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6371188163757324, "logits_per_char": -0.8185594081878662, "num_chars": 2}, {"sum_logits": -1.7747364044189453, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7747364044189453, "logits_per_char": -0.8873682022094727, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 739, "native_id": "34ec6393db5a01f689c11fac153e31c1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.498995065689087, "incorrect_loss_raw": 1.6486564576625824, "correct_loss_per_char": 0.7494975328445435, "incorrect_loss_per_char": 0.8243282288312912, "correct_loss_per_token": 1.498995065689087, "incorrect_loss_per_token": 1.6486564576625824, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.498995065689087, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.498995065689087, "logits_per_char": -0.7494975328445435, "num_chars": 2}, {"sum_logits": -1.5620110034942627, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5620110034942627, "logits_per_char": -0.7810055017471313, "num_chars": 2}, {"sum_logits": -1.6593774557113647, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6593774557113647, "logits_per_char": -0.8296887278556824, "num_chars": 2}, {"sum_logits": -1.6977039575576782, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6977039575576782, "logits_per_char": -0.8488519787788391, "num_chars": 2}, {"sum_logits": -1.675533413887024, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.675533413887024, "logits_per_char": -0.837766706943512, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 740, "native_id": "0f0e339412f719a019bf373e6daf2530", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4845151901245117, "incorrect_loss_raw": 1.6582105457782745, "correct_loss_per_char": 0.7422575950622559, "incorrect_loss_per_char": 0.8291052728891373, "correct_loss_per_token": 1.4845151901245117, "incorrect_loss_per_token": 1.6582105457782745, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4845151901245117, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.4845151901245117, "logits_per_char": -0.7422575950622559, "num_chars": 2}, {"sum_logits": -1.5416259765625, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5416259765625, "logits_per_char": -0.77081298828125, "num_chars": 2}, {"sum_logits": -1.6049948930740356, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6049948930740356, "logits_per_char": -0.8024974465370178, "num_chars": 2}, {"sum_logits": -1.6645169258117676, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6645169258117676, "logits_per_char": -0.8322584629058838, "num_chars": 2}, {"sum_logits": -1.821704387664795, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.821704387664795, "logits_per_char": -0.9108521938323975, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 741, "native_id": "489a082aab43dd1a53f3f1f89c2365ed", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4566965103149414, "incorrect_loss_raw": 1.6665975153446198, "correct_loss_per_char": 0.7283482551574707, "incorrect_loss_per_char": 0.8332987576723099, "correct_loss_per_token": 1.4566965103149414, "incorrect_loss_per_token": 1.6665975153446198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4566965103149414, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4566965103149414, "logits_per_char": -0.7283482551574707, "num_chars": 2}, {"sum_logits": -1.520627737045288, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.520627737045288, "logits_per_char": -0.760313868522644, "num_chars": 2}, {"sum_logits": -1.6663821935653687, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6663821935653687, "logits_per_char": -0.8331910967826843, "num_chars": 2}, {"sum_logits": -1.6130820512771606, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6130820512771606, "logits_per_char": -0.8065410256385803, "num_chars": 2}, {"sum_logits": -1.8662980794906616, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.8662980794906616, "logits_per_char": -0.9331490397453308, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 742, "native_id": "7c45033e9fd9f1a759923971b14390ed", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9592244625091553, "incorrect_loss_raw": 1.5551811456680298, "correct_loss_per_char": 0.9796122312545776, "incorrect_loss_per_char": 0.7775905728340149, "correct_loss_per_token": 1.9592244625091553, "incorrect_loss_per_token": 1.5551811456680298, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.362088680267334, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.362088680267334, "logits_per_char": -0.681044340133667, "num_chars": 2}, {"sum_logits": -1.4975829124450684, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4975829124450684, "logits_per_char": -0.7487914562225342, "num_chars": 2}, {"sum_logits": -1.5898041725158691, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5898041725158691, "logits_per_char": -0.7949020862579346, "num_chars": 2}, {"sum_logits": -1.7712488174438477, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7712488174438477, "logits_per_char": -0.8856244087219238, "num_chars": 2}, {"sum_logits": -1.9592244625091553, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9592244625091553, "logits_per_char": -0.9796122312545776, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 743, "native_id": "061f326d2a87a10da6316b55bd5522bd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7041360139846802, "incorrect_loss_raw": 1.6047636568546295, "correct_loss_per_char": 0.8520680069923401, "incorrect_loss_per_char": 0.8023818284273148, "correct_loss_per_token": 1.7041360139846802, "incorrect_loss_per_token": 1.6047636568546295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4624364376068115, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.4624364376068115, "logits_per_char": -0.7312182188034058, "num_chars": 2}, {"sum_logits": -1.4820834398269653, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.4820834398269653, "logits_per_char": -0.7410417199134827, "num_chars": 2}, {"sum_logits": -1.6236165761947632, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6236165761947632, "logits_per_char": -0.8118082880973816, "num_chars": 2}, {"sum_logits": -1.7041360139846802, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7041360139846802, "logits_per_char": -0.8520680069923401, "num_chars": 2}, {"sum_logits": -1.850918173789978, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.850918173789978, "logits_per_char": -0.925459086894989, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 744, "native_id": "d747c4e463b80bfcc49b874063f9fae1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.66789710521698, "incorrect_loss_raw": 1.6058369576931, "correct_loss_per_char": 0.83394855260849, "incorrect_loss_per_char": 0.80291847884655, "correct_loss_per_token": 1.66789710521698, "incorrect_loss_per_token": 1.6058369576931, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5248239040374756, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.5248239040374756, "logits_per_char": -0.7624119520187378, "num_chars": 2}, {"sum_logits": -1.571110725402832, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.571110725402832, "logits_per_char": -0.785555362701416, "num_chars": 2}, {"sum_logits": -1.562041997909546, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.562041997909546, "logits_per_char": -0.781020998954773, "num_chars": 2}, {"sum_logits": -1.66789710521698, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.66789710521698, "logits_per_char": -0.83394855260849, "num_chars": 2}, {"sum_logits": -1.7653712034225464, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7653712034225464, "logits_per_char": -0.8826856017112732, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 745, "native_id": "df3d27338bcf86b341b8b02d4309daf5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5526926517486572, "incorrect_loss_raw": 1.653957575559616, "correct_loss_per_char": 0.7763463258743286, "incorrect_loss_per_char": 0.826978787779808, "correct_loss_per_token": 1.5526926517486572, "incorrect_loss_per_token": 1.653957575559616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4170516729354858, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.4170516729354858, "logits_per_char": -0.7085258364677429, "num_chars": 2}, {"sum_logits": -1.5526926517486572, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5526926517486572, "logits_per_char": -0.7763463258743286, "num_chars": 2}, {"sum_logits": -1.6199437379837036, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6199437379837036, "logits_per_char": -0.8099718689918518, "num_chars": 2}, {"sum_logits": -1.563637137413025, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.563637137413025, "logits_per_char": -0.7818185687065125, "num_chars": 2}, {"sum_logits": -2.01519775390625, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.01519775390625, "logits_per_char": -1.007598876953125, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 746, "native_id": "db63bf66a8bfd16e5103cbdd350f5202", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4389233589172363, "incorrect_loss_raw": 1.6704445779323578, "correct_loss_per_char": 0.7194616794586182, "incorrect_loss_per_char": 0.8352222889661789, "correct_loss_per_token": 1.4389233589172363, "incorrect_loss_per_token": 1.6704445779323578, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4389233589172363, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4389233589172363, "logits_per_char": -0.7194616794586182, "num_chars": 2}, {"sum_logits": -1.542219638824463, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.542219638824463, "logits_per_char": -0.7711098194122314, "num_chars": 2}, {"sum_logits": -1.7519583702087402, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7519583702087402, "logits_per_char": -0.8759791851043701, "num_chars": 2}, {"sum_logits": -1.6258140802383423, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6258140802383423, "logits_per_char": -0.8129070401191711, "num_chars": 2}, {"sum_logits": -1.7617862224578857, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7617862224578857, "logits_per_char": -0.8808931112289429, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 747, "native_id": "f8a9208665a4f2d64986940456b4b964", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6868692636489868, "incorrect_loss_raw": 1.6479192078113556, "correct_loss_per_char": 0.8434346318244934, "incorrect_loss_per_char": 0.8239596039056778, "correct_loss_per_token": 1.6868692636489868, "incorrect_loss_per_token": 1.6479192078113556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.341392159461975, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.341392159461975, "logits_per_char": -0.6706960797309875, "num_chars": 2}, {"sum_logits": -1.4724117517471313, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.4724117517471313, "logits_per_char": -0.7362058758735657, "num_chars": 2}, {"sum_logits": -1.5547784566879272, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5547784566879272, "logits_per_char": -0.7773892283439636, "num_chars": 2}, {"sum_logits": -1.6868692636489868, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6868692636489868, "logits_per_char": -0.8434346318244934, "num_chars": 2}, {"sum_logits": -2.2230944633483887, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.2230944633483887, "logits_per_char": -1.1115472316741943, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 748, "native_id": "1bf4c6b5bd870b1a079106e1e97e5d09", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7256379127502441, "incorrect_loss_raw": 1.612466722726822, "correct_loss_per_char": 0.8628189563751221, "incorrect_loss_per_char": 0.806233361363411, "correct_loss_per_token": 1.7256379127502441, "incorrect_loss_per_token": 1.612466722726822, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3887146711349487, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3887146711349487, "logits_per_char": -0.6943573355674744, "num_chars": 2}, {"sum_logits": -1.488684892654419, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.488684892654419, "logits_per_char": -0.7443424463272095, "num_chars": 2}, {"sum_logits": -1.7256379127502441, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7256379127502441, "logits_per_char": -0.8628189563751221, "num_chars": 2}, {"sum_logits": -1.5884430408477783, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5884430408477783, "logits_per_char": -0.7942215204238892, "num_chars": 2}, {"sum_logits": -1.9840242862701416, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9840242862701416, "logits_per_char": -0.9920121431350708, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 749, "native_id": "c1c73ef0ff662a76cd42c3500240974a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4894022941589355, "incorrect_loss_raw": 1.6557208597660065, "correct_loss_per_char": 0.7447011470794678, "incorrect_loss_per_char": 0.8278604298830032, "correct_loss_per_token": 1.4894022941589355, "incorrect_loss_per_token": 1.6557208597660065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4894022941589355, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4894022941589355, "logits_per_char": -0.7447011470794678, "num_chars": 2}, {"sum_logits": -1.5798537731170654, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5798537731170654, "logits_per_char": -0.7899268865585327, "num_chars": 2}, {"sum_logits": -1.4890133142471313, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4890133142471313, "logits_per_char": -0.7445066571235657, "num_chars": 2}, {"sum_logits": -1.7882180213928223, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7882180213928223, "logits_per_char": -0.8941090106964111, "num_chars": 2}, {"sum_logits": -1.7657983303070068, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7657983303070068, "logits_per_char": -0.8828991651535034, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 750, "native_id": "aefa60233f3c5c4966f8ac22e901a1c7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4328585863113403, "incorrect_loss_raw": 1.6758384108543396, "correct_loss_per_char": 0.7164292931556702, "incorrect_loss_per_char": 0.8379192054271698, "correct_loss_per_token": 1.4328585863113403, "incorrect_loss_per_token": 1.6758384108543396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4328585863113403, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4328585863113403, "logits_per_char": -0.7164292931556702, "num_chars": 2}, {"sum_logits": -1.517485499382019, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.517485499382019, "logits_per_char": -0.7587427496910095, "num_chars": 2}, {"sum_logits": -1.636407732963562, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.636407732963562, "logits_per_char": -0.818203866481781, "num_chars": 2}, {"sum_logits": -1.6273669004440308, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6273669004440308, "logits_per_char": -0.8136834502220154, "num_chars": 2}, {"sum_logits": -1.9220935106277466, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.9220935106277466, "logits_per_char": -0.9610467553138733, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 751, "native_id": "9221962ed3a6094e5c8f33785ce048cd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5286328792572021, "incorrect_loss_raw": 1.6633868217468262, "correct_loss_per_char": 0.7643164396286011, "incorrect_loss_per_char": 0.8316934108734131, "correct_loss_per_token": 1.5286328792572021, "incorrect_loss_per_token": 1.6633868217468262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3252969980239868, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3252969980239868, "logits_per_char": -0.6626484990119934, "num_chars": 2}, {"sum_logits": -1.6057201623916626, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6057201623916626, "logits_per_char": -0.8028600811958313, "num_chars": 2}, {"sum_logits": -1.5286328792572021, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5286328792572021, "logits_per_char": -0.7643164396286011, "num_chars": 2}, {"sum_logits": -1.803249478340149, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.803249478340149, "logits_per_char": -0.9016247391700745, "num_chars": 2}, {"sum_logits": -1.9192806482315063, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9192806482315063, "logits_per_char": -0.9596403241157532, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 752, "native_id": "8c8052980e357545398d27d1c3c832d8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4133825302124023, "incorrect_loss_raw": 1.681805431842804, "correct_loss_per_char": 0.7066912651062012, "incorrect_loss_per_char": 0.840902715921402, "correct_loss_per_token": 1.4133825302124023, "incorrect_loss_per_token": 1.681805431842804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4133825302124023, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4133825302124023, "logits_per_char": -0.7066912651062012, "num_chars": 2}, {"sum_logits": -1.6307381391525269, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6307381391525269, "logits_per_char": -0.8153690695762634, "num_chars": 2}, {"sum_logits": -1.5112968683242798, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5112968683242798, "logits_per_char": -0.7556484341621399, "num_chars": 2}, {"sum_logits": -1.6614385843276978, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6614385843276978, "logits_per_char": -0.8307192921638489, "num_chars": 2}, {"sum_logits": -1.9237481355667114, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9237481355667114, "logits_per_char": -0.9618740677833557, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 753, "native_id": "418913999c665ae9527fd14a6132da39", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.524057388305664, "incorrect_loss_raw": 1.65067458152771, "correct_loss_per_char": 0.762028694152832, "incorrect_loss_per_char": 0.825337290763855, "correct_loss_per_token": 1.524057388305664, "incorrect_loss_per_token": 1.65067458152771, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5019252300262451, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.5019252300262451, "logits_per_char": -0.7509626150131226, "num_chars": 2}, {"sum_logits": -1.524057388305664, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.524057388305664, "logits_per_char": -0.762028694152832, "num_chars": 2}, {"sum_logits": -1.6532461643218994, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6532461643218994, "logits_per_char": -0.8266230821609497, "num_chars": 2}, {"sum_logits": -1.531789779663086, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.531789779663086, "logits_per_char": -0.765894889831543, "num_chars": 2}, {"sum_logits": -1.9157371520996094, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9157371520996094, "logits_per_char": -0.9578685760498047, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 754, "native_id": "2634468d21fa33a88cefe28a5d613f59", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9046506881713867, "incorrect_loss_raw": 1.5547935962677002, "correct_loss_per_char": 0.9523253440856934, "incorrect_loss_per_char": 0.7773967981338501, "correct_loss_per_token": 1.9046506881713867, "incorrect_loss_per_token": 1.5547935962677002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.473199486732483, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.473199486732483, "logits_per_char": -0.7365997433662415, "num_chars": 2}, {"sum_logits": -1.596463680267334, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.596463680267334, "logits_per_char": -0.798231840133667, "num_chars": 2}, {"sum_logits": -1.5946005582809448, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.5946005582809448, "logits_per_char": -0.7973002791404724, "num_chars": 2}, {"sum_logits": -1.554910659790039, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.554910659790039, "logits_per_char": -0.7774553298950195, "num_chars": 2}, {"sum_logits": -1.9046506881713867, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.9046506881713867, "logits_per_char": -0.9523253440856934, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 755, "native_id": "66bfb6e209c94e6be5b0d04b0c7e2064", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6137503385543823, "incorrect_loss_raw": 1.6451782584190369, "correct_loss_per_char": 0.8068751692771912, "incorrect_loss_per_char": 0.8225891292095184, "correct_loss_per_token": 1.6137503385543823, "incorrect_loss_per_token": 1.6451782584190369, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4043190479278564, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4043190479278564, "logits_per_char": -0.7021595239639282, "num_chars": 2}, {"sum_logits": -1.5861200094223022, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5861200094223022, "logits_per_char": -0.7930600047111511, "num_chars": 2}, {"sum_logits": -1.6137503385543823, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6137503385543823, "logits_per_char": -0.8068751692771912, "num_chars": 2}, {"sum_logits": -1.5057207345962524, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5057207345962524, "logits_per_char": -0.7528603672981262, "num_chars": 2}, {"sum_logits": -2.0845532417297363, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0845532417297363, "logits_per_char": -1.0422766208648682, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 756, "native_id": "3163910d665c139a1f6f07d85803baba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.832419753074646, "incorrect_loss_raw": 1.568972796201706, "correct_loss_per_char": 0.916209876537323, "incorrect_loss_per_char": 0.784486398100853, "correct_loss_per_token": 1.832419753074646, "incorrect_loss_per_token": 1.568972796201706, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4795671701431274, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4795671701431274, "logits_per_char": -0.7397835850715637, "num_chars": 2}, {"sum_logits": -1.5404165983200073, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5404165983200073, "logits_per_char": -0.7702082991600037, "num_chars": 2}, {"sum_logits": -1.632768988609314, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.632768988609314, "logits_per_char": -0.816384494304657, "num_chars": 2}, {"sum_logits": -1.623138427734375, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.623138427734375, "logits_per_char": -0.8115692138671875, "num_chars": 2}, {"sum_logits": -1.832419753074646, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.832419753074646, "logits_per_char": -0.916209876537323, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 757, "native_id": "0e52659484f2f6d763cf0d38d4c5999d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4887399673461914, "incorrect_loss_raw": 1.6598299741744995, "correct_loss_per_char": 0.7443699836730957, "incorrect_loss_per_char": 0.8299149870872498, "correct_loss_per_token": 1.4887399673461914, "incorrect_loss_per_token": 1.6598299741744995, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4887399673461914, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4887399673461914, "logits_per_char": -0.7443699836730957, "num_chars": 2}, {"sum_logits": -1.4775733947753906, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4775733947753906, "logits_per_char": -0.7387866973876953, "num_chars": 2}, {"sum_logits": -1.5740883350372314, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5740883350372314, "logits_per_char": -0.7870441675186157, "num_chars": 2}, {"sum_logits": -1.7534973621368408, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7534973621368408, "logits_per_char": -0.8767486810684204, "num_chars": 2}, {"sum_logits": -1.8341608047485352, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.8341608047485352, "logits_per_char": -0.9170804023742676, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 758, "native_id": "167d2cfa04bfaea0e0b5bac3598d5769", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5925077199935913, "incorrect_loss_raw": 1.6305551826953888, "correct_loss_per_char": 0.7962538599967957, "incorrect_loss_per_char": 0.8152775913476944, "correct_loss_per_token": 1.5925077199935913, "incorrect_loss_per_token": 1.6305551826953888, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5051474571228027, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.5051474571228027, "logits_per_char": -0.7525737285614014, "num_chars": 2}, {"sum_logits": -1.5310674905776978, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5310674905776978, "logits_per_char": -0.7655337452888489, "num_chars": 2}, {"sum_logits": -1.606404185295105, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.606404185295105, "logits_per_char": -0.8032020926475525, "num_chars": 2}, {"sum_logits": -1.5925077199935913, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5925077199935913, "logits_per_char": -0.7962538599967957, "num_chars": 2}, {"sum_logits": -1.8796015977859497, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8796015977859497, "logits_per_char": -0.9398007988929749, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 759, "native_id": "39572e0ba1db51fa74f7fc2d90c5ec7f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.684104323387146, "incorrect_loss_raw": 1.6132208406925201, "correct_loss_per_char": 0.842052161693573, "incorrect_loss_per_char": 0.8066104203462601, "correct_loss_per_token": 1.684104323387146, "incorrect_loss_per_token": 1.6132208406925201, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4196882247924805, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4196882247924805, "logits_per_char": -0.7098441123962402, "num_chars": 2}, {"sum_logits": -1.5719434022903442, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5719434022903442, "logits_per_char": -0.7859717011451721, "num_chars": 2}, {"sum_logits": -1.5515166521072388, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5515166521072388, "logits_per_char": -0.7757583260536194, "num_chars": 2}, {"sum_logits": -1.684104323387146, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.684104323387146, "logits_per_char": -0.842052161693573, "num_chars": 2}, {"sum_logits": -1.909735083580017, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.909735083580017, "logits_per_char": -0.9548675417900085, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 760, "native_id": "2a32b1e541b1daae04690d0d3a4b3310", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6136698722839355, "incorrect_loss_raw": 1.6591799855232239, "correct_loss_per_char": 0.8068349361419678, "incorrect_loss_per_char": 0.8295899927616119, "correct_loss_per_token": 1.6136698722839355, "incorrect_loss_per_token": 1.6591799855232239, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.357862949371338, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.357862949371338, "logits_per_char": -0.678931474685669, "num_chars": 2}, {"sum_logits": -1.4499558210372925, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4499558210372925, "logits_per_char": -0.7249779105186462, "num_chars": 2}, {"sum_logits": -1.6136698722839355, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6136698722839355, "logits_per_char": -0.8068349361419678, "num_chars": 2}, {"sum_logits": -1.6547192335128784, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6547192335128784, "logits_per_char": -0.8273596167564392, "num_chars": 2}, {"sum_logits": -2.1741819381713867, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.1741819381713867, "logits_per_char": -1.0870909690856934, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 761, "native_id": "71cbfeb995b06b21e890c91040722252", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6967369318008423, "incorrect_loss_raw": 1.6213622987270355, "correct_loss_per_char": 0.8483684659004211, "incorrect_loss_per_char": 0.8106811493635178, "correct_loss_per_token": 1.6967369318008423, "incorrect_loss_per_token": 1.6213622987270355, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4450441598892212, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4450441598892212, "logits_per_char": -0.7225220799446106, "num_chars": 2}, {"sum_logits": -1.500381350517273, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.500381350517273, "logits_per_char": -0.7501906752586365, "num_chars": 2}, {"sum_logits": -1.502165675163269, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.502165675163269, "logits_per_char": -0.7510828375816345, "num_chars": 2}, {"sum_logits": -1.6967369318008423, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6967369318008423, "logits_per_char": -0.8483684659004211, "num_chars": 2}, {"sum_logits": -2.037858009338379, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.037858009338379, "logits_per_char": -1.0189290046691895, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 762, "native_id": "a15d564d0be6996251b5d523ac62db2a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.00068998336792, "incorrect_loss_raw": 1.5481927394866943, "correct_loss_per_char": 1.00034499168396, "incorrect_loss_per_char": 0.7740963697433472, "correct_loss_per_token": 2.00068998336792, "incorrect_loss_per_token": 1.5481927394866943, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3375486135482788, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3375486135482788, "logits_per_char": -0.6687743067741394, "num_chars": 2}, {"sum_logits": -1.5579036474227905, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5579036474227905, "logits_per_char": -0.7789518237113953, "num_chars": 2}, {"sum_logits": -1.5767070055007935, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5767070055007935, "logits_per_char": -0.7883535027503967, "num_chars": 2}, {"sum_logits": -1.7206116914749146, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7206116914749146, "logits_per_char": -0.8603058457374573, "num_chars": 2}, {"sum_logits": -2.00068998336792, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.00068998336792, "logits_per_char": -1.00034499168396, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 763, "native_id": "6bd170c8d3d99d3c47b3e96427bacaeb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.087275981903076, "incorrect_loss_raw": 1.5329622626304626, "correct_loss_per_char": 1.043637990951538, "incorrect_loss_per_char": 0.7664811313152313, "correct_loss_per_token": 2.087275981903076, "incorrect_loss_per_token": 1.5329622626304626, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3584498167037964, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3584498167037964, "logits_per_char": -0.6792249083518982, "num_chars": 2}, {"sum_logits": -1.4900237321853638, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4900237321853638, "logits_per_char": -0.7450118660926819, "num_chars": 2}, {"sum_logits": -1.545740008354187, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.545740008354187, "logits_per_char": -0.7728700041770935, "num_chars": 2}, {"sum_logits": -1.7376354932785034, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7376354932785034, "logits_per_char": -0.8688177466392517, "num_chars": 2}, {"sum_logits": -2.087275981903076, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.087275981903076, "logits_per_char": -1.043637990951538, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 764, "native_id": "7bc1198664b376f79d584725ad7f874b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.871797800064087, "incorrect_loss_raw": 1.569153368473053, "correct_loss_per_char": 0.9358989000320435, "incorrect_loss_per_char": 0.7845766842365265, "correct_loss_per_token": 1.871797800064087, "incorrect_loss_per_token": 1.569153368473053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5174356698989868, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5174356698989868, "logits_per_char": -0.7587178349494934, "num_chars": 2}, {"sum_logits": -1.4423989057540894, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4423989057540894, "logits_per_char": -0.7211994528770447, "num_chars": 2}, {"sum_logits": -1.5356245040893555, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5356245040893555, "logits_per_char": -0.7678122520446777, "num_chars": 2}, {"sum_logits": -1.7811543941497803, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7811543941497803, "logits_per_char": -0.8905771970748901, "num_chars": 2}, {"sum_logits": -1.871797800064087, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.871797800064087, "logits_per_char": -0.9358989000320435, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 765, "native_id": "d6c002d46d9bfa466637cec4a134f332", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3368146419525146, "incorrect_loss_raw": 1.7253232300281525, "correct_loss_per_char": 0.6684073209762573, "incorrect_loss_per_char": 0.8626616150140762, "correct_loss_per_token": 1.3368146419525146, "incorrect_loss_per_token": 1.7253232300281525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4457831382751465, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4457831382751465, "logits_per_char": -0.7228915691375732, "num_chars": 2}, {"sum_logits": -1.7161592245101929, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.7161592245101929, "logits_per_char": -0.8580796122550964, "num_chars": 2}, {"sum_logits": -1.7002711296081543, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.7002711296081543, "logits_per_char": -0.8501355648040771, "num_chars": 2}, {"sum_logits": -1.3368146419525146, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.3368146419525146, "logits_per_char": -0.6684073209762573, "num_chars": 2}, {"sum_logits": -2.039079427719116, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -2.039079427719116, "logits_per_char": -1.019539713859558, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 766, "native_id": "8cb45b421375243e788cfc64bd77b051", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0166215896606445, "incorrect_loss_raw": 1.5420366525650024, "correct_loss_per_char": 1.0083107948303223, "incorrect_loss_per_char": 0.7710183262825012, "correct_loss_per_token": 2.0166215896606445, "incorrect_loss_per_token": 1.5420366525650024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.388634204864502, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.388634204864502, "logits_per_char": -0.694317102432251, "num_chars": 2}, {"sum_logits": -1.6751610040664673, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6751610040664673, "logits_per_char": -0.8375805020332336, "num_chars": 2}, {"sum_logits": -1.5129910707473755, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5129910707473755, "logits_per_char": -0.7564955353736877, "num_chars": 2}, {"sum_logits": -1.591360330581665, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.591360330581665, "logits_per_char": -0.7956801652908325, "num_chars": 2}, {"sum_logits": -2.0166215896606445, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0166215896606445, "logits_per_char": -1.0083107948303223, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 767, "native_id": "d6ff2d749494d89e9c7a53f587c519f4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6609307527542114, "incorrect_loss_raw": 1.6099202632904053, "correct_loss_per_char": 0.8304653763771057, "incorrect_loss_per_char": 0.8049601316452026, "correct_loss_per_token": 1.6609307527542114, "incorrect_loss_per_token": 1.6099202632904053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4526630640029907, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4526630640029907, "logits_per_char": -0.7263315320014954, "num_chars": 2}, {"sum_logits": -1.6609307527542114, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6609307527542114, "logits_per_char": -0.8304653763771057, "num_chars": 2}, {"sum_logits": -1.627864956855774, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.627864956855774, "logits_per_char": -0.813932478427887, "num_chars": 2}, {"sum_logits": -1.602009892463684, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.602009892463684, "logits_per_char": -0.801004946231842, "num_chars": 2}, {"sum_logits": -1.7571431398391724, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7571431398391724, "logits_per_char": -0.8785715699195862, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 768, "native_id": "6974d215428a974641c1df18678522f5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0432209968566895, "incorrect_loss_raw": 1.5399102866649628, "correct_loss_per_char": 1.0216104984283447, "incorrect_loss_per_char": 0.7699551433324814, "correct_loss_per_token": 2.0432209968566895, "incorrect_loss_per_token": 1.5399102866649628, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3511803150177002, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3511803150177002, "logits_per_char": -0.6755901575088501, "num_chars": 2}, {"sum_logits": -1.5000473260879517, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5000473260879517, "logits_per_char": -0.7500236630439758, "num_chars": 2}, {"sum_logits": -1.5947786569595337, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5947786569595337, "logits_per_char": -0.7973893284797668, "num_chars": 2}, {"sum_logits": -1.7136348485946655, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7136348485946655, "logits_per_char": -0.8568174242973328, "num_chars": 2}, {"sum_logits": -2.0432209968566895, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.0432209968566895, "logits_per_char": -1.0216104984283447, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 769, "native_id": "b94a9764acff078b52a9cbae04661dc9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2548987865447998, "incorrect_loss_raw": 1.7605107724666595, "correct_loss_per_char": 0.6274493932723999, "incorrect_loss_per_char": 0.8802553862333298, "correct_loss_per_token": 1.2548987865447998, "incorrect_loss_per_token": 1.7605107724666595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2548987865447998, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2548987865447998, "logits_per_char": -0.6274493932723999, "num_chars": 2}, {"sum_logits": -1.4385939836502075, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4385939836502075, "logits_per_char": -0.7192969918251038, "num_chars": 2}, {"sum_logits": -1.6672320365905762, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6672320365905762, "logits_per_char": -0.8336160182952881, "num_chars": 2}, {"sum_logits": -1.8012893199920654, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8012893199920654, "logits_per_char": -0.9006446599960327, "num_chars": 2}, {"sum_logits": -2.134927749633789, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.134927749633789, "logits_per_char": -1.0674638748168945, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 770, "native_id": "80930e9df9ac4ad752749a54e7fc124f_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5593541860580444, "incorrect_loss_raw": 1.6467468738555908, "correct_loss_per_char": 0.7796770930290222, "incorrect_loss_per_char": 0.8233734369277954, "correct_loss_per_token": 1.5593541860580444, "incorrect_loss_per_token": 1.6467468738555908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3992952108383179, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3992952108383179, "logits_per_char": -0.6996476054191589, "num_chars": 2}, {"sum_logits": -1.5593541860580444, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5593541860580444, "logits_per_char": -0.7796770930290222, "num_chars": 2}, {"sum_logits": -1.5360954999923706, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5360954999923706, "logits_per_char": -0.7680477499961853, "num_chars": 2}, {"sum_logits": -1.730772852897644, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.730772852897644, "logits_per_char": -0.865386426448822, "num_chars": 2}, {"sum_logits": -1.9208239316940308, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9208239316940308, "logits_per_char": -0.9604119658470154, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 771, "native_id": "3310b5b24f03d67179fababf9ae95144", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5800219774246216, "incorrect_loss_raw": 1.6258825063705444, "correct_loss_per_char": 0.7900109887123108, "incorrect_loss_per_char": 0.8129412531852722, "correct_loss_per_token": 1.5800219774246216, "incorrect_loss_per_token": 1.6258825063705444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5676218271255493, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.5676218271255493, "logits_per_char": -0.7838109135627747, "num_chars": 2}, {"sum_logits": -1.593939185142517, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.593939185142517, "logits_per_char": -0.7969695925712585, "num_chars": 2}, {"sum_logits": -1.5800219774246216, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5800219774246216, "logits_per_char": -0.7900109887123108, "num_chars": 2}, {"sum_logits": -1.6401671171188354, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6401671171188354, "logits_per_char": -0.8200835585594177, "num_chars": 2}, {"sum_logits": -1.7018018960952759, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7018018960952759, "logits_per_char": -0.8509009480476379, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 772, "native_id": "846bc47ced7119ad2ee19a8780d7fe18", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6590288877487183, "incorrect_loss_raw": 1.6338948607444763, "correct_loss_per_char": 0.8295144438743591, "incorrect_loss_per_char": 0.8169474303722382, "correct_loss_per_token": 1.6590288877487183, "incorrect_loss_per_token": 1.6338948607444763, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3441808223724365, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3441808223724365, "logits_per_char": -0.6720904111862183, "num_chars": 2}, {"sum_logits": -1.513097882270813, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.513097882270813, "logits_per_char": -0.7565489411354065, "num_chars": 2}, {"sum_logits": -1.6515001058578491, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6515001058578491, "logits_per_char": -0.8257500529289246, "num_chars": 2}, {"sum_logits": -1.6590288877487183, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6590288877487183, "logits_per_char": -0.8295144438743591, "num_chars": 2}, {"sum_logits": -2.0268006324768066, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.0268006324768066, "logits_per_char": -1.0134003162384033, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 773, "native_id": "fd5a34e94303d7fd343de2a8f36943d5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9091545343399048, "incorrect_loss_raw": 1.5559532046318054, "correct_loss_per_char": 0.9545772671699524, "incorrect_loss_per_char": 0.7779766023159027, "correct_loss_per_token": 1.9091545343399048, "incorrect_loss_per_token": 1.5559532046318054, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.472449541091919, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": true, "logits_per_token": -1.472449541091919, "logits_per_char": -0.7362247705459595, "num_chars": 2}, {"sum_logits": -1.5479849576950073, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.5479849576950073, "logits_per_char": -0.7739924788475037, "num_chars": 2}, {"sum_logits": -1.561888575553894, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.561888575553894, "logits_per_char": -0.780944287776947, "num_chars": 2}, {"sum_logits": -1.6414897441864014, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6414897441864014, "logits_per_char": -0.8207448720932007, "num_chars": 2}, {"sum_logits": -1.9091545343399048, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.9091545343399048, "logits_per_char": -0.9545772671699524, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 774, "native_id": "4e87db4771f2d6423034935446e3fff1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.507531762123108, "incorrect_loss_raw": 1.6770153045654297, "correct_loss_per_char": 0.753765881061554, "incorrect_loss_per_char": 0.8385076522827148, "correct_loss_per_token": 1.507531762123108, "incorrect_loss_per_token": 1.6770153045654297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3692963123321533, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.3692963123321533, "logits_per_char": -0.6846481561660767, "num_chars": 2}, {"sum_logits": -1.507531762123108, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.507531762123108, "logits_per_char": -0.753765881061554, "num_chars": 2}, {"sum_logits": -1.5851432085037231, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5851432085037231, "logits_per_char": -0.7925716042518616, "num_chars": 2}, {"sum_logits": -1.6371923685073853, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6371923685073853, "logits_per_char": -0.8185961842536926, "num_chars": 2}, {"sum_logits": -2.116429328918457, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.116429328918457, "logits_per_char": -1.0582146644592285, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 775, "native_id": "a585df0818180ce3c06f963a4c3c810a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5477638244628906, "incorrect_loss_raw": 1.63736954331398, "correct_loss_per_char": 0.7738819122314453, "incorrect_loss_per_char": 0.81868477165699, "correct_loss_per_token": 1.5477638244628906, "incorrect_loss_per_token": 1.63736954331398, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5477638244628906, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5477638244628906, "logits_per_char": -0.7738819122314453, "num_chars": 2}, {"sum_logits": -1.63441801071167, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.63441801071167, "logits_per_char": -0.817209005355835, "num_chars": 2}, {"sum_logits": -1.5667744874954224, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5667744874954224, "logits_per_char": -0.7833872437477112, "num_chars": 2}, {"sum_logits": -1.5433082580566406, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.5433082580566406, "logits_per_char": -0.7716541290283203, "num_chars": 2}, {"sum_logits": -1.8049774169921875, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.8049774169921875, "logits_per_char": -0.9024887084960938, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 776, "native_id": "c9f7d07e6d363a99f5fadd68a4dfa35a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5100891590118408, "incorrect_loss_raw": 1.6681055426597595, "correct_loss_per_char": 0.7550445795059204, "incorrect_loss_per_char": 0.8340527713298798, "correct_loss_per_token": 1.5100891590118408, "incorrect_loss_per_token": 1.6681055426597595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4174461364746094, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4174461364746094, "logits_per_char": -0.7087230682373047, "num_chars": 2}, {"sum_logits": -1.5100891590118408, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5100891590118408, "logits_per_char": -0.7550445795059204, "num_chars": 2}, {"sum_logits": -1.5084327459335327, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5084327459335327, "logits_per_char": -0.7542163729667664, "num_chars": 2}, {"sum_logits": -1.734066367149353, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.734066367149353, "logits_per_char": -0.8670331835746765, "num_chars": 2}, {"sum_logits": -2.012476921081543, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.012476921081543, "logits_per_char": -1.0062384605407715, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 777, "native_id": "c7cb327fa4c0008efaa7741081a365d4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.090756416320801, "incorrect_loss_raw": 1.5483558177947998, "correct_loss_per_char": 1.0453782081604004, "incorrect_loss_per_char": 0.7741779088973999, "correct_loss_per_token": 2.090756416320801, "incorrect_loss_per_token": 1.5483558177947998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.253446102142334, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.253446102142334, "logits_per_char": -0.626723051071167, "num_chars": 2}, {"sum_logits": -1.4249203205108643, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.4249203205108643, "logits_per_char": -0.7124601602554321, "num_chars": 2}, {"sum_logits": -1.716567039489746, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.716567039489746, "logits_per_char": -0.858283519744873, "num_chars": 2}, {"sum_logits": -1.7984898090362549, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7984898090362549, "logits_per_char": -0.8992449045181274, "num_chars": 2}, {"sum_logits": -2.090756416320801, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.090756416320801, "logits_per_char": -1.0453782081604004, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 778, "native_id": "c54ddc0f9d170ba65d9f4f2e0bb41d1c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4847289323806763, "incorrect_loss_raw": 1.656867355108261, "correct_loss_per_char": 0.7423644661903381, "incorrect_loss_per_char": 0.8284336775541306, "correct_loss_per_token": 1.4847289323806763, "incorrect_loss_per_token": 1.656867355108261, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4847289323806763, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.4847289323806763, "logits_per_char": -0.7423644661903381, "num_chars": 2}, {"sum_logits": -1.4978020191192627, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.4978020191192627, "logits_per_char": -0.7489010095596313, "num_chars": 2}, {"sum_logits": -1.620926022529602, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.620926022529602, "logits_per_char": -0.810463011264801, "num_chars": 2}, {"sum_logits": -1.6392394304275513, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6392394304275513, "logits_per_char": -0.8196197152137756, "num_chars": 2}, {"sum_logits": -1.8695019483566284, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.8695019483566284, "logits_per_char": -0.9347509741783142, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 779, "native_id": "1729c737ff92cf558efecde2c6cafc5e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5931626558303833, "incorrect_loss_raw": 1.6824998259544373, "correct_loss_per_char": 0.7965813279151917, "incorrect_loss_per_char": 0.8412499129772186, "correct_loss_per_token": 1.5931626558303833, "incorrect_loss_per_token": 1.6824998259544373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.286421775817871, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.286421775817871, "logits_per_char": -0.6432108879089355, "num_chars": 2}, {"sum_logits": -1.4625157117843628, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4625157117843628, "logits_per_char": -0.7312578558921814, "num_chars": 2}, {"sum_logits": -1.5931626558303833, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5931626558303833, "logits_per_char": -0.7965813279151917, "num_chars": 2}, {"sum_logits": -1.7150248289108276, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7150248289108276, "logits_per_char": -0.8575124144554138, "num_chars": 2}, {"sum_logits": -2.2660369873046875, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.2660369873046875, "logits_per_char": -1.1330184936523438, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 780, "native_id": "19dfd55e967dacd6f5700a62c1e14eee", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6026962995529175, "incorrect_loss_raw": 1.6203771531581879, "correct_loss_per_char": 0.8013481497764587, "incorrect_loss_per_char": 0.8101885765790939, "correct_loss_per_token": 1.6026962995529175, "incorrect_loss_per_token": 1.6203771531581879, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5736068487167358, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5736068487167358, "logits_per_char": -0.7868034243583679, "num_chars": 2}, {"sum_logits": -1.6022833585739136, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6022833585739136, "logits_per_char": -0.8011416792869568, "num_chars": 2}, {"sum_logits": -1.524770736694336, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.524770736694336, "logits_per_char": -0.762385368347168, "num_chars": 2}, {"sum_logits": -1.6026962995529175, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6026962995529175, "logits_per_char": -0.8013481497764587, "num_chars": 2}, {"sum_logits": -1.7808476686477661, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7808476686477661, "logits_per_char": -0.8904238343238831, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 781, "native_id": "b9bed83138901f4a45041b02c5b242c1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.515766978263855, "incorrect_loss_raw": 1.6446075439453125, "correct_loss_per_char": 0.7578834891319275, "incorrect_loss_per_char": 0.8223037719726562, "correct_loss_per_token": 1.515766978263855, "incorrect_loss_per_token": 1.6446075439453125, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5551248788833618, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5551248788833618, "logits_per_char": -0.7775624394416809, "num_chars": 2}, {"sum_logits": -1.515766978263855, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.515766978263855, "logits_per_char": -0.7578834891319275, "num_chars": 2}, {"sum_logits": -1.6401067972183228, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6401067972183228, "logits_per_char": -0.8200533986091614, "num_chars": 2}, {"sum_logits": -1.5961812734603882, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5961812734603882, "logits_per_char": -0.7980906367301941, "num_chars": 2}, {"sum_logits": -1.7870172262191772, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7870172262191772, "logits_per_char": -0.8935086131095886, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 782, "native_id": "b9d22425a3d5810be9528a55245c8f09", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5043004751205444, "incorrect_loss_raw": 1.7019622027873993, "correct_loss_per_char": 0.7521502375602722, "incorrect_loss_per_char": 0.8509811013936996, "correct_loss_per_token": 1.5043004751205444, "incorrect_loss_per_token": 1.7019622027873993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2641927003860474, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.2641927003860474, "logits_per_char": -0.6320963501930237, "num_chars": 2}, {"sum_logits": -1.5043004751205444, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5043004751205444, "logits_per_char": -0.7521502375602722, "num_chars": 2}, {"sum_logits": -1.583124041557312, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.583124041557312, "logits_per_char": -0.791562020778656, "num_chars": 2}, {"sum_logits": -1.716326117515564, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.716326117515564, "logits_per_char": -0.858163058757782, "num_chars": 2}, {"sum_logits": -2.244205951690674, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.244205951690674, "logits_per_char": -1.122102975845337, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 783, "native_id": "2af70107e04e61e3c7884bc743901c02", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4656367301940918, "incorrect_loss_raw": 1.6703923046588898, "correct_loss_per_char": 0.7328183650970459, "incorrect_loss_per_char": 0.8351961523294449, "correct_loss_per_token": 1.4656367301940918, "incorrect_loss_per_token": 1.6703923046588898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4070510864257812, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": true, "logits_per_token": -1.4070510864257812, "logits_per_char": -0.7035255432128906, "num_chars": 2}, {"sum_logits": -1.4656367301940918, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.4656367301940918, "logits_per_char": -0.7328183650970459, "num_chars": 2}, {"sum_logits": -1.6678496599197388, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6678496599197388, "logits_per_char": -0.8339248299598694, "num_chars": 2}, {"sum_logits": -1.7766462564468384, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.7766462564468384, "logits_per_char": -0.8883231282234192, "num_chars": 2}, {"sum_logits": -1.8300222158432007, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.8300222158432007, "logits_per_char": -0.9150111079216003, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 784, "native_id": "be2cb9c96069ac355a7ccef262743d14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5016145706176758, "incorrect_loss_raw": 1.671661376953125, "correct_loss_per_char": 0.7508072853088379, "incorrect_loss_per_char": 0.8358306884765625, "correct_loss_per_token": 1.5016145706176758, "incorrect_loss_per_token": 1.671661376953125, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5054922103881836, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5054922103881836, "logits_per_char": -0.7527461051940918, "num_chars": 2}, {"sum_logits": -1.5016145706176758, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5016145706176758, "logits_per_char": -0.7508072853088379, "num_chars": 2}, {"sum_logits": -1.5007038116455078, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.5007038116455078, "logits_per_char": -0.7503519058227539, "num_chars": 2}, {"sum_logits": -1.593045711517334, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.593045711517334, "logits_per_char": -0.796522855758667, "num_chars": 2}, {"sum_logits": -2.0874037742614746, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.0874037742614746, "logits_per_char": -1.0437018871307373, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 785, "native_id": "799e48ec7fb16415c8f82828c5761ed1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.419588565826416, "incorrect_loss_raw": 1.7313662469387054, "correct_loss_per_char": 0.709794282913208, "incorrect_loss_per_char": 0.8656831234693527, "correct_loss_per_token": 1.419588565826416, "incorrect_loss_per_token": 1.7313662469387054, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2912193536758423, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2912193536758423, "logits_per_char": -0.6456096768379211, "num_chars": 2}, {"sum_logits": -1.419588565826416, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.419588565826416, "logits_per_char": -0.709794282913208, "num_chars": 2}, {"sum_logits": -1.6656076908111572, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6656076908111572, "logits_per_char": -0.8328038454055786, "num_chars": 2}, {"sum_logits": -1.6685116291046143, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6685116291046143, "logits_per_char": -0.8342558145523071, "num_chars": 2}, {"sum_logits": -2.300126314163208, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.300126314163208, "logits_per_char": -1.150063157081604, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 786, "native_id": "a5db1e9677af118deb8e4add8bc18db2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6279428005218506, "incorrect_loss_raw": 1.6587204933166504, "correct_loss_per_char": 0.8139714002609253, "incorrect_loss_per_char": 0.8293602466583252, "correct_loss_per_token": 1.6279428005218506, "incorrect_loss_per_token": 1.6587204933166504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2628790140151978, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.2628790140151978, "logits_per_char": -0.6314395070075989, "num_chars": 2}, {"sum_logits": -1.636804223060608, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.636804223060608, "logits_per_char": -0.818402111530304, "num_chars": 2}, {"sum_logits": -1.6566336154937744, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6566336154937744, "logits_per_char": -0.8283168077468872, "num_chars": 2}, {"sum_logits": -1.6279428005218506, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6279428005218506, "logits_per_char": -0.8139714002609253, "num_chars": 2}, {"sum_logits": -2.0785651206970215, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.0785651206970215, "logits_per_char": -1.0392825603485107, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 787, "native_id": "28357ebf85f8bb82b6a3210c4397e0aa", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6470729112625122, "incorrect_loss_raw": 1.641548901796341, "correct_loss_per_char": 0.8235364556312561, "incorrect_loss_per_char": 0.8207744508981705, "correct_loss_per_token": 1.6470729112625122, "incorrect_loss_per_token": 1.641548901796341, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4070473909378052, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4070473909378052, "logits_per_char": -0.7035236954689026, "num_chars": 2}, {"sum_logits": -1.449295163154602, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.449295163154602, "logits_per_char": -0.724647581577301, "num_chars": 2}, {"sum_logits": -1.600725531578064, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.600725531578064, "logits_per_char": -0.800362765789032, "num_chars": 2}, {"sum_logits": -1.6470729112625122, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6470729112625122, "logits_per_char": -0.8235364556312561, "num_chars": 2}, {"sum_logits": -2.1091275215148926, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.1091275215148926, "logits_per_char": -1.0545637607574463, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 788, "native_id": "7b95825a19d6930d6aed35c7c57a2d82", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6090806722640991, "incorrect_loss_raw": 1.6217888593673706, "correct_loss_per_char": 0.8045403361320496, "incorrect_loss_per_char": 0.8108944296836853, "correct_loss_per_token": 1.6090806722640991, "incorrect_loss_per_token": 1.6217888593673706, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4925726652145386, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4925726652145386, "logits_per_char": -0.7462863326072693, "num_chars": 2}, {"sum_logits": -1.57302725315094, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.57302725315094, "logits_per_char": -0.78651362657547, "num_chars": 2}, {"sum_logits": -1.6159228086471558, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6159228086471558, "logits_per_char": -0.8079614043235779, "num_chars": 2}, {"sum_logits": -1.6090806722640991, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6090806722640991, "logits_per_char": -0.8045403361320496, "num_chars": 2}, {"sum_logits": -1.8056327104568481, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.8056327104568481, "logits_per_char": -0.9028163552284241, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 789, "native_id": "6b270159bd402ddd498a38153f9d1efe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.206367254257202, "incorrect_loss_raw": 1.5201203525066376, "correct_loss_per_char": 1.103183627128601, "incorrect_loss_per_char": 0.7600601762533188, "correct_loss_per_token": 2.206367254257202, "incorrect_loss_per_token": 1.5201203525066376, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2625690698623657, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.2625690698623657, "logits_per_char": -0.6312845349311829, "num_chars": 2}, {"sum_logits": -1.5467147827148438, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5467147827148438, "logits_per_char": -0.7733573913574219, "num_chars": 2}, {"sum_logits": -1.619307279586792, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.619307279586792, "logits_per_char": -0.809653639793396, "num_chars": 2}, {"sum_logits": -1.6518902778625488, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6518902778625488, "logits_per_char": -0.8259451389312744, "num_chars": 2}, {"sum_logits": -2.206367254257202, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.206367254257202, "logits_per_char": -1.103183627128601, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 790, "native_id": "eae0e03773365064ce915603c7addc91", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0826892852783203, "incorrect_loss_raw": 1.8658126592636108, "correct_loss_per_char": 0.5413446426391602, "incorrect_loss_per_char": 0.9329063296318054, "correct_loss_per_token": 1.0826892852783203, "incorrect_loss_per_token": 1.8658126592636108, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0826892852783203, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.0826892852783203, "logits_per_char": -0.5413446426391602, "num_chars": 2}, {"sum_logits": -1.438380241394043, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.438380241394043, "logits_per_char": -0.7191901206970215, "num_chars": 2}, {"sum_logits": -1.7729473114013672, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7729473114013672, "logits_per_char": -0.8864736557006836, "num_chars": 2}, {"sum_logits": -1.8209993839263916, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8209993839263916, "logits_per_char": -0.9104996919631958, "num_chars": 2}, {"sum_logits": -2.4309237003326416, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.4309237003326416, "logits_per_char": -1.2154618501663208, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 791, "native_id": "a5ca7c89196e54938b5827814d0071d4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7577334642410278, "incorrect_loss_raw": 1.583043783903122, "correct_loss_per_char": 0.8788667321205139, "incorrect_loss_per_char": 0.791521891951561, "correct_loss_per_token": 1.7577334642410278, "incorrect_loss_per_token": 1.583043783903122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.487837791442871, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.487837791442871, "logits_per_char": -0.7439188957214355, "num_chars": 2}, {"sum_logits": -1.6213008165359497, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.6213008165359497, "logits_per_char": -0.8106504082679749, "num_chars": 2}, {"sum_logits": -1.636573076248169, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.636573076248169, "logits_per_char": -0.8182865381240845, "num_chars": 2}, {"sum_logits": -1.586463451385498, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.586463451385498, "logits_per_char": -0.793231725692749, "num_chars": 2}, {"sum_logits": -1.7577334642410278, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.7577334642410278, "logits_per_char": -0.8788667321205139, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 792, "native_id": "ffc3461d437a1c6c22d1c4f6439ebd9c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5258945226669312, "incorrect_loss_raw": 1.6521215438842773, "correct_loss_per_char": 0.7629472613334656, "incorrect_loss_per_char": 0.8260607719421387, "correct_loss_per_token": 1.5258945226669312, "incorrect_loss_per_token": 1.6521215438842773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.500104308128357, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.500104308128357, "logits_per_char": -0.7500521540641785, "num_chars": 2}, {"sum_logits": -1.5621814727783203, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5621814727783203, "logits_per_char": -0.7810907363891602, "num_chars": 2}, {"sum_logits": -1.6237595081329346, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6237595081329346, "logits_per_char": -0.8118797540664673, "num_chars": 2}, {"sum_logits": -1.5258945226669312, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5258945226669312, "logits_per_char": -0.7629472613334656, "num_chars": 2}, {"sum_logits": -1.9224408864974976, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.9224408864974976, "logits_per_char": -0.9612204432487488, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 793, "native_id": "aa2dcd9bcce5e4445bd3bacbf0bb11d3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5390647649765015, "incorrect_loss_raw": 1.6549760401248932, "correct_loss_per_char": 0.7695323824882507, "incorrect_loss_per_char": 0.8274880200624466, "correct_loss_per_token": 1.5390647649765015, "incorrect_loss_per_token": 1.6549760401248932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5314559936523438, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5314559936523438, "logits_per_char": -0.7657279968261719, "num_chars": 2}, {"sum_logits": -1.5245341062545776, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.5245341062545776, "logits_per_char": -0.7622670531272888, "num_chars": 2}, {"sum_logits": -1.5390647649765015, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5390647649765015, "logits_per_char": -0.7695323824882507, "num_chars": 2}, {"sum_logits": -1.5401480197906494, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5401480197906494, "logits_per_char": -0.7700740098953247, "num_chars": 2}, {"sum_logits": -2.023766040802002, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.023766040802002, "logits_per_char": -1.011883020401001, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 794, "native_id": "6cc797ec148c1fc74592957a55bd0951", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.043801784515381, "incorrect_loss_raw": 1.5393126904964447, "correct_loss_per_char": 1.0219008922576904, "incorrect_loss_per_char": 0.7696563452482224, "correct_loss_per_token": 2.043801784515381, "incorrect_loss_per_token": 1.5393126904964447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3632802963256836, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3632802963256836, "logits_per_char": -0.6816401481628418, "num_chars": 2}, {"sum_logits": -1.5290032625198364, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5290032625198364, "logits_per_char": -0.7645016312599182, "num_chars": 2}, {"sum_logits": -1.5356310606002808, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5356310606002808, "logits_per_char": -0.7678155303001404, "num_chars": 2}, {"sum_logits": -1.729336142539978, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.729336142539978, "logits_per_char": -0.864668071269989, "num_chars": 2}, {"sum_logits": -2.043801784515381, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.043801784515381, "logits_per_char": -1.0219008922576904, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 795, "native_id": "64dbe5cb840ef4f1d25f8b68db8d5fed", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7436602115631104, "incorrect_loss_raw": 1.5973311066627502, "correct_loss_per_char": 0.8718301057815552, "incorrect_loss_per_char": 0.7986655533313751, "correct_loss_per_token": 1.7436602115631104, "incorrect_loss_per_token": 1.5973311066627502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3909028768539429, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.3909028768539429, "logits_per_char": -0.6954514384269714, "num_chars": 2}, {"sum_logits": -1.5811004638671875, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5811004638671875, "logits_per_char": -0.7905502319335938, "num_chars": 2}, {"sum_logits": -1.5843991041183472, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5843991041183472, "logits_per_char": -0.7921995520591736, "num_chars": 2}, {"sum_logits": -1.8329219818115234, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.8329219818115234, "logits_per_char": -0.9164609909057617, "num_chars": 2}, {"sum_logits": -1.7436602115631104, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7436602115631104, "logits_per_char": -0.8718301057815552, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 796, "native_id": "a74753bf249c1cbcff632c5c16b0397b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4751930236816406, "incorrect_loss_raw": 1.6774485111236572, "correct_loss_per_char": 0.7375965118408203, "incorrect_loss_per_char": 0.8387242555618286, "correct_loss_per_token": 1.4751930236816406, "incorrect_loss_per_token": 1.6774485111236572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6106011867523193, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6106011867523193, "logits_per_char": -0.8053005933761597, "num_chars": 2}, {"sum_logits": -1.521744728088379, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.521744728088379, "logits_per_char": -0.7608723640441895, "num_chars": 2}, {"sum_logits": -1.4978609085083008, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4978609085083008, "logits_per_char": -0.7489304542541504, "num_chars": 2}, {"sum_logits": -1.4751930236816406, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4751930236816406, "logits_per_char": -0.7375965118408203, "num_chars": 2}, {"sum_logits": -2.07958722114563, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.07958722114563, "logits_per_char": -1.039793610572815, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 797, "native_id": "9190efbd77fe10b989fcaae35e208a0f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6131651401519775, "incorrect_loss_raw": 1.6274673342704773, "correct_loss_per_char": 0.8065825700759888, "incorrect_loss_per_char": 0.8137336671352386, "correct_loss_per_token": 1.6131651401519775, "incorrect_loss_per_token": 1.6274673342704773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6131651401519775, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6131651401519775, "logits_per_char": -0.8065825700759888, "num_chars": 2}, {"sum_logits": -1.6721563339233398, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6721563339233398, "logits_per_char": -0.8360781669616699, "num_chars": 2}, {"sum_logits": -1.5577974319458008, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5577974319458008, "logits_per_char": -0.7788987159729004, "num_chars": 2}, {"sum_logits": -1.430319905281067, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.430319905281067, "logits_per_char": -0.7151599526405334, "num_chars": 2}, {"sum_logits": -1.8495956659317017, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.8495956659317017, "logits_per_char": -0.9247978329658508, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 798, "native_id": "ff0303db294a823d4138fb81a6ee6438", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3679735660552979, "incorrect_loss_raw": 1.7178900837898254, "correct_loss_per_char": 0.6839867830276489, "incorrect_loss_per_char": 0.8589450418949127, "correct_loss_per_token": 1.3679735660552979, "incorrect_loss_per_token": 1.7178900837898254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3679735660552979, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3679735660552979, "logits_per_char": -0.6839867830276489, "num_chars": 2}, {"sum_logits": -1.5488274097442627, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5488274097442627, "logits_per_char": -0.7744137048721313, "num_chars": 2}, {"sum_logits": -1.5074965953826904, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5074965953826904, "logits_per_char": -0.7537482976913452, "num_chars": 2}, {"sum_logits": -1.6424565315246582, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6424565315246582, "logits_per_char": -0.8212282657623291, "num_chars": 2}, {"sum_logits": -2.1727797985076904, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.1727797985076904, "logits_per_char": -1.0863898992538452, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 799, "native_id": "63963c9c15835d451aac2e1e0b116388", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4786038398742676, "incorrect_loss_raw": 1.6810088455677032, "correct_loss_per_char": 0.7393019199371338, "incorrect_loss_per_char": 0.8405044227838516, "correct_loss_per_token": 1.4786038398742676, "incorrect_loss_per_token": 1.6810088455677032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.382603406906128, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.382603406906128, "logits_per_char": -0.691301703453064, "num_chars": 2}, {"sum_logits": -1.4786038398742676, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4786038398742676, "logits_per_char": -0.7393019199371338, "num_chars": 2}, {"sum_logits": -1.7124109268188477, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7124109268188477, "logits_per_char": -0.8562054634094238, "num_chars": 2}, {"sum_logits": -1.5654505491256714, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5654505491256714, "logits_per_char": -0.7827252745628357, "num_chars": 2}, {"sum_logits": -2.063570499420166, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.063570499420166, "logits_per_char": -1.031785249710083, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 800, "native_id": "cc8324b73ed9625e723ef041dfc77a37", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6243999004364014, "incorrect_loss_raw": 1.6400110721588135, "correct_loss_per_char": 0.8121999502182007, "incorrect_loss_per_char": 0.8200055360794067, "correct_loss_per_token": 1.6243999004364014, "incorrect_loss_per_token": 1.6400110721588135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3674123287200928, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3674123287200928, "logits_per_char": -0.6837061643600464, "num_chars": 2}, {"sum_logits": -1.5395901203155518, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5395901203155518, "logits_per_char": -0.7697950601577759, "num_chars": 2}, {"sum_logits": -1.629955768585205, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.629955768585205, "logits_per_char": -0.8149778842926025, "num_chars": 2}, {"sum_logits": -1.6243999004364014, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6243999004364014, "logits_per_char": -0.8121999502182007, "num_chars": 2}, {"sum_logits": -2.0230860710144043, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.0230860710144043, "logits_per_char": -1.0115430355072021, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 801, "native_id": "684dbde19719e8224113433981d6e01e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4885282516479492, "incorrect_loss_raw": 1.668624609708786, "correct_loss_per_char": 0.7442641258239746, "incorrect_loss_per_char": 0.834312304854393, "correct_loss_per_token": 1.4885282516479492, "incorrect_loss_per_token": 1.668624609708786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4885282516479492, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.4885282516479492, "logits_per_char": -0.7442641258239746, "num_chars": 2}, {"sum_logits": -1.4789774417877197, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.4789774417877197, "logits_per_char": -0.7394887208938599, "num_chars": 2}, {"sum_logits": -1.6124413013458252, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6124413013458252, "logits_per_char": -0.8062206506729126, "num_chars": 2}, {"sum_logits": -1.5787094831466675, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5787094831466675, "logits_per_char": -0.7893547415733337, "num_chars": 2}, {"sum_logits": -2.0043702125549316, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.0043702125549316, "logits_per_char": -1.0021851062774658, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 802, "native_id": "21450618657881d8c5af73691f3423a7_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5660979747772217, "incorrect_loss_raw": 1.638508141040802, "correct_loss_per_char": 0.7830489873886108, "incorrect_loss_per_char": 0.819254070520401, "correct_loss_per_token": 1.5660979747772217, "incorrect_loss_per_token": 1.638508141040802, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.453783392906189, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.453783392906189, "logits_per_char": -0.7268916964530945, "num_chars": 2}, {"sum_logits": -1.5660979747772217, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5660979747772217, "logits_per_char": -0.7830489873886108, "num_chars": 2}, {"sum_logits": -1.5576468706130981, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5576468706130981, "logits_per_char": -0.7788234353065491, "num_chars": 2}, {"sum_logits": -1.6722865104675293, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6722865104675293, "logits_per_char": -0.8361432552337646, "num_chars": 2}, {"sum_logits": -1.8703157901763916, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8703157901763916, "logits_per_char": -0.9351578950881958, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 803, "native_id": "8b94b61b604ec0d7508804033eec6d23", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.036675453186035, "incorrect_loss_raw": 1.5386774837970734, "correct_loss_per_char": 1.0183377265930176, "incorrect_loss_per_char": 0.7693387418985367, "correct_loss_per_token": 2.036675453186035, "incorrect_loss_per_token": 1.5386774837970734, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3836992979049683, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3836992979049683, "logits_per_char": -0.6918496489524841, "num_chars": 2}, {"sum_logits": -1.4848601818084717, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4848601818084717, "logits_per_char": -0.7424300909042358, "num_chars": 2}, {"sum_logits": -1.5880426168441772, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5880426168441772, "logits_per_char": -0.7940213084220886, "num_chars": 2}, {"sum_logits": -1.6981078386306763, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6981078386306763, "logits_per_char": -0.8490539193153381, "num_chars": 2}, {"sum_logits": -2.036675453186035, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.036675453186035, "logits_per_char": -1.0183377265930176, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 804, "native_id": "52ecf169febc95a7f5ccb048fc85857d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3376749753952026, "incorrect_loss_raw": 1.7062276005744934, "correct_loss_per_char": 0.6688374876976013, "incorrect_loss_per_char": 0.8531138002872467, "correct_loss_per_token": 1.3376749753952026, "incorrect_loss_per_token": 1.7062276005744934, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3376749753952026, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3376749753952026, "logits_per_char": -0.6688374876976013, "num_chars": 2}, {"sum_logits": -1.565340280532837, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.565340280532837, "logits_per_char": -0.7826701402664185, "num_chars": 2}, {"sum_logits": -1.6199440956115723, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6199440956115723, "logits_per_char": -0.8099720478057861, "num_chars": 2}, {"sum_logits": -1.712029218673706, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.712029218673706, "logits_per_char": -0.856014609336853, "num_chars": 2}, {"sum_logits": -1.9275968074798584, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9275968074798584, "logits_per_char": -0.9637984037399292, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 805, "native_id": "e408a5a031caec33782cb3b3a005eecc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9991446733474731, "incorrect_loss_raw": 1.5430136024951935, "correct_loss_per_char": 0.9995723366737366, "incorrect_loss_per_char": 0.7715068012475967, "correct_loss_per_token": 1.9991446733474731, "incorrect_loss_per_token": 1.5430136024951935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3701555728912354, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.3701555728912354, "logits_per_char": -0.6850777864456177, "num_chars": 2}, {"sum_logits": -1.5649842023849487, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5649842023849487, "logits_per_char": -0.7824921011924744, "num_chars": 2}, {"sum_logits": -1.5887192487716675, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5887192487716675, "logits_per_char": -0.7943596243858337, "num_chars": 2}, {"sum_logits": -1.6481953859329224, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.6481953859329224, "logits_per_char": -0.8240976929664612, "num_chars": 2}, {"sum_logits": -1.9991446733474731, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.9991446733474731, "logits_per_char": -0.9995723366737366, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 806, "native_id": "31bd05ba62a16ee35217224b98c6baea", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5807465314865112, "incorrect_loss_raw": 1.6509687900543213, "correct_loss_per_char": 0.7903732657432556, "incorrect_loss_per_char": 0.8254843950271606, "correct_loss_per_token": 1.5807465314865112, "incorrect_loss_per_token": 1.6509687900543213, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.472639799118042, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.472639799118042, "logits_per_char": -0.736319899559021, "num_chars": 2}, {"sum_logits": -1.4629782438278198, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4629782438278198, "logits_per_char": -0.7314891219139099, "num_chars": 2}, {"sum_logits": -1.5807465314865112, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5807465314865112, "logits_per_char": -0.7903732657432556, "num_chars": 2}, {"sum_logits": -1.5973395109176636, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5973395109176636, "logits_per_char": -0.7986697554588318, "num_chars": 2}, {"sum_logits": -2.0709176063537598, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.0709176063537598, "logits_per_char": -1.0354588031768799, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 807, "native_id": "b4043bd1f65a8ad088e62042eca259c2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6172536611557007, "incorrect_loss_raw": 1.6882382333278656, "correct_loss_per_char": 0.8086268305778503, "incorrect_loss_per_char": 0.8441191166639328, "correct_loss_per_token": 1.6172536611557007, "incorrect_loss_per_token": 1.6882382333278656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3438825607299805, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3438825607299805, "logits_per_char": -0.6719412803649902, "num_chars": 2}, {"sum_logits": -1.400742530822754, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.400742530822754, "logits_per_char": -0.700371265411377, "num_chars": 2}, {"sum_logits": -1.6172536611557007, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6172536611557007, "logits_per_char": -0.8086268305778503, "num_chars": 2}, {"sum_logits": -1.617855429649353, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.617855429649353, "logits_per_char": -0.8089277148246765, "num_chars": 2}, {"sum_logits": -2.390472412109375, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.390472412109375, "logits_per_char": -1.1952362060546875, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 808, "native_id": "4302e727e47f464511d4d04f22bed0d2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.437736988067627, "incorrect_loss_raw": 1.6770985424518585, "correct_loss_per_char": 0.7188684940338135, "incorrect_loss_per_char": 0.8385492712259293, "correct_loss_per_token": 1.437736988067627, "incorrect_loss_per_token": 1.6770985424518585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5520960092544556, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5520960092544556, "logits_per_char": -0.7760480046272278, "num_chars": 2}, {"sum_logits": -1.6267088651657104, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6267088651657104, "logits_per_char": -0.8133544325828552, "num_chars": 2}, {"sum_logits": -1.5678123235702515, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5678123235702515, "logits_per_char": -0.7839061617851257, "num_chars": 2}, {"sum_logits": -1.437736988067627, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.437736988067627, "logits_per_char": -0.7188684940338135, "num_chars": 2}, {"sum_logits": -1.9617769718170166, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.9617769718170166, "logits_per_char": -0.9808884859085083, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 809, "native_id": "f0d473701d52125dd055d23042de1b0d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3975517749786377, "incorrect_loss_raw": 1.6813945174217224, "correct_loss_per_char": 0.6987758874893188, "incorrect_loss_per_char": 0.8406972587108612, "correct_loss_per_token": 1.3975517749786377, "incorrect_loss_per_token": 1.6813945174217224, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3975517749786377, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3975517749786377, "logits_per_char": -0.6987758874893188, "num_chars": 2}, {"sum_logits": -1.6205689907073975, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6205689907073975, "logits_per_char": -0.8102844953536987, "num_chars": 2}, {"sum_logits": -1.604332447052002, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.604332447052002, "logits_per_char": -0.802166223526001, "num_chars": 2}, {"sum_logits": -1.6387097835540771, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6387097835540771, "logits_per_char": -0.8193548917770386, "num_chars": 2}, {"sum_logits": -1.861966848373413, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.861966848373413, "logits_per_char": -0.9309834241867065, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 810, "native_id": "d35112a99ab3983fb51c3adae80bc2da", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5409326553344727, "incorrect_loss_raw": 1.6641997396945953, "correct_loss_per_char": 0.7704663276672363, "incorrect_loss_per_char": 0.8320998698472977, "correct_loss_per_token": 1.5409326553344727, "incorrect_loss_per_token": 1.6641997396945953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3954708576202393, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3954708576202393, "logits_per_char": -0.6977354288101196, "num_chars": 2}, {"sum_logits": -1.559658169746399, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.559658169746399, "logits_per_char": -0.7798290848731995, "num_chars": 2}, {"sum_logits": -1.6174447536468506, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6174447536468506, "logits_per_char": -0.8087223768234253, "num_chars": 2}, {"sum_logits": -1.5409326553344727, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5409326553344727, "logits_per_char": -0.7704663276672363, "num_chars": 2}, {"sum_logits": -2.0842251777648926, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0842251777648926, "logits_per_char": -1.0421125888824463, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 811, "native_id": "661474a1a0c29dd7a243b284535ac934", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7240716218948364, "incorrect_loss_raw": 1.6458414494991302, "correct_loss_per_char": 0.8620358109474182, "incorrect_loss_per_char": 0.8229207247495651, "correct_loss_per_token": 1.7240716218948364, "incorrect_loss_per_token": 1.6458414494991302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2539111375808716, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2539111375808716, "logits_per_char": -0.6269555687904358, "num_chars": 2}, {"sum_logits": -1.5163313150405884, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5163313150405884, "logits_per_char": -0.7581656575202942, "num_chars": 2}, {"sum_logits": -1.5950652360916138, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5950652360916138, "logits_per_char": -0.7975326180458069, "num_chars": 2}, {"sum_logits": -1.7240716218948364, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7240716218948364, "logits_per_char": -0.8620358109474182, "num_chars": 2}, {"sum_logits": -2.2180581092834473, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.2180581092834473, "logits_per_char": -1.1090290546417236, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 812, "native_id": "6416dcdf9b8d7d2787f07e7426f86fe4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8628891706466675, "incorrect_loss_raw": 1.5662731528282166, "correct_loss_per_char": 0.9314445853233337, "incorrect_loss_per_char": 0.7831365764141083, "correct_loss_per_token": 1.8628891706466675, "incorrect_loss_per_token": 1.5662731528282166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4362059831619263, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.4362059831619263, "logits_per_char": -0.7181029915809631, "num_chars": 2}, {"sum_logits": -1.4826105833053589, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.4826105833053589, "logits_per_char": -0.7413052916526794, "num_chars": 2}, {"sum_logits": -1.6498063802719116, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6498063802719116, "logits_per_char": -0.8249031901359558, "num_chars": 2}, {"sum_logits": -1.6964696645736694, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6964696645736694, "logits_per_char": -0.8482348322868347, "num_chars": 2}, {"sum_logits": -1.8628891706466675, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.8628891706466675, "logits_per_char": -0.9314445853233337, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 813, "native_id": "0f54a1ee30a0034a3d2db1bfdef9ca85", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5576598644256592, "incorrect_loss_raw": 1.6998044848442078, "correct_loss_per_char": 0.7788299322128296, "incorrect_loss_per_char": 0.8499022424221039, "correct_loss_per_token": 1.5576598644256592, "incorrect_loss_per_token": 1.6998044848442078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3878251314163208, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.3878251314163208, "logits_per_char": -0.6939125657081604, "num_chars": 2}, {"sum_logits": -1.37645423412323, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.37645423412323, "logits_per_char": -0.688227117061615, "num_chars": 2}, {"sum_logits": -1.5576598644256592, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5576598644256592, "logits_per_char": -0.7788299322128296, "num_chars": 2}, {"sum_logits": -1.6611716747283936, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6611716747283936, "logits_per_char": -0.8305858373641968, "num_chars": 2}, {"sum_logits": -2.3737668991088867, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.3737668991088867, "logits_per_char": -1.1868834495544434, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 814, "native_id": "7850beb1209c41fabe385cbedc96a61a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.178965449333191, "incorrect_loss_raw": 1.8043623268604279, "correct_loss_per_char": 0.5894827246665955, "incorrect_loss_per_char": 0.9021811634302139, "correct_loss_per_token": 1.178965449333191, "incorrect_loss_per_token": 1.8043623268604279, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.178965449333191, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.178965449333191, "logits_per_char": -0.5894827246665955, "num_chars": 2}, {"sum_logits": -1.489869236946106, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.489869236946106, "logits_per_char": -0.744934618473053, "num_chars": 2}, {"sum_logits": -1.7063895463943481, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7063895463943481, "logits_per_char": -0.8531947731971741, "num_chars": 2}, {"sum_logits": -1.7064489126205444, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7064489126205444, "logits_per_char": -0.8532244563102722, "num_chars": 2}, {"sum_logits": -2.314741611480713, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.314741611480713, "logits_per_char": -1.1573708057403564, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 815, "native_id": "cdb06b28b9c4e7ef7e880d1f096fd409", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5601955652236938, "incorrect_loss_raw": 1.6627181470394135, "correct_loss_per_char": 0.7800977826118469, "incorrect_loss_per_char": 0.8313590735197067, "correct_loss_per_token": 1.5601955652236938, "incorrect_loss_per_token": 1.6627181470394135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4219859838485718, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4219859838485718, "logits_per_char": -0.7109929919242859, "num_chars": 2}, {"sum_logits": -1.496050477027893, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.496050477027893, "logits_per_char": -0.7480252385139465, "num_chars": 2}, {"sum_logits": -1.5601955652236938, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5601955652236938, "logits_per_char": -0.7800977826118469, "num_chars": 2}, {"sum_logits": -1.602784514427185, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.602784514427185, "logits_per_char": -0.8013922572135925, "num_chars": 2}, {"sum_logits": -2.130051612854004, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.130051612854004, "logits_per_char": -1.065025806427002, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 816, "native_id": "14309d9bd3c13d1c0efb625198f6304a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0211169719696045, "incorrect_loss_raw": 1.5413013994693756, "correct_loss_per_char": 1.0105584859848022, "incorrect_loss_per_char": 0.7706506997346878, "correct_loss_per_token": 2.0211169719696045, "incorrect_loss_per_token": 1.5413013994693756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.329269289970398, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.329269289970398, "logits_per_char": -0.664634644985199, "num_chars": 2}, {"sum_logits": -1.56595778465271, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.56595778465271, "logits_per_char": -0.782978892326355, "num_chars": 2}, {"sum_logits": -1.6117308139801025, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6117308139801025, "logits_per_char": -0.8058654069900513, "num_chars": 2}, {"sum_logits": -1.658247709274292, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.658247709274292, "logits_per_char": -0.829123854637146, "num_chars": 2}, {"sum_logits": -2.0211169719696045, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.0211169719696045, "logits_per_char": -1.0105584859848022, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 817, "native_id": "a00276c6db928900772c0320aeff77c0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.470340609550476, "incorrect_loss_raw": 1.6845411658287048, "correct_loss_per_char": 0.735170304775238, "incorrect_loss_per_char": 0.8422705829143524, "correct_loss_per_token": 1.470340609550476, "incorrect_loss_per_token": 1.6845411658287048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3395493030548096, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3395493030548096, "logits_per_char": -0.6697746515274048, "num_chars": 2}, {"sum_logits": -1.470340609550476, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.470340609550476, "logits_per_char": -0.735170304775238, "num_chars": 2}, {"sum_logits": -1.6394307613372803, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6394307613372803, "logits_per_char": -0.8197153806686401, "num_chars": 2}, {"sum_logits": -1.7144219875335693, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7144219875335693, "logits_per_char": -0.8572109937667847, "num_chars": 2}, {"sum_logits": -2.04476261138916, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.04476261138916, "logits_per_char": -1.02238130569458, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 818, "native_id": "4706be6e24f1fafd9ff9fe63583acffd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.046185255050659, "incorrect_loss_raw": 1.5372219383716583, "correct_loss_per_char": 1.0230926275253296, "incorrect_loss_per_char": 0.7686109691858292, "correct_loss_per_token": 2.046185255050659, "incorrect_loss_per_token": 1.5372219383716583, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3655680418014526, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3655680418014526, "logits_per_char": -0.6827840209007263, "num_chars": 2}, {"sum_logits": -1.5064101219177246, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5064101219177246, "logits_per_char": -0.7532050609588623, "num_chars": 2}, {"sum_logits": -1.5742449760437012, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5742449760437012, "logits_per_char": -0.7871224880218506, "num_chars": 2}, {"sum_logits": -1.7026646137237549, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7026646137237549, "logits_per_char": -0.8513323068618774, "num_chars": 2}, {"sum_logits": -2.046185255050659, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.046185255050659, "logits_per_char": -1.0230926275253296, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 819, "native_id": "ee8819b2da5453848c1cbb9d9c93403b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5543204545974731, "incorrect_loss_raw": 1.6469482779502869, "correct_loss_per_char": 0.7771602272987366, "incorrect_loss_per_char": 0.8234741389751434, "correct_loss_per_token": 1.5543204545974731, "incorrect_loss_per_token": 1.6469482779502869, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4231634140014648, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4231634140014648, "logits_per_char": -0.7115817070007324, "num_chars": 2}, {"sum_logits": -1.5543204545974731, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5543204545974731, "logits_per_char": -0.7771602272987366, "num_chars": 2}, {"sum_logits": -1.5478534698486328, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5478534698486328, "logits_per_char": -0.7739267349243164, "num_chars": 2}, {"sum_logits": -1.6747492551803589, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6747492551803589, "logits_per_char": -0.8373746275901794, "num_chars": 2}, {"sum_logits": -1.942026972770691, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.942026972770691, "logits_per_char": -0.9710134863853455, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 820, "native_id": "84ea43b967259814d939c62131f74df0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.621101975440979, "incorrect_loss_raw": 1.626477986574173, "correct_loss_per_char": 0.8105509877204895, "incorrect_loss_per_char": 0.8132389932870865, "correct_loss_per_token": 1.621101975440979, "incorrect_loss_per_token": 1.626477986574173, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3797986507415771, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.3797986507415771, "logits_per_char": -0.6898993253707886, "num_chars": 2}, {"sum_logits": -1.621101975440979, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.621101975440979, "logits_per_char": -0.8105509877204895, "num_chars": 2}, {"sum_logits": -1.6218892335891724, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6218892335891724, "logits_per_char": -0.8109446167945862, "num_chars": 2}, {"sum_logits": -1.6496750116348267, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6496750116348267, "logits_per_char": -0.8248375058174133, "num_chars": 2}, {"sum_logits": -1.8545490503311157, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.8545490503311157, "logits_per_char": -0.9272745251655579, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 821, "native_id": "60e7338e9e6bfc746a15a161eb12706c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.071188449859619, "incorrect_loss_raw": 1.5292546153068542, "correct_loss_per_char": 1.0355942249298096, "incorrect_loss_per_char": 0.7646273076534271, "correct_loss_per_token": 2.071188449859619, "incorrect_loss_per_token": 1.5292546153068542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3898521661758423, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3898521661758423, "logits_per_char": -0.6949260830879211, "num_chars": 2}, {"sum_logits": -1.595687985420227, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.595687985420227, "logits_per_char": -0.7978439927101135, "num_chars": 2}, {"sum_logits": -1.5524457693099976, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5524457693099976, "logits_per_char": -0.7762228846549988, "num_chars": 2}, {"sum_logits": -1.57903254032135, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.57903254032135, "logits_per_char": -0.789516270160675, "num_chars": 2}, {"sum_logits": -2.071188449859619, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.071188449859619, "logits_per_char": -1.0355942249298096, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 822, "native_id": "a0f5414bf98e094f4d807abee28861a4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.63564133644104, "incorrect_loss_raw": 1.6292555332183838, "correct_loss_per_char": 0.81782066822052, "incorrect_loss_per_char": 0.8146277666091919, "correct_loss_per_token": 1.63564133644104, "incorrect_loss_per_token": 1.6292555332183838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4383277893066406, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4383277893066406, "logits_per_char": -0.7191638946533203, "num_chars": 2}, {"sum_logits": -1.4926551580429077, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4926551580429077, "logits_per_char": -0.7463275790214539, "num_chars": 2}, {"sum_logits": -1.63564133644104, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.63564133644104, "logits_per_char": -0.81782066822052, "num_chars": 2}, {"sum_logits": -1.6257586479187012, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6257586479187012, "logits_per_char": -0.8128793239593506, "num_chars": 2}, {"sum_logits": -1.9602805376052856, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9602805376052856, "logits_per_char": -0.9801402688026428, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 823, "native_id": "44120a9443c619d98ce5bfe4bb219c43", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3390134572982788, "incorrect_loss_raw": 1.720514327287674, "correct_loss_per_char": 0.6695067286491394, "incorrect_loss_per_char": 0.860257163643837, "correct_loss_per_token": 1.3390134572982788, "incorrect_loss_per_token": 1.720514327287674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3390134572982788, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.3390134572982788, "logits_per_char": -0.6695067286491394, "num_chars": 2}, {"sum_logits": -1.4757229089736938, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.4757229089736938, "logits_per_char": -0.7378614544868469, "num_chars": 2}, {"sum_logits": -1.6482138633728027, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.6482138633728027, "logits_per_char": -0.8241069316864014, "num_chars": 2}, {"sum_logits": -1.6561119556427002, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.6561119556427002, "logits_per_char": -0.8280559778213501, "num_chars": 2}, {"sum_logits": -2.102008581161499, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -2.102008581161499, "logits_per_char": -1.0510042905807495, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 824, "native_id": "38ab26e29a0984b212006d39185c43f3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5296502113342285, "incorrect_loss_raw": 1.6401395499706268, "correct_loss_per_char": 0.7648251056671143, "incorrect_loss_per_char": 0.8200697749853134, "correct_loss_per_token": 1.5296502113342285, "incorrect_loss_per_token": 1.6401395499706268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5175693035125732, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.5175693035125732, "logits_per_char": -0.7587846517562866, "num_chars": 2}, {"sum_logits": -1.5296502113342285, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5296502113342285, "logits_per_char": -0.7648251056671143, "num_chars": 2}, {"sum_logits": -1.6067026853561401, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6067026853561401, "logits_per_char": -0.8033513426780701, "num_chars": 2}, {"sum_logits": -1.705439805984497, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.705439805984497, "logits_per_char": -0.8527199029922485, "num_chars": 2}, {"sum_logits": -1.7308464050292969, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.7308464050292969, "logits_per_char": -0.8654232025146484, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 825, "native_id": "a5e207803684eea8a43ca6670c50b354", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5448620319366455, "incorrect_loss_raw": 1.656321108341217, "correct_loss_per_char": 0.7724310159683228, "incorrect_loss_per_char": 0.8281605541706085, "correct_loss_per_token": 1.5448620319366455, "incorrect_loss_per_token": 1.656321108341217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4763609170913696, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4763609170913696, "logits_per_char": -0.7381804585456848, "num_chars": 2}, {"sum_logits": -1.5448620319366455, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5448620319366455, "logits_per_char": -0.7724310159683228, "num_chars": 2}, {"sum_logits": -1.548132300376892, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.548132300376892, "logits_per_char": -0.774066150188446, "num_chars": 2}, {"sum_logits": -1.5536935329437256, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5536935329437256, "logits_per_char": -0.7768467664718628, "num_chars": 2}, {"sum_logits": -2.047097682952881, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.047097682952881, "logits_per_char": -1.0235488414764404, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 826, "native_id": "af3b9a8b1962cd3bcd19e644d873e7bc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9320818185806274, "incorrect_loss_raw": 1.5514029264450073, "correct_loss_per_char": 0.9660409092903137, "incorrect_loss_per_char": 0.7757014632225037, "correct_loss_per_token": 1.9320818185806274, "incorrect_loss_per_token": 1.5514029264450073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4584344625473022, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.4584344625473022, "logits_per_char": -0.7292172312736511, "num_chars": 2}, {"sum_logits": -1.4931341409683228, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.4931341409683228, "logits_per_char": -0.7465670704841614, "num_chars": 2}, {"sum_logits": -1.5957378149032593, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5957378149032593, "logits_per_char": -0.7978689074516296, "num_chars": 2}, {"sum_logits": -1.658305287361145, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.658305287361145, "logits_per_char": -0.8291526436805725, "num_chars": 2}, {"sum_logits": -1.9320818185806274, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.9320818185806274, "logits_per_char": -0.9660409092903137, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 827, "native_id": "43a91955fd0717997a16897c3324e095", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6676055192947388, "incorrect_loss_raw": 1.634922355413437, "correct_loss_per_char": 0.8338027596473694, "incorrect_loss_per_char": 0.8174611777067184, "correct_loss_per_token": 1.6676055192947388, "incorrect_loss_per_token": 1.634922355413437, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3983412981033325, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3983412981033325, "logits_per_char": -0.6991706490516663, "num_chars": 2}, {"sum_logits": -1.4412473440170288, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4412473440170288, "logits_per_char": -0.7206236720085144, "num_chars": 2}, {"sum_logits": -1.6191829442977905, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6191829442977905, "logits_per_char": -0.8095914721488953, "num_chars": 2}, {"sum_logits": -1.6676055192947388, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6676055192947388, "logits_per_char": -0.8338027596473694, "num_chars": 2}, {"sum_logits": -2.0809178352355957, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.0809178352355957, "logits_per_char": -1.0404589176177979, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 828, "native_id": "7f7a6f2b3087bf37dadbe8aa8d358047", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6174516677856445, "incorrect_loss_raw": 1.6538248360157013, "correct_loss_per_char": 0.8087258338928223, "incorrect_loss_per_char": 0.8269124180078506, "correct_loss_per_token": 1.6174516677856445, "incorrect_loss_per_token": 1.6538248360157013, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3411413431167603, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3411413431167603, "logits_per_char": -0.6705706715583801, "num_chars": 2}, {"sum_logits": -1.4823195934295654, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4823195934295654, "logits_per_char": -0.7411597967147827, "num_chars": 2}, {"sum_logits": -1.6174516677856445, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6174516677856445, "logits_per_char": -0.8087258338928223, "num_chars": 2}, {"sum_logits": -1.6861910820007324, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6861910820007324, "logits_per_char": -0.8430955410003662, "num_chars": 2}, {"sum_logits": -2.105647325515747, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.105647325515747, "logits_per_char": -1.0528236627578735, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 829, "native_id": "37d88a9bb24913c1973cc26d4ce3394f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7355074882507324, "incorrect_loss_raw": 1.5898532569408417, "correct_loss_per_char": 0.8677537441253662, "incorrect_loss_per_char": 0.7949266284704208, "correct_loss_per_token": 1.7355074882507324, "incorrect_loss_per_token": 1.5898532569408417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5333614349365234, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.5333614349365234, "logits_per_char": -0.7666807174682617, "num_chars": 2}, {"sum_logits": -1.609954833984375, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.609954833984375, "logits_per_char": -0.8049774169921875, "num_chars": 2}, {"sum_logits": -1.7021310329437256, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7021310329437256, "logits_per_char": -0.8510655164718628, "num_chars": 2}, {"sum_logits": -1.5139657258987427, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.5139657258987427, "logits_per_char": -0.7569828629493713, "num_chars": 2}, {"sum_logits": -1.7355074882507324, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7355074882507324, "logits_per_char": -0.8677537441253662, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 830, "native_id": "001b0f5a841fd81d13fbe67c7c7179d6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0179519653320312, "incorrect_loss_raw": 1.5565553903579712, "correct_loss_per_char": 1.0089759826660156, "incorrect_loss_per_char": 0.7782776951789856, "correct_loss_per_token": 2.0179519653320312, "incorrect_loss_per_token": 1.5565553903579712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3420060873031616, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.3420060873031616, "logits_per_char": -0.6710030436515808, "num_chars": 2}, {"sum_logits": -1.3625401258468628, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.3625401258468628, "logits_per_char": -0.6812700629234314, "num_chars": 2}, {"sum_logits": -1.6922975778579712, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6922975778579712, "logits_per_char": -0.8461487889289856, "num_chars": 2}, {"sum_logits": -1.8293777704238892, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.8293777704238892, "logits_per_char": -0.9146888852119446, "num_chars": 2}, {"sum_logits": -2.0179519653320312, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -2.0179519653320312, "logits_per_char": -1.0089759826660156, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 831, "native_id": "9f9ca9bb06d6afc31b19c365fb29a1c9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6137975454330444, "incorrect_loss_raw": 1.657332420349121, "correct_loss_per_char": 0.8068987727165222, "incorrect_loss_per_char": 0.8286662101745605, "correct_loss_per_token": 1.6137975454330444, "incorrect_loss_per_token": 1.657332420349121, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3232899904251099, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3232899904251099, "logits_per_char": -0.6616449952125549, "num_chars": 2}, {"sum_logits": -1.5519177913665771, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5519177913665771, "logits_per_char": -0.7759588956832886, "num_chars": 2}, {"sum_logits": -1.6137975454330444, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6137975454330444, "logits_per_char": -0.8068987727165222, "num_chars": 2}, {"sum_logits": -1.6026479005813599, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6026479005813599, "logits_per_char": -0.8013239502906799, "num_chars": 2}, {"sum_logits": -2.1514739990234375, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.1514739990234375, "logits_per_char": -1.0757369995117188, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 832, "native_id": "d60c5a494539c66982c0f692afde9499", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4798967838287354, "incorrect_loss_raw": 1.7034840881824493, "correct_loss_per_char": 0.7399483919143677, "incorrect_loss_per_char": 0.8517420440912247, "correct_loss_per_token": 1.4798967838287354, "incorrect_loss_per_token": 1.7034840881824493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2858192920684814, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2858192920684814, "logits_per_char": -0.6429096460342407, "num_chars": 2}, {"sum_logits": -1.4798967838287354, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4798967838287354, "logits_per_char": -0.7399483919143677, "num_chars": 2}, {"sum_logits": -1.5873355865478516, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5873355865478516, "logits_per_char": -0.7936677932739258, "num_chars": 2}, {"sum_logits": -1.7394472360610962, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7394472360610962, "logits_per_char": -0.8697236180305481, "num_chars": 2}, {"sum_logits": -2.201334238052368, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.201334238052368, "logits_per_char": -1.100667119026184, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 833, "native_id": "a6d3a2cb250a6310b8cabd31dbe2138c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.424142599105835, "incorrect_loss_raw": 1.6800084710121155, "correct_loss_per_char": 0.7120712995529175, "incorrect_loss_per_char": 0.8400042355060577, "correct_loss_per_token": 1.424142599105835, "incorrect_loss_per_token": 1.6800084710121155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.424142599105835, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.424142599105835, "logits_per_char": -0.7120712995529175, "num_chars": 2}, {"sum_logits": -1.4969533681869507, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4969533681869507, "logits_per_char": -0.7484766840934753, "num_chars": 2}, {"sum_logits": -1.5934051275253296, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5934051275253296, "logits_per_char": -0.7967025637626648, "num_chars": 2}, {"sum_logits": -1.689792513847351, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.689792513847351, "logits_per_char": -0.8448962569236755, "num_chars": 2}, {"sum_logits": -1.9398828744888306, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9398828744888306, "logits_per_char": -0.9699414372444153, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 834, "native_id": "27c523eb9099d2eec66296558eb4448e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7113882303237915, "incorrect_loss_raw": 1.6208038032054901, "correct_loss_per_char": 0.8556941151618958, "incorrect_loss_per_char": 0.8104019016027451, "correct_loss_per_token": 1.7113882303237915, "incorrect_loss_per_token": 1.6208038032054901, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3070542812347412, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3070542812347412, "logits_per_char": -0.6535271406173706, "num_chars": 2}, {"sum_logits": -1.5126372575759888, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5126372575759888, "logits_per_char": -0.7563186287879944, "num_chars": 2}, {"sum_logits": -1.7113882303237915, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7113882303237915, "logits_per_char": -0.8556941151618958, "num_chars": 2}, {"sum_logits": -1.6666978597640991, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6666978597640991, "logits_per_char": -0.8333489298820496, "num_chars": 2}, {"sum_logits": -1.9968258142471313, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9968258142471313, "logits_per_char": -0.9984129071235657, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 835, "native_id": "2509fdd7d94afe9d0c021654ce0ba93f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.58331298828125, "incorrect_loss_raw": 1.6277644038200378, "correct_loss_per_char": 0.791656494140625, "incorrect_loss_per_char": 0.8138822019100189, "correct_loss_per_token": 1.58331298828125, "incorrect_loss_per_token": 1.6277644038200378, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4732730388641357, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4732730388641357, "logits_per_char": -0.7366365194320679, "num_chars": 2}, {"sum_logits": -1.58331298828125, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.58331298828125, "logits_per_char": -0.791656494140625, "num_chars": 2}, {"sum_logits": -1.6091164350509644, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6091164350509644, "logits_per_char": -0.8045582175254822, "num_chars": 2}, {"sum_logits": -1.7158702611923218, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7158702611923218, "logits_per_char": -0.8579351305961609, "num_chars": 2}, {"sum_logits": -1.7127978801727295, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7127978801727295, "logits_per_char": -0.8563989400863647, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 836, "native_id": "75b8195e23c6bada574f1e41471b8f23", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2370142936706543, "incorrect_loss_raw": 1.7689478993415833, "correct_loss_per_char": 0.6185071468353271, "incorrect_loss_per_char": 0.8844739496707916, "correct_loss_per_token": 1.2370142936706543, "incorrect_loss_per_token": 1.7689478993415833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2370142936706543, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2370142936706543, "logits_per_char": -0.6185071468353271, "num_chars": 2}, {"sum_logits": -1.536161184310913, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.536161184310913, "logits_per_char": -0.7680805921554565, "num_chars": 2}, {"sum_logits": -1.6099328994750977, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6099328994750977, "logits_per_char": -0.8049664497375488, "num_chars": 2}, {"sum_logits": -1.7060801982879639, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7060801982879639, "logits_per_char": -0.8530400991439819, "num_chars": 2}, {"sum_logits": -2.2236173152923584, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.2236173152923584, "logits_per_char": -1.1118086576461792, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 837, "native_id": "df1bf6f3f87975aa0c1b6d6153d9ecef", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.534698724746704, "incorrect_loss_raw": 1.6628689169883728, "correct_loss_per_char": 0.767349362373352, "incorrect_loss_per_char": 0.8314344584941864, "correct_loss_per_token": 1.534698724746704, "incorrect_loss_per_token": 1.6628689169883728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4138388633728027, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4138388633728027, "logits_per_char": -0.7069194316864014, "num_chars": 2}, {"sum_logits": -1.534698724746704, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.534698724746704, "logits_per_char": -0.767349362373352, "num_chars": 2}, {"sum_logits": -1.5309457778930664, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5309457778930664, "logits_per_char": -0.7654728889465332, "num_chars": 2}, {"sum_logits": -1.643855094909668, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.643855094909668, "logits_per_char": -0.821927547454834, "num_chars": 2}, {"sum_logits": -2.062835931777954, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.062835931777954, "logits_per_char": -1.031417965888977, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 838, "native_id": "e99d4cb2e69d3e020ee9e4e9a84ac45b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.427886962890625, "incorrect_loss_raw": 1.7013258934020996, "correct_loss_per_char": 0.7139434814453125, "incorrect_loss_per_char": 0.8506629467010498, "correct_loss_per_token": 1.427886962890625, "incorrect_loss_per_token": 1.7013258934020996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3157002925872803, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.3157002925872803, "logits_per_char": -0.6578501462936401, "num_chars": 2}, {"sum_logits": -1.427886962890625, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.427886962890625, "logits_per_char": -0.7139434814453125, "num_chars": 2}, {"sum_logits": -1.6984789371490479, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.6984789371490479, "logits_per_char": -0.8492394685745239, "num_chars": 2}, {"sum_logits": -1.7544949054718018, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.7544949054718018, "logits_per_char": -0.8772474527359009, "num_chars": 2}, {"sum_logits": -2.0366294384002686, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -2.0366294384002686, "logits_per_char": -1.0183147192001343, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 839, "native_id": "b1274d6f5969dea4d46f43fbdc28fd97", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5194528102874756, "incorrect_loss_raw": 1.6527294516563416, "correct_loss_per_char": 0.7597264051437378, "incorrect_loss_per_char": 0.8263647258281708, "correct_loss_per_token": 1.5194528102874756, "incorrect_loss_per_token": 1.6527294516563416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5194528102874756, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5194528102874756, "logits_per_char": -0.7597264051437378, "num_chars": 2}, {"sum_logits": -1.5596132278442383, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5596132278442383, "logits_per_char": -0.7798066139221191, "num_chars": 2}, {"sum_logits": -1.4842681884765625, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.4842681884765625, "logits_per_char": -0.7421340942382812, "num_chars": 2}, {"sum_logits": -1.6358715295791626, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6358715295791626, "logits_per_char": -0.8179357647895813, "num_chars": 2}, {"sum_logits": -1.9311648607254028, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.9311648607254028, "logits_per_char": -0.9655824303627014, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 840, "native_id": "001cb999a61a5c8b4031ff53cf261714", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.419283151626587, "incorrect_loss_raw": 1.6753119826316833, "correct_loss_per_char": 0.7096415758132935, "incorrect_loss_per_char": 0.8376559913158417, "correct_loss_per_token": 1.419283151626587, "incorrect_loss_per_token": 1.6753119826316833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.419283151626587, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.419283151626587, "logits_per_char": -0.7096415758132935, "num_chars": 2}, {"sum_logits": -1.5861175060272217, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5861175060272217, "logits_per_char": -0.7930587530136108, "num_chars": 2}, {"sum_logits": -1.6970391273498535, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6970391273498535, "logits_per_char": -0.8485195636749268, "num_chars": 2}, {"sum_logits": -1.5663886070251465, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5663886070251465, "logits_per_char": -0.7831943035125732, "num_chars": 2}, {"sum_logits": -1.8517026901245117, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8517026901245117, "logits_per_char": -0.9258513450622559, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 841, "native_id": "18ee7a93410a6b4c9cec5d4894775991_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6029618978500366, "incorrect_loss_raw": 1.6290883719921112, "correct_loss_per_char": 0.8014809489250183, "incorrect_loss_per_char": 0.8145441859960556, "correct_loss_per_token": 1.6029618978500366, "incorrect_loss_per_token": 1.6290883719921112, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.594308853149414, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.594308853149414, "logits_per_char": -0.797154426574707, "num_chars": 2}, {"sum_logits": -1.522984504699707, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.522984504699707, "logits_per_char": -0.7614922523498535, "num_chars": 2}, {"sum_logits": -1.6029618978500366, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6029618978500366, "logits_per_char": -0.8014809489250183, "num_chars": 2}, {"sum_logits": -1.508450984954834, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.508450984954834, "logits_per_char": -0.754225492477417, "num_chars": 2}, {"sum_logits": -1.8906091451644897, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.8906091451644897, "logits_per_char": -0.9453045725822449, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 842, "native_id": "3b8be90fdd8c67571d8d692eaa6dd87b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.702075481414795, "incorrect_loss_raw": 1.6155429780483246, "correct_loss_per_char": 0.8510377407073975, "incorrect_loss_per_char": 0.8077714890241623, "correct_loss_per_token": 1.702075481414795, "incorrect_loss_per_token": 1.6155429780483246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.382399082183838, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.382399082183838, "logits_per_char": -0.691199541091919, "num_chars": 2}, {"sum_logits": -1.5568772554397583, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5568772554397583, "logits_per_char": -0.7784386277198792, "num_chars": 2}, {"sum_logits": -1.5606889724731445, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5606889724731445, "logits_per_char": -0.7803444862365723, "num_chars": 2}, {"sum_logits": -1.702075481414795, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.702075481414795, "logits_per_char": -0.8510377407073975, "num_chars": 2}, {"sum_logits": -1.9622066020965576, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.9622066020965576, "logits_per_char": -0.9811033010482788, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 843, "native_id": "300bd7704ae8c5fcef618902f18fd01d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6681358814239502, "incorrect_loss_raw": 1.6587596833705902, "correct_loss_per_char": 0.8340679407119751, "incorrect_loss_per_char": 0.8293798416852951, "correct_loss_per_token": 1.6681358814239502, "incorrect_loss_per_token": 1.6587596833705902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.272922158241272, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.272922158241272, "logits_per_char": -0.636461079120636, "num_chars": 2}, {"sum_logits": -1.525115966796875, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.525115966796875, "logits_per_char": -0.7625579833984375, "num_chars": 2}, {"sum_logits": -1.5936365127563477, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5936365127563477, "logits_per_char": -0.7968182563781738, "num_chars": 2}, {"sum_logits": -1.6681358814239502, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6681358814239502, "logits_per_char": -0.8340679407119751, "num_chars": 2}, {"sum_logits": -2.243364095687866, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.243364095687866, "logits_per_char": -1.121682047843933, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 844, "native_id": "f18833ace65a54709377134168b457a9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5091239213943481, "incorrect_loss_raw": 1.6830413341522217, "correct_loss_per_char": 0.7545619606971741, "incorrect_loss_per_char": 0.8415206670761108, "correct_loss_per_token": 1.5091239213943481, "incorrect_loss_per_token": 1.6830413341522217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4229446649551392, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4229446649551392, "logits_per_char": -0.7114723324775696, "num_chars": 2}, {"sum_logits": -1.4917699098587036, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4917699098587036, "logits_per_char": -0.7458849549293518, "num_chars": 2}, {"sum_logits": -1.5091239213943481, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5091239213943481, "logits_per_char": -0.7545619606971741, "num_chars": 2}, {"sum_logits": -1.632857084274292, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.632857084274292, "logits_per_char": -0.816428542137146, "num_chars": 2}, {"sum_logits": -2.184593677520752, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.184593677520752, "logits_per_char": -1.092296838760376, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 845, "native_id": "5bba03b425f5abc6e017f194cf074b06", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6119318008422852, "incorrect_loss_raw": 1.62106391787529, "correct_loss_per_char": 0.8059659004211426, "incorrect_loss_per_char": 0.810531958937645, "correct_loss_per_token": 1.6119318008422852, "incorrect_loss_per_token": 1.62106391787529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5065494775772095, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.5065494775772095, "logits_per_char": -0.7532747387886047, "num_chars": 2}, {"sum_logits": -1.6119318008422852, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6119318008422852, "logits_per_char": -0.8059659004211426, "num_chars": 2}, {"sum_logits": -1.6156489849090576, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6156489849090576, "logits_per_char": -0.8078244924545288, "num_chars": 2}, {"sum_logits": -1.655847191810608, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.655847191810608, "logits_per_char": -0.827923595905304, "num_chars": 2}, {"sum_logits": -1.7062100172042847, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7062100172042847, "logits_per_char": -0.8531050086021423, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 846, "native_id": "78276a4eab6e8d6b9ae3749211816977", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6407825946807861, "incorrect_loss_raw": 1.6104878187179565, "correct_loss_per_char": 0.8203912973403931, "incorrect_loss_per_char": 0.8052439093589783, "correct_loss_per_token": 1.6407825946807861, "incorrect_loss_per_token": 1.6104878187179565, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5164752006530762, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.5164752006530762, "logits_per_char": -0.7582376003265381, "num_chars": 2}, {"sum_logits": -1.5847365856170654, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5847365856170654, "logits_per_char": -0.7923682928085327, "num_chars": 2}, {"sum_logits": -1.6407825946807861, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6407825946807861, "logits_per_char": -0.8203912973403931, "num_chars": 2}, {"sum_logits": -1.633742094039917, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.633742094039917, "logits_per_char": -0.8168710470199585, "num_chars": 2}, {"sum_logits": -1.7069973945617676, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7069973945617676, "logits_per_char": -0.8534986972808838, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 847, "native_id": "cf33e0f5891ce53a716432be06a46ee1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6265933513641357, "incorrect_loss_raw": 1.6750926673412323, "correct_loss_per_char": 0.8132966756820679, "incorrect_loss_per_char": 0.8375463336706161, "correct_loss_per_token": 1.6265933513641357, "incorrect_loss_per_token": 1.6750926673412323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2441576719284058, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2441576719284058, "logits_per_char": -0.6220788359642029, "num_chars": 2}, {"sum_logits": -1.5318949222564697, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5318949222564697, "logits_per_char": -0.7659474611282349, "num_chars": 2}, {"sum_logits": -1.6265933513641357, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6265933513641357, "logits_per_char": -0.8132966756820679, "num_chars": 2}, {"sum_logits": -1.6559967994689941, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6559967994689941, "logits_per_char": -0.8279983997344971, "num_chars": 2}, {"sum_logits": -2.2683212757110596, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.2683212757110596, "logits_per_char": -1.1341606378555298, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 848, "native_id": "3938d6e50d38b1f8774b4f00a89bdb39", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.588897943496704, "incorrect_loss_raw": 1.6536706984043121, "correct_loss_per_char": 0.794448971748352, "incorrect_loss_per_char": 0.8268353492021561, "correct_loss_per_token": 1.588897943496704, "incorrect_loss_per_token": 1.6536706984043121, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4118577241897583, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4118577241897583, "logits_per_char": -0.7059288620948792, "num_chars": 2}, {"sum_logits": -1.5254096984863281, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5254096984863281, "logits_per_char": -0.7627048492431641, "num_chars": 2}, {"sum_logits": -1.588897943496704, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.588897943496704, "logits_per_char": -0.794448971748352, "num_chars": 2}, {"sum_logits": -1.5688135623931885, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5688135623931885, "logits_per_char": -0.7844067811965942, "num_chars": 2}, {"sum_logits": -2.1086018085479736, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.1086018085479736, "logits_per_char": -1.0543009042739868, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 849, "native_id": "cabefb7063a728e77abd44d97397a2a4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.616012454032898, "incorrect_loss_raw": 1.6348217129707336, "correct_loss_per_char": 0.808006227016449, "incorrect_loss_per_char": 0.8174108564853668, "correct_loss_per_token": 1.616012454032898, "incorrect_loss_per_token": 1.6348217129707336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3977965116500854, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3977965116500854, "logits_per_char": -0.6988982558250427, "num_chars": 2}, {"sum_logits": -1.5023783445358276, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5023783445358276, "logits_per_char": -0.7511891722679138, "num_chars": 2}, {"sum_logits": -1.616012454032898, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.616012454032898, "logits_per_char": -0.808006227016449, "num_chars": 2}, {"sum_logits": -1.6750632524490356, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6750632524490356, "logits_per_char": -0.8375316262245178, "num_chars": 2}, {"sum_logits": -1.9640487432479858, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9640487432479858, "logits_per_char": -0.9820243716239929, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 850, "native_id": "60b909ad1d7956218a5d99954fdebecd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.51106595993042, "incorrect_loss_raw": 1.6523774862289429, "correct_loss_per_char": 0.75553297996521, "incorrect_loss_per_char": 0.8261887431144714, "correct_loss_per_token": 1.51106595993042, "incorrect_loss_per_token": 1.6523774862289429, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5160343647003174, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5160343647003174, "logits_per_char": -0.7580171823501587, "num_chars": 2}, {"sum_logits": -1.5904126167297363, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5904126167297363, "logits_per_char": -0.7952063083648682, "num_chars": 2}, {"sum_logits": -1.5913159847259521, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5913159847259521, "logits_per_char": -0.7956579923629761, "num_chars": 2}, {"sum_logits": -1.51106595993042, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.51106595993042, "logits_per_char": -0.75553297996521, "num_chars": 2}, {"sum_logits": -1.9117469787597656, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9117469787597656, "logits_per_char": -0.9558734893798828, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 851, "native_id": "9fdebd1c2cf498f1d726a025b780a39a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4751393795013428, "incorrect_loss_raw": 1.6667823791503906, "correct_loss_per_char": 0.7375696897506714, "incorrect_loss_per_char": 0.8333911895751953, "correct_loss_per_token": 1.4751393795013428, "incorrect_loss_per_token": 1.6667823791503906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4751393795013428, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.4751393795013428, "logits_per_char": -0.7375696897506714, "num_chars": 2}, {"sum_logits": -1.449388027191162, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.449388027191162, "logits_per_char": -0.724694013595581, "num_chars": 2}, {"sum_logits": -1.7146265506744385, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.7146265506744385, "logits_per_char": -0.8573132753372192, "num_chars": 2}, {"sum_logits": -1.5975761413574219, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.5975761413574219, "logits_per_char": -0.7987880706787109, "num_chars": 2}, {"sum_logits": -1.90553879737854, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.90553879737854, "logits_per_char": -0.95276939868927, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 852, "native_id": "f36027954e43cfd926451bdf7cb0c3ac", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5490875244140625, "incorrect_loss_raw": 1.641836255788803, "correct_loss_per_char": 0.7745437622070312, "incorrect_loss_per_char": 0.8209181278944016, "correct_loss_per_token": 1.5490875244140625, "incorrect_loss_per_token": 1.641836255788803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4875026941299438, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4875026941299438, "logits_per_char": -0.7437513470649719, "num_chars": 2}, {"sum_logits": -1.5490875244140625, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5490875244140625, "logits_per_char": -0.7745437622070312, "num_chars": 2}, {"sum_logits": -1.61452317237854, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.61452317237854, "logits_per_char": -0.80726158618927, "num_chars": 2}, {"sum_logits": -1.5909628868103027, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5909628868103027, "logits_per_char": -0.7954814434051514, "num_chars": 2}, {"sum_logits": -1.8743562698364258, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.8743562698364258, "logits_per_char": -0.9371781349182129, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 853, "native_id": "7ec14907622c6d5a6087cd59a22d8c9d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6292320489883423, "incorrect_loss_raw": 1.6571535170078278, "correct_loss_per_char": 0.8146160244941711, "incorrect_loss_per_char": 0.8285767585039139, "correct_loss_per_token": 1.6292320489883423, "incorrect_loss_per_token": 1.6571535170078278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3835254907608032, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3835254907608032, "logits_per_char": -0.6917627453804016, "num_chars": 2}, {"sum_logits": -1.4766765832901, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4766765832901, "logits_per_char": -0.73833829164505, "num_chars": 2}, {"sum_logits": -1.5513161420822144, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5513161420822144, "logits_per_char": -0.7756580710411072, "num_chars": 2}, {"sum_logits": -1.6292320489883423, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6292320489883423, "logits_per_char": -0.8146160244941711, "num_chars": 2}, {"sum_logits": -2.2170958518981934, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.2170958518981934, "logits_per_char": -1.1085479259490967, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 854, "native_id": "efe488f67b53a4b6e69782c01c84f06c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5842407941818237, "incorrect_loss_raw": 1.6486934423446655, "correct_loss_per_char": 0.7921203970909119, "incorrect_loss_per_char": 0.8243467211723328, "correct_loss_per_token": 1.5842407941818237, "incorrect_loss_per_token": 1.6486934423446655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4062058925628662, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.4062058925628662, "logits_per_char": -0.7031029462814331, "num_chars": 2}, {"sum_logits": -1.6984237432479858, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.6984237432479858, "logits_per_char": -0.8492118716239929, "num_chars": 2}, {"sum_logits": -1.5842407941818237, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5842407941818237, "logits_per_char": -0.7921203970909119, "num_chars": 2}, {"sum_logits": -1.4778684377670288, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4778684377670288, "logits_per_char": -0.7389342188835144, "num_chars": 2}, {"sum_logits": -2.0122756958007812, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -2.0122756958007812, "logits_per_char": -1.0061378479003906, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 855, "native_id": "7c62637437ad7515452886074010a438", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2664422988891602, "incorrect_loss_raw": 1.7434152960777283, "correct_loss_per_char": 0.6332211494445801, "incorrect_loss_per_char": 0.8717076480388641, "correct_loss_per_token": 1.2664422988891602, "incorrect_loss_per_token": 1.7434152960777283, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2664422988891602, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.2664422988891602, "logits_per_char": -0.6332211494445801, "num_chars": 2}, {"sum_logits": -1.5075914859771729, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5075914859771729, "logits_per_char": -0.7537957429885864, "num_chars": 2}, {"sum_logits": -1.6951450109481812, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6951450109481812, "logits_per_char": -0.8475725054740906, "num_chars": 2}, {"sum_logits": -1.7467728853225708, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.7467728853225708, "logits_per_char": -0.8733864426612854, "num_chars": 2}, {"sum_logits": -2.0241518020629883, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.0241518020629883, "logits_per_char": -1.0120759010314941, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 856, "native_id": "4f7be1c68654e2924c161c8eca652928", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5275927782058716, "incorrect_loss_raw": 1.681262731552124, "correct_loss_per_char": 0.7637963891029358, "incorrect_loss_per_char": 0.840631365776062, "correct_loss_per_token": 1.5275927782058716, "incorrect_loss_per_token": 1.681262731552124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2679142951965332, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.2679142951965332, "logits_per_char": -0.6339571475982666, "num_chars": 2}, {"sum_logits": -1.5275927782058716, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5275927782058716, "logits_per_char": -0.7637963891029358, "num_chars": 2}, {"sum_logits": -1.6733852624893188, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6733852624893188, "logits_per_char": -0.8366926312446594, "num_chars": 2}, {"sum_logits": -1.6584066152572632, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6584066152572632, "logits_per_char": -0.8292033076286316, "num_chars": 2}, {"sum_logits": -2.125344753265381, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.125344753265381, "logits_per_char": -1.0626723766326904, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 857, "native_id": "e4976ee741cf4b28b8a42780ffb15774", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5177888870239258, "incorrect_loss_raw": 1.6788055300712585, "correct_loss_per_char": 0.7588944435119629, "incorrect_loss_per_char": 0.8394027650356293, "correct_loss_per_token": 1.5177888870239258, "incorrect_loss_per_token": 1.6788055300712585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3483582735061646, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.3483582735061646, "logits_per_char": -0.6741791367530823, "num_chars": 2}, {"sum_logits": -1.5177888870239258, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5177888870239258, "logits_per_char": -0.7588944435119629, "num_chars": 2}, {"sum_logits": -1.557521104812622, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.557521104812622, "logits_per_char": -0.778760552406311, "num_chars": 2}, {"sum_logits": -1.6935237646102905, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.6935237646102905, "logits_per_char": -0.8467618823051453, "num_chars": 2}, {"sum_logits": -2.115818977355957, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -2.115818977355957, "logits_per_char": -1.0579094886779785, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 858, "native_id": "14e75a42a416d32a24e2826cae34d2bf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.105776309967041, "incorrect_loss_raw": 1.5350856184959412, "correct_loss_per_char": 1.0528881549835205, "incorrect_loss_per_char": 0.7675428092479706, "correct_loss_per_token": 2.105776309967041, "incorrect_loss_per_token": 1.5350856184959412, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3351736068725586, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3351736068725586, "logits_per_char": -0.6675868034362793, "num_chars": 2}, {"sum_logits": -1.429673194885254, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.429673194885254, "logits_per_char": -0.714836597442627, "num_chars": 2}, {"sum_logits": -1.6175425052642822, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6175425052642822, "logits_per_char": -0.8087712526321411, "num_chars": 2}, {"sum_logits": -1.75795316696167, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.75795316696167, "logits_per_char": -0.878976583480835, "num_chars": 2}, {"sum_logits": -2.105776309967041, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.105776309967041, "logits_per_char": -1.0528881549835205, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 859, "native_id": "004607228ad49b69eac932c1005d6106", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.512746810913086, "incorrect_loss_raw": 1.6867387890815735, "correct_loss_per_char": 0.756373405456543, "incorrect_loss_per_char": 0.8433693945407867, "correct_loss_per_token": 1.512746810913086, "incorrect_loss_per_token": 1.6867387890815735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.368415117263794, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.368415117263794, "logits_per_char": -0.684207558631897, "num_chars": 2}, {"sum_logits": -1.5902845859527588, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5902845859527588, "logits_per_char": -0.7951422929763794, "num_chars": 2}, {"sum_logits": -1.512746810913086, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.512746810913086, "logits_per_char": -0.756373405456543, "num_chars": 2}, {"sum_logits": -1.565305233001709, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.565305233001709, "logits_per_char": -0.7826526165008545, "num_chars": 2}, {"sum_logits": -2.2229502201080322, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.2229502201080322, "logits_per_char": -1.1114751100540161, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 860, "native_id": "a7f54ee1866d5db34eacf40efa53c93e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5290682315826416, "incorrect_loss_raw": 1.6689790785312653, "correct_loss_per_char": 0.7645341157913208, "incorrect_loss_per_char": 0.8344895392656326, "correct_loss_per_token": 1.5290682315826416, "incorrect_loss_per_token": 1.6689790785312653, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3567678928375244, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3567678928375244, "logits_per_char": -0.6783839464187622, "num_chars": 2}, {"sum_logits": -1.5438904762268066, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5438904762268066, "logits_per_char": -0.7719452381134033, "num_chars": 2}, {"sum_logits": -1.5290682315826416, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5290682315826416, "logits_per_char": -0.7645341157913208, "num_chars": 2}, {"sum_logits": -1.7406529188156128, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7406529188156128, "logits_per_char": -0.8703264594078064, "num_chars": 2}, {"sum_logits": -2.034605026245117, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.034605026245117, "logits_per_char": -1.0173025131225586, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 861, "native_id": "e56c56c3cfe50ba0c787c2bd67255be8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6835304498672485, "incorrect_loss_raw": 1.616532027721405, "correct_loss_per_char": 0.8417652249336243, "incorrect_loss_per_char": 0.8082660138607025, "correct_loss_per_token": 1.6835304498672485, "incorrect_loss_per_token": 1.616532027721405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3753527402877808, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3753527402877808, "logits_per_char": -0.6876763701438904, "num_chars": 2}, {"sum_logits": -1.5151981115341187, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5151981115341187, "logits_per_char": -0.7575990557670593, "num_chars": 2}, {"sum_logits": -1.6803497076034546, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6803497076034546, "logits_per_char": -0.8401748538017273, "num_chars": 2}, {"sum_logits": -1.6835304498672485, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6835304498672485, "logits_per_char": -0.8417652249336243, "num_chars": 2}, {"sum_logits": -1.8952275514602661, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8952275514602661, "logits_per_char": -0.9476137757301331, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 862, "native_id": "6f48ee564a48293eb501cc0d8197bdd9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5257675647735596, "incorrect_loss_raw": 1.672635793685913, "correct_loss_per_char": 0.7628837823867798, "incorrect_loss_per_char": 0.8363178968429565, "correct_loss_per_token": 1.5257675647735596, "incorrect_loss_per_token": 1.672635793685913, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3940505981445312, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3940505981445312, "logits_per_char": -0.6970252990722656, "num_chars": 2}, {"sum_logits": -1.5438079833984375, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5438079833984375, "logits_per_char": -0.7719039916992188, "num_chars": 2}, {"sum_logits": -1.5257675647735596, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5257675647735596, "logits_per_char": -0.7628837823867798, "num_chars": 2}, {"sum_logits": -1.608867883682251, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.608867883682251, "logits_per_char": -0.8044339418411255, "num_chars": 2}, {"sum_logits": -2.1438167095184326, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.1438167095184326, "logits_per_char": -1.0719083547592163, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 863, "native_id": "13d2a103abbed930cabc9567a1ba12f2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9387998580932617, "incorrect_loss_raw": 1.550157755613327, "correct_loss_per_char": 0.9693999290466309, "incorrect_loss_per_char": 0.7750788778066635, "correct_loss_per_token": 1.9387998580932617, "incorrect_loss_per_token": 1.550157755613327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.468895435333252, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.468895435333252, "logits_per_char": -0.734447717666626, "num_chars": 2}, {"sum_logits": -1.4813002347946167, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4813002347946167, "logits_per_char": -0.7406501173973083, "num_chars": 2}, {"sum_logits": -1.6105613708496094, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6105613708496094, "logits_per_char": -0.8052806854248047, "num_chars": 2}, {"sum_logits": -1.63987398147583, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.63987398147583, "logits_per_char": -0.819936990737915, "num_chars": 2}, {"sum_logits": -1.9387998580932617, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.9387998580932617, "logits_per_char": -0.9693999290466309, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 864, "native_id": "0c1efb38e023ee9725486fbec4f2d797", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.662068486213684, "incorrect_loss_raw": 1.6506225764751434, "correct_loss_per_char": 0.831034243106842, "incorrect_loss_per_char": 0.8253112882375717, "correct_loss_per_token": 1.662068486213684, "incorrect_loss_per_token": 1.6506225764751434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3301880359649658, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3301880359649658, "logits_per_char": -0.6650940179824829, "num_chars": 2}, {"sum_logits": -1.4526159763336182, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4526159763336182, "logits_per_char": -0.7263079881668091, "num_chars": 2}, {"sum_logits": -1.6538218259811401, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6538218259811401, "logits_per_char": -0.8269109129905701, "num_chars": 2}, {"sum_logits": -1.662068486213684, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.662068486213684, "logits_per_char": -0.831034243106842, "num_chars": 2}, {"sum_logits": -2.1658644676208496, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.1658644676208496, "logits_per_char": -1.0829322338104248, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 865, "native_id": "b7ab4a5e0c19a98f41cd1ba3176f2dff", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.562663197517395, "incorrect_loss_raw": 1.6717390418052673, "correct_loss_per_char": 0.7813315987586975, "incorrect_loss_per_char": 0.8358695209026337, "correct_loss_per_token": 1.562663197517395, "incorrect_loss_per_token": 1.6717390418052673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.249122977256775, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.249122977256775, "logits_per_char": -0.6245614886283875, "num_chars": 2}, {"sum_logits": -1.6187922954559326, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6187922954559326, "logits_per_char": -0.8093961477279663, "num_chars": 2}, {"sum_logits": -1.562663197517395, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.562663197517395, "logits_per_char": -0.7813315987586975, "num_chars": 2}, {"sum_logits": -1.7148090600967407, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7148090600967407, "logits_per_char": -0.8574045300483704, "num_chars": 2}, {"sum_logits": -2.104231834411621, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.104231834411621, "logits_per_char": -1.0521159172058105, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 866, "native_id": "8bcbb5098876940b2382db3a9a0b1beb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5966825485229492, "incorrect_loss_raw": 1.6384120881557465, "correct_loss_per_char": 0.7983412742614746, "incorrect_loss_per_char": 0.8192060440778732, "correct_loss_per_token": 1.5966825485229492, "incorrect_loss_per_token": 1.6384120881557465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4318732023239136, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4318732023239136, "logits_per_char": -0.7159366011619568, "num_chars": 2}, {"sum_logits": -1.5795212984085083, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5795212984085083, "logits_per_char": -0.7897606492042542, "num_chars": 2}, {"sum_logits": -1.5818347930908203, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5818347930908203, "logits_per_char": -0.7909173965454102, "num_chars": 2}, {"sum_logits": -1.5966825485229492, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5966825485229492, "logits_per_char": -0.7983412742614746, "num_chars": 2}, {"sum_logits": -1.9604190587997437, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9604190587997437, "logits_per_char": -0.9802095293998718, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 867, "native_id": "c7ce02d9365fe9275f88338ad51cbde6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.520295262336731, "incorrect_loss_raw": 1.654442697763443, "correct_loss_per_char": 0.7601476311683655, "incorrect_loss_per_char": 0.8272213488817215, "correct_loss_per_token": 1.520295262336731, "incorrect_loss_per_token": 1.654442697763443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.520295262336731, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.520295262336731, "logits_per_char": -0.7601476311683655, "num_chars": 2}, {"sum_logits": -1.4894777536392212, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4894777536392212, "logits_per_char": -0.7447388768196106, "num_chars": 2}, {"sum_logits": -1.5966370105743408, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5966370105743408, "logits_per_char": -0.7983185052871704, "num_chars": 2}, {"sum_logits": -1.6055837869644165, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6055837869644165, "logits_per_char": -0.8027918934822083, "num_chars": 2}, {"sum_logits": -1.9260722398757935, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9260722398757935, "logits_per_char": -0.9630361199378967, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 868, "native_id": "fb54a118d46b2776e435d411ae3dd9c8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7190253734588623, "incorrect_loss_raw": 1.6239464282989502, "correct_loss_per_char": 0.8595126867294312, "incorrect_loss_per_char": 0.8119732141494751, "correct_loss_per_token": 1.7190253734588623, "incorrect_loss_per_token": 1.6239464282989502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2920973300933838, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2920973300933838, "logits_per_char": -0.6460486650466919, "num_chars": 2}, {"sum_logits": -1.6374449729919434, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6374449729919434, "logits_per_char": -0.8187224864959717, "num_chars": 2}, {"sum_logits": -1.5371203422546387, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5371203422546387, "logits_per_char": -0.7685601711273193, "num_chars": 2}, {"sum_logits": -1.7190253734588623, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7190253734588623, "logits_per_char": -0.8595126867294312, "num_chars": 2}, {"sum_logits": -2.029123067855835, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.029123067855835, "logits_per_char": -1.0145615339279175, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 869, "native_id": "2c13e6d61e3733db90a9fd22d72b3337", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6122326850891113, "incorrect_loss_raw": 1.6657450497150421, "correct_loss_per_char": 0.8061163425445557, "incorrect_loss_per_char": 0.8328725248575211, "correct_loss_per_token": 1.6122326850891113, "incorrect_loss_per_token": 1.6657450497150421, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3383684158325195, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3383684158325195, "logits_per_char": -0.6691842079162598, "num_chars": 2}, {"sum_logits": -1.4320474863052368, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4320474863052368, "logits_per_char": -0.7160237431526184, "num_chars": 2}, {"sum_logits": -1.6122326850891113, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6122326850891113, "logits_per_char": -0.8061163425445557, "num_chars": 2}, {"sum_logits": -1.6908273696899414, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6908273696899414, "logits_per_char": -0.8454136848449707, "num_chars": 2}, {"sum_logits": -2.2017369270324707, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.2017369270324707, "logits_per_char": -1.1008684635162354, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 870, "native_id": "350292ae429060a00ff2cf64d71558e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0141000747680664, "incorrect_loss_raw": 1.5401929914951324, "correct_loss_per_char": 1.0070500373840332, "incorrect_loss_per_char": 0.7700964957475662, "correct_loss_per_token": 2.0141000747680664, "incorrect_loss_per_token": 1.5401929914951324, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3723607063293457, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3723607063293457, "logits_per_char": -0.6861803531646729, "num_chars": 2}, {"sum_logits": -1.547351598739624, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.547351598739624, "logits_per_char": -0.773675799369812, "num_chars": 2}, {"sum_logits": -1.6453099250793457, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6453099250793457, "logits_per_char": -0.8226549625396729, "num_chars": 2}, {"sum_logits": -1.5957497358322144, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5957497358322144, "logits_per_char": -0.7978748679161072, "num_chars": 2}, {"sum_logits": -2.0141000747680664, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0141000747680664, "logits_per_char": -1.0070500373840332, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 871, "native_id": "179fff4b5928e5ac3d3ae3e1db782547", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.583404779434204, "incorrect_loss_raw": 1.6362850368022919, "correct_loss_per_char": 0.791702389717102, "incorrect_loss_per_char": 0.8181425184011459, "correct_loss_per_token": 1.583404779434204, "incorrect_loss_per_token": 1.6362850368022919, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4422999620437622, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.4422999620437622, "logits_per_char": -0.7211499810218811, "num_chars": 2}, {"sum_logits": -1.6488628387451172, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6488628387451172, "logits_per_char": -0.8244314193725586, "num_chars": 2}, {"sum_logits": -1.5488734245300293, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5488734245300293, "logits_per_char": -0.7744367122650146, "num_chars": 2}, {"sum_logits": -1.583404779434204, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.583404779434204, "logits_per_char": -0.791702389717102, "num_chars": 2}, {"sum_logits": -1.9051039218902588, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.9051039218902588, "logits_per_char": -0.9525519609451294, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 872, "native_id": "81cc0d320488c7bacafb285cf7db5fbd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.556212306022644, "incorrect_loss_raw": 1.6680332124233246, "correct_loss_per_char": 0.778106153011322, "incorrect_loss_per_char": 0.8340166062116623, "correct_loss_per_token": 1.556212306022644, "incorrect_loss_per_token": 1.6680332124233246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3388930559158325, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3388930559158325, "logits_per_char": -0.6694465279579163, "num_chars": 2}, {"sum_logits": -1.556212306022644, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.556212306022644, "logits_per_char": -0.778106153011322, "num_chars": 2}, {"sum_logits": -1.5854295492172241, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5854295492172241, "logits_per_char": -0.7927147746086121, "num_chars": 2}, {"sum_logits": -1.6126078367233276, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6126078367233276, "logits_per_char": -0.8063039183616638, "num_chars": 2}, {"sum_logits": -2.135202407836914, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.135202407836914, "logits_per_char": -1.067601203918457, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 873, "native_id": "26c8a7165d0ed7250b9328f90d83ba83", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6900603771209717, "incorrect_loss_raw": 1.6367990970611572, "correct_loss_per_char": 0.8450301885604858, "incorrect_loss_per_char": 0.8183995485305786, "correct_loss_per_token": 1.6900603771209717, "incorrect_loss_per_token": 1.6367990970611572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2982947826385498, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.2982947826385498, "logits_per_char": -0.6491473913192749, "num_chars": 2}, {"sum_logits": -1.5633909702301025, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5633909702301025, "logits_per_char": -0.7816954851150513, "num_chars": 2}, {"sum_logits": -1.5933961868286133, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5933961868286133, "logits_per_char": -0.7966980934143066, "num_chars": 2}, {"sum_logits": -1.6900603771209717, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6900603771209717, "logits_per_char": -0.8450301885604858, "num_chars": 2}, {"sum_logits": -2.0921144485473633, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.0921144485473633, "logits_per_char": -1.0460572242736816, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 874, "native_id": "636fc69dee35cd357b4191b47e64d0e5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1521799564361572, "incorrect_loss_raw": 1.519453227519989, "correct_loss_per_char": 1.0760899782180786, "incorrect_loss_per_char": 0.7597266137599945, "correct_loss_per_token": 2.1521799564361572, "incorrect_loss_per_token": 1.519453227519989, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4087178707122803, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4087178707122803, "logits_per_char": -0.7043589353561401, "num_chars": 2}, {"sum_logits": -1.4714605808258057, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4714605808258057, "logits_per_char": -0.7357302904129028, "num_chars": 2}, {"sum_logits": -1.5262830257415771, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5262830257415771, "logits_per_char": -0.7631415128707886, "num_chars": 2}, {"sum_logits": -1.671351432800293, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.671351432800293, "logits_per_char": -0.8356757164001465, "num_chars": 2}, {"sum_logits": -2.1521799564361572, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.1521799564361572, "logits_per_char": -1.0760899782180786, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 875, "native_id": "f0c4622a082eb9ad0690dd36dcf61297", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.255631923675537, "incorrect_loss_raw": 1.5156739950180054, "correct_loss_per_char": 1.1278159618377686, "incorrect_loss_per_char": 0.7578369975090027, "correct_loss_per_token": 2.255631923675537, "incorrect_loss_per_token": 1.5156739950180054, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2907207012176514, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2907207012176514, "logits_per_char": -0.6453603506088257, "num_chars": 2}, {"sum_logits": -1.4568510055541992, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4568510055541992, "logits_per_char": -0.7284255027770996, "num_chars": 2}, {"sum_logits": -1.5982826948165894, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5982826948165894, "logits_per_char": -0.7991413474082947, "num_chars": 2}, {"sum_logits": -1.7168415784835815, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7168415784835815, "logits_per_char": -0.8584207892417908, "num_chars": 2}, {"sum_logits": -2.255631923675537, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.255631923675537, "logits_per_char": -1.1278159618377686, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 876, "native_id": "4499ebd5e8188b0d5fdef6afd893017a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6949687004089355, "incorrect_loss_raw": 1.598287433385849, "correct_loss_per_char": 0.8474843502044678, "incorrect_loss_per_char": 0.7991437166929245, "correct_loss_per_token": 1.6949687004089355, "incorrect_loss_per_token": 1.598287433385849, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4821386337280273, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4821386337280273, "logits_per_char": -0.7410693168640137, "num_chars": 2}, {"sum_logits": -1.540277123451233, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.540277123451233, "logits_per_char": -0.7701385617256165, "num_chars": 2}, {"sum_logits": -1.6938104629516602, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6938104629516602, "logits_per_char": -0.8469052314758301, "num_chars": 2}, {"sum_logits": -1.6769235134124756, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6769235134124756, "logits_per_char": -0.8384617567062378, "num_chars": 2}, {"sum_logits": -1.6949687004089355, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6949687004089355, "logits_per_char": -0.8474843502044678, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 877, "native_id": "230cc491829307e8edb5423c8d09f945", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6696745157241821, "incorrect_loss_raw": 1.6390730738639832, "correct_loss_per_char": 0.8348372578620911, "incorrect_loss_per_char": 0.8195365369319916, "correct_loss_per_token": 1.6696745157241821, "incorrect_loss_per_token": 1.6390730738639832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2970349788665771, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.2970349788665771, "logits_per_char": -0.6485174894332886, "num_chars": 2}, {"sum_logits": -1.5452085733413696, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5452085733413696, "logits_per_char": -0.7726042866706848, "num_chars": 2}, {"sum_logits": -1.6696745157241821, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6696745157241821, "logits_per_char": -0.8348372578620911, "num_chars": 2}, {"sum_logits": -1.6375616788864136, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6375616788864136, "logits_per_char": -0.8187808394432068, "num_chars": 2}, {"sum_logits": -2.0764870643615723, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.0764870643615723, "logits_per_char": -1.0382435321807861, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 878, "native_id": "6163a897cd7eac1deddd4c002a1930ae", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0507125854492188, "incorrect_loss_raw": 1.5349688529968262, "correct_loss_per_char": 1.0253562927246094, "incorrect_loss_per_char": 0.7674844264984131, "correct_loss_per_token": 2.0507125854492188, "incorrect_loss_per_token": 1.5349688529968262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.411285638809204, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.411285638809204, "logits_per_char": -0.705642819404602, "num_chars": 2}, {"sum_logits": -1.5144271850585938, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5144271850585938, "logits_per_char": -0.7572135925292969, "num_chars": 2}, {"sum_logits": -1.5390536785125732, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5390536785125732, "logits_per_char": -0.7695268392562866, "num_chars": 2}, {"sum_logits": -1.6751089096069336, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6751089096069336, "logits_per_char": -0.8375544548034668, "num_chars": 2}, {"sum_logits": -2.0507125854492188, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0507125854492188, "logits_per_char": -1.0253562927246094, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 879, "native_id": "55478486079423907508a06be13ca536", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4624675512313843, "incorrect_loss_raw": 1.6788468062877655, "correct_loss_per_char": 0.7312337756156921, "incorrect_loss_per_char": 0.8394234031438828, "correct_loss_per_token": 1.4624675512313843, "incorrect_loss_per_token": 1.6788468062877655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5371249914169312, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5371249914169312, "logits_per_char": -0.7685624957084656, "num_chars": 2}, {"sum_logits": -1.5196317434310913, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5196317434310913, "logits_per_char": -0.7598158717155457, "num_chars": 2}, {"sum_logits": -1.4624675512313843, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.4624675512313843, "logits_per_char": -0.7312337756156921, "num_chars": 2}, {"sum_logits": -1.5911418199539185, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5911418199539185, "logits_per_char": -0.7955709099769592, "num_chars": 2}, {"sum_logits": -2.067488670349121, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.067488670349121, "logits_per_char": -1.0337443351745605, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 880, "native_id": "4fa0d61ec82eb1e238d8938d5f43f392", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7058066129684448, "incorrect_loss_raw": 1.6053851246833801, "correct_loss_per_char": 0.8529033064842224, "incorrect_loss_per_char": 0.8026925623416901, "correct_loss_per_token": 1.7058066129684448, "incorrect_loss_per_token": 1.6053851246833801, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5054267644882202, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5054267644882202, "logits_per_char": -0.7527133822441101, "num_chars": 2}, {"sum_logits": -1.517116904258728, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.517116904258728, "logits_per_char": -0.758558452129364, "num_chars": 2}, {"sum_logits": -1.5042170286178589, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.5042170286178589, "logits_per_char": -0.7521085143089294, "num_chars": 2}, {"sum_logits": -1.7058066129684448, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7058066129684448, "logits_per_char": -0.8529033064842224, "num_chars": 2}, {"sum_logits": -1.8947798013687134, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.8947798013687134, "logits_per_char": -0.9473899006843567, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 881, "native_id": "b4f79ca5f3595248ee25292ab60ad105", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4505934715270996, "incorrect_loss_raw": 1.665711760520935, "correct_loss_per_char": 0.7252967357635498, "incorrect_loss_per_char": 0.8328558802604675, "correct_loss_per_token": 1.4505934715270996, "incorrect_loss_per_token": 1.665711760520935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4505934715270996, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.4505934715270996, "logits_per_char": -0.7252967357635498, "num_chars": 2}, {"sum_logits": -1.5730056762695312, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.5730056762695312, "logits_per_char": -0.7865028381347656, "num_chars": 2}, {"sum_logits": -1.6169192790985107, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6169192790985107, "logits_per_char": -0.8084596395492554, "num_chars": 2}, {"sum_logits": -1.6562244892120361, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6562244892120361, "logits_per_char": -0.8281122446060181, "num_chars": 2}, {"sum_logits": -1.816697597503662, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.816697597503662, "logits_per_char": -0.908348798751831, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 882, "native_id": "c39131d979c9205c11d0e109e18188e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.986098289489746, "incorrect_loss_raw": 1.5488483607769012, "correct_loss_per_char": 0.993049144744873, "incorrect_loss_per_char": 0.7744241803884506, "correct_loss_per_token": 1.986098289489746, "incorrect_loss_per_token": 1.5488483607769012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3670085668563843, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3670085668563843, "logits_per_char": -0.6835042834281921, "num_chars": 2}, {"sum_logits": -1.6009715795516968, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6009715795516968, "logits_per_char": -0.8004857897758484, "num_chars": 2}, {"sum_logits": -1.5240641832351685, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5240641832351685, "logits_per_char": -0.7620320916175842, "num_chars": 2}, {"sum_logits": -1.7033491134643555, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.7033491134643555, "logits_per_char": -0.8516745567321777, "num_chars": 2}, {"sum_logits": -1.986098289489746, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.986098289489746, "logits_per_char": -0.993049144744873, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 883, "native_id": "bd773d64f4e22db2358c6e00cbdf2d83", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.399654507637024, "incorrect_loss_raw": 1.6884240508079529, "correct_loss_per_char": 0.699827253818512, "incorrect_loss_per_char": 0.8442120254039764, "correct_loss_per_token": 1.399654507637024, "incorrect_loss_per_token": 1.6884240508079529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.399654507637024, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.399654507637024, "logits_per_char": -0.699827253818512, "num_chars": 2}, {"sum_logits": -1.6889890432357788, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6889890432357788, "logits_per_char": -0.8444945216178894, "num_chars": 2}, {"sum_logits": -1.5850695371627808, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5850695371627808, "logits_per_char": -0.7925347685813904, "num_chars": 2}, {"sum_logits": -1.5322991609573364, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5322991609573364, "logits_per_char": -0.7661495804786682, "num_chars": 2}, {"sum_logits": -1.9473384618759155, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.9473384618759155, "logits_per_char": -0.9736692309379578, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 884, "native_id": "2b416120e2fbd84b44b5dcd4eb42ed5c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8179118633270264, "incorrect_loss_raw": 1.6057648360729218, "correct_loss_per_char": 0.9089559316635132, "incorrect_loss_per_char": 0.8028824180364609, "correct_loss_per_token": 1.8179118633270264, "incorrect_loss_per_token": 1.6057648360729218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.253926157951355, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.253926157951355, "logits_per_char": -0.6269630789756775, "num_chars": 2}, {"sum_logits": -1.5523145198822021, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5523145198822021, "logits_per_char": -0.7761572599411011, "num_chars": 2}, {"sum_logits": -1.5852611064910889, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5852611064910889, "logits_per_char": -0.7926305532455444, "num_chars": 2}, {"sum_logits": -1.8179118633270264, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8179118633270264, "logits_per_char": -0.9089559316635132, "num_chars": 2}, {"sum_logits": -2.031557559967041, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.031557559967041, "logits_per_char": -1.0157787799835205, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 885, "native_id": "cef855ec07c66a731741026c2839b0d3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7269532680511475, "incorrect_loss_raw": 1.5914765000343323, "correct_loss_per_char": 0.8634766340255737, "incorrect_loss_per_char": 0.7957382500171661, "correct_loss_per_token": 1.7269532680511475, "incorrect_loss_per_token": 1.5914765000343323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5374195575714111, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": true, "logits_per_token": -1.5374195575714111, "logits_per_char": -0.7687097787857056, "num_chars": 2}, {"sum_logits": -1.5657294988632202, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.5657294988632202, "logits_per_char": -0.7828647494316101, "num_chars": 2}, {"sum_logits": -1.7269532680511475, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.7269532680511475, "logits_per_char": -0.8634766340255737, "num_chars": 2}, {"sum_logits": -1.6410189867019653, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6410189867019653, "logits_per_char": -0.8205094933509827, "num_chars": 2}, {"sum_logits": -1.6217379570007324, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6217379570007324, "logits_per_char": -0.8108689785003662, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 886, "native_id": "0bbb82c1dc4bfd3b0e0c409a0afd248b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2650498151779175, "incorrect_loss_raw": 1.7590803802013397, "correct_loss_per_char": 0.6325249075889587, "incorrect_loss_per_char": 0.8795401901006699, "correct_loss_per_token": 1.2650498151779175, "incorrect_loss_per_token": 1.7590803802013397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2650498151779175, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.2650498151779175, "logits_per_char": -0.6325249075889587, "num_chars": 2}, {"sum_logits": -1.532698392868042, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.532698392868042, "logits_per_char": -0.766349196434021, "num_chars": 2}, {"sum_logits": -1.6787892580032349, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6787892580032349, "logits_per_char": -0.8393946290016174, "num_chars": 2}, {"sum_logits": -1.6051976680755615, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6051976680755615, "logits_per_char": -0.8025988340377808, "num_chars": 2}, {"sum_logits": -2.2196362018585205, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.2196362018585205, "logits_per_char": -1.1098181009292603, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 887, "native_id": "67beae081a9b5ef56988f205f80cf129", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6849510669708252, "incorrect_loss_raw": 1.627258986234665, "correct_loss_per_char": 0.8424755334854126, "incorrect_loss_per_char": 0.8136294931173325, "correct_loss_per_token": 1.6849510669708252, "incorrect_loss_per_token": 1.627258986234665, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3300522565841675, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3300522565841675, "logits_per_char": -0.6650261282920837, "num_chars": 2}, {"sum_logits": -1.486032485961914, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.486032485961914, "logits_per_char": -0.743016242980957, "num_chars": 2}, {"sum_logits": -1.6849510669708252, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6849510669708252, "logits_per_char": -0.8424755334854126, "num_chars": 2}, {"sum_logits": -1.6812496185302734, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6812496185302734, "logits_per_char": -0.8406248092651367, "num_chars": 2}, {"sum_logits": -2.0117015838623047, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.0117015838623047, "logits_per_char": -1.0058507919311523, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 888, "native_id": "3b4dcfcab4726496bdbe9535cc669082", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6754733324050903, "incorrect_loss_raw": 1.6596533060073853, "correct_loss_per_char": 0.8377366662025452, "incorrect_loss_per_char": 0.8298266530036926, "correct_loss_per_token": 1.6754733324050903, "incorrect_loss_per_token": 1.6596533060073853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2818032503128052, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.2818032503128052, "logits_per_char": -0.6409016251564026, "num_chars": 2}, {"sum_logits": -1.415693759918213, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.415693759918213, "logits_per_char": -0.7078468799591064, "num_chars": 2}, {"sum_logits": -1.7154077291488647, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7154077291488647, "logits_per_char": -0.8577038645744324, "num_chars": 2}, {"sum_logits": -1.6754733324050903, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6754733324050903, "logits_per_char": -0.8377366662025452, "num_chars": 2}, {"sum_logits": -2.225708484649658, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.225708484649658, "logits_per_char": -1.112854242324829, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 889, "native_id": "eebddf5f35d85e9fe2ecbd9b56c1db60", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3437538146972656, "incorrect_loss_raw": 1.717351496219635, "correct_loss_per_char": 0.6718769073486328, "incorrect_loss_per_char": 0.8586757481098175, "correct_loss_per_token": 1.3437538146972656, "incorrect_loss_per_token": 1.717351496219635, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3437538146972656, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.3437538146972656, "logits_per_char": -0.6718769073486328, "num_chars": 2}, {"sum_logits": -1.4553637504577637, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.4553637504577637, "logits_per_char": -0.7276818752288818, "num_chars": 2}, {"sum_logits": -1.5702604055404663, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5702604055404663, "logits_per_char": -0.7851302027702332, "num_chars": 2}, {"sum_logits": -1.8343175649642944, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.8343175649642944, "logits_per_char": -0.9171587824821472, "num_chars": 2}, {"sum_logits": -2.0094642639160156, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.0094642639160156, "logits_per_char": -1.0047321319580078, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 890, "native_id": "5393ba1ce298bd1ac4744c07d7373a9c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7926301956176758, "incorrect_loss_raw": 1.647743135690689, "correct_loss_per_char": 0.8963150978088379, "incorrect_loss_per_char": 0.8238715678453445, "correct_loss_per_token": 1.7926301956176758, "incorrect_loss_per_token": 1.647743135690689, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1443357467651367, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.1443357467651367, "logits_per_char": -0.5721678733825684, "num_chars": 2}, {"sum_logits": -1.510377049446106, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.510377049446106, "logits_per_char": -0.755188524723053, "num_chars": 2}, {"sum_logits": -1.720646619796753, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.720646619796753, "logits_per_char": -0.8603233098983765, "num_chars": 2}, {"sum_logits": -1.7926301956176758, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7926301956176758, "logits_per_char": -0.8963150978088379, "num_chars": 2}, {"sum_logits": -2.2156131267547607, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.2156131267547607, "logits_per_char": -1.1078065633773804, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 891, "native_id": "fde48d43e27cefed6ed9c52514e0bb6d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3440651893615723, "incorrect_loss_raw": 1.7210963368415833, "correct_loss_per_char": 0.6720325946807861, "incorrect_loss_per_char": 0.8605481684207916, "correct_loss_per_token": 1.3440651893615723, "incorrect_loss_per_token": 1.7210963368415833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3440651893615723, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3440651893615723, "logits_per_char": -0.6720325946807861, "num_chars": 2}, {"sum_logits": -1.4927334785461426, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4927334785461426, "logits_per_char": -0.7463667392730713, "num_chars": 2}, {"sum_logits": -1.594867467880249, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.594867467880249, "logits_per_char": -0.7974337339401245, "num_chars": 2}, {"sum_logits": -1.6742498874664307, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6742498874664307, "logits_per_char": -0.8371249437332153, "num_chars": 2}, {"sum_logits": -2.1225345134735107, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.1225345134735107, "logits_per_char": -1.0612672567367554, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 892, "native_id": "da83d85e28778c082d9a63f5b890b26d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.346789002418518, "incorrect_loss_raw": 1.7026021480560303, "correct_loss_per_char": 0.673394501209259, "incorrect_loss_per_char": 0.8513010740280151, "correct_loss_per_token": 1.346789002418518, "incorrect_loss_per_token": 1.7026021480560303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.346789002418518, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.346789002418518, "logits_per_char": -0.673394501209259, "num_chars": 2}, {"sum_logits": -1.5540512800216675, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5540512800216675, "logits_per_char": -0.7770256400108337, "num_chars": 2}, {"sum_logits": -1.7357667684555054, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7357667684555054, "logits_per_char": -0.8678833842277527, "num_chars": 2}, {"sum_logits": -1.6109224557876587, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6109224557876587, "logits_per_char": -0.8054612278938293, "num_chars": 2}, {"sum_logits": -1.9096680879592896, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.9096680879592896, "logits_per_char": -0.9548340439796448, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 893, "native_id": "cfa980561efe82e7ae7080d4f081b463", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6613577604293823, "incorrect_loss_raw": 1.6214160919189453, "correct_loss_per_char": 0.8306788802146912, "incorrect_loss_per_char": 0.8107080459594727, "correct_loss_per_token": 1.6613577604293823, "incorrect_loss_per_token": 1.6214160919189453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.369984745979309, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.369984745979309, "logits_per_char": -0.6849923729896545, "num_chars": 2}, {"sum_logits": -1.5426408052444458, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5426408052444458, "logits_per_char": -0.7713204026222229, "num_chars": 2}, {"sum_logits": -1.6873818635940552, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6873818635940552, "logits_per_char": -0.8436909317970276, "num_chars": 2}, {"sum_logits": -1.6613577604293823, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6613577604293823, "logits_per_char": -0.8306788802146912, "num_chars": 2}, {"sum_logits": -1.8856569528579712, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.8856569528579712, "logits_per_char": -0.9428284764289856, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 894, "native_id": "384b89e789e0f4b4796120394fb6303b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6814368963241577, "incorrect_loss_raw": 1.648179292678833, "correct_loss_per_char": 0.8407184481620789, "incorrect_loss_per_char": 0.8240896463394165, "correct_loss_per_token": 1.6814368963241577, "incorrect_loss_per_token": 1.648179292678833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3185408115386963, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3185408115386963, "logits_per_char": -0.6592704057693481, "num_chars": 2}, {"sum_logits": -1.4949434995651245, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4949434995651245, "logits_per_char": -0.7474717497825623, "num_chars": 2}, {"sum_logits": -1.5691450834274292, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5691450834274292, "logits_per_char": -0.7845725417137146, "num_chars": 2}, {"sum_logits": -1.6814368963241577, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6814368963241577, "logits_per_char": -0.8407184481620789, "num_chars": 2}, {"sum_logits": -2.210087776184082, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.210087776184082, "logits_per_char": -1.105043888092041, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 895, "native_id": "0d66d33a17e41eaa3278ca7b3930c5ea", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1831459999084473, "incorrect_loss_raw": 1.5265531241893768, "correct_loss_per_char": 1.0915729999542236, "incorrect_loss_per_char": 0.7632765620946884, "correct_loss_per_token": 2.1831459999084473, "incorrect_loss_per_token": 1.5265531241893768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2547612190246582, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2547612190246582, "logits_per_char": -0.6273806095123291, "num_chars": 2}, {"sum_logits": -1.5113879442214966, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5113879442214966, "logits_per_char": -0.7556939721107483, "num_chars": 2}, {"sum_logits": -1.6110612154006958, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6110612154006958, "logits_per_char": -0.8055306077003479, "num_chars": 2}, {"sum_logits": -1.7290021181106567, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7290021181106567, "logits_per_char": -0.8645010590553284, "num_chars": 2}, {"sum_logits": -2.1831459999084473, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.1831459999084473, "logits_per_char": -1.0915729999542236, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 896, "native_id": "732183ead4206e51ed4df18b9c9f14fe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.514473795890808, "incorrect_loss_raw": 1.679393857717514, "correct_loss_per_char": 0.757236897945404, "incorrect_loss_per_char": 0.839696928858757, "correct_loss_per_token": 1.514473795890808, "incorrect_loss_per_token": 1.679393857717514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.380090355873108, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.380090355873108, "logits_per_char": -0.690045177936554, "num_chars": 2}, {"sum_logits": -1.514473795890808, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.514473795890808, "logits_per_char": -0.757236897945404, "num_chars": 2}, {"sum_logits": -1.5861554145812988, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5861554145812988, "logits_per_char": -0.7930777072906494, "num_chars": 2}, {"sum_logits": -1.5828027725219727, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5828027725219727, "logits_per_char": -0.7914013862609863, "num_chars": 2}, {"sum_logits": -2.1685268878936768, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.1685268878936768, "logits_per_char": -1.0842634439468384, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 897, "native_id": "2632ff6c9b781d3aa74e8dd36b990871", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4549100399017334, "incorrect_loss_raw": 1.6870504915714264, "correct_loss_per_char": 0.7274550199508667, "incorrect_loss_per_char": 0.8435252457857132, "correct_loss_per_token": 1.4549100399017334, "incorrect_loss_per_token": 1.6870504915714264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.344932198524475, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.344932198524475, "logits_per_char": -0.6724660992622375, "num_chars": 2}, {"sum_logits": -1.4549100399017334, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.4549100399017334, "logits_per_char": -0.7274550199508667, "num_chars": 2}, {"sum_logits": -1.7060497999191284, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7060497999191284, "logits_per_char": -0.8530248999595642, "num_chars": 2}, {"sum_logits": -1.6814996004104614, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6814996004104614, "logits_per_char": -0.8407498002052307, "num_chars": 2}, {"sum_logits": -2.0157203674316406, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -2.0157203674316406, "logits_per_char": -1.0078601837158203, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 898, "native_id": "63db79b940f36f0333377f85c19eacb2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4771252870559692, "incorrect_loss_raw": 1.6850315630435944, "correct_loss_per_char": 0.7385626435279846, "incorrect_loss_per_char": 0.8425157815217972, "correct_loss_per_token": 1.4771252870559692, "incorrect_loss_per_token": 1.6850315630435944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3685787916183472, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3685787916183472, "logits_per_char": -0.6842893958091736, "num_chars": 2}, {"sum_logits": -1.4771252870559692, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4771252870559692, "logits_per_char": -0.7385626435279846, "num_chars": 2}, {"sum_logits": -1.5577343702316284, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5577343702316284, "logits_per_char": -0.7788671851158142, "num_chars": 2}, {"sum_logits": -1.725692629814148, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.725692629814148, "logits_per_char": -0.862846314907074, "num_chars": 2}, {"sum_logits": -2.088120460510254, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.088120460510254, "logits_per_char": -1.044060230255127, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 899, "native_id": "1520a8fd3116e7b856947c5e308d7ce5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.496538519859314, "incorrect_loss_raw": 1.659813940525055, "correct_loss_per_char": 0.748269259929657, "incorrect_loss_per_char": 0.8299069702625275, "correct_loss_per_token": 1.496538519859314, "incorrect_loss_per_token": 1.659813940525055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.487310767173767, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.487310767173767, "logits_per_char": -0.7436553835868835, "num_chars": 2}, {"sum_logits": -1.5914462804794312, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5914462804794312, "logits_per_char": -0.7957231402397156, "num_chars": 2}, {"sum_logits": -1.496538519859314, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.496538519859314, "logits_per_char": -0.748269259929657, "num_chars": 2}, {"sum_logits": -1.6041878461837769, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6041878461837769, "logits_per_char": -0.8020939230918884, "num_chars": 2}, {"sum_logits": -1.9563108682632446, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9563108682632446, "logits_per_char": -0.9781554341316223, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 900, "native_id": "bd780fea2d4dd262583446e64c0f314d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7220368385314941, "incorrect_loss_raw": 1.5930349230766296, "correct_loss_per_char": 0.8610184192657471, "incorrect_loss_per_char": 0.7965174615383148, "correct_loss_per_token": 1.7220368385314941, "incorrect_loss_per_token": 1.5930349230766296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4992084503173828, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.4992084503173828, "logits_per_char": -0.7496042251586914, "num_chars": 2}, {"sum_logits": -1.717198133468628, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.717198133468628, "logits_per_char": -0.858599066734314, "num_chars": 2}, {"sum_logits": -1.7220368385314941, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.7220368385314941, "logits_per_char": -0.8610184192657471, "num_chars": 2}, {"sum_logits": -1.5274240970611572, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.5274240970611572, "logits_per_char": -0.7637120485305786, "num_chars": 2}, {"sum_logits": -1.6283090114593506, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.6283090114593506, "logits_per_char": -0.8141545057296753, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 901, "native_id": "99e0b2ddf88ebed98b977043b7c2331b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6789848804473877, "incorrect_loss_raw": 1.600626677274704, "correct_loss_per_char": 0.8394924402236938, "incorrect_loss_per_char": 0.800313338637352, "correct_loss_per_token": 1.6789848804473877, "incorrect_loss_per_token": 1.600626677274704, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5271292924880981, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.5271292924880981, "logits_per_char": -0.7635646462440491, "num_chars": 2}, {"sum_logits": -1.6066868305206299, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.6066868305206299, "logits_per_char": -0.8033434152603149, "num_chars": 2}, {"sum_logits": -1.6362531185150146, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.6362531185150146, "logits_per_char": -0.8181265592575073, "num_chars": 2}, {"sum_logits": -1.6324374675750732, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.6324374675750732, "logits_per_char": -0.8162187337875366, "num_chars": 2}, {"sum_logits": -1.6789848804473877, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.6789848804473877, "logits_per_char": -0.8394924402236938, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 902, "native_id": "eb0e0c4eaf19c1e9b4df3b4d3a11be3d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.73538339138031, "incorrect_loss_raw": 1.6060847640037537, "correct_loss_per_char": 0.867691695690155, "incorrect_loss_per_char": 0.8030423820018768, "correct_loss_per_token": 1.73538339138031, "incorrect_loss_per_token": 1.6060847640037537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3935917615890503, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3935917615890503, "logits_per_char": -0.6967958807945251, "num_chars": 2}, {"sum_logits": -1.4725569486618042, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4725569486618042, "logits_per_char": -0.7362784743309021, "num_chars": 2}, {"sum_logits": -1.73538339138031, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.73538339138031, "logits_per_char": -0.867691695690155, "num_chars": 2}, {"sum_logits": -1.645084261894226, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.645084261894226, "logits_per_char": -0.822542130947113, "num_chars": 2}, {"sum_logits": -1.913106083869934, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.913106083869934, "logits_per_char": -0.956553041934967, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 903, "native_id": "467a3b464b08b3ffc9922e2a726554f6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5665059089660645, "incorrect_loss_raw": 1.6371462941169739, "correct_loss_per_char": 0.7832529544830322, "incorrect_loss_per_char": 0.8185731470584869, "correct_loss_per_token": 1.5665059089660645, "incorrect_loss_per_token": 1.6371462941169739, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4695508480072021, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4695508480072021, "logits_per_char": -0.7347754240036011, "num_chars": 2}, {"sum_logits": -1.5665059089660645, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5665059089660645, "logits_per_char": -0.7832529544830322, "num_chars": 2}, {"sum_logits": -1.5885751247406006, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5885751247406006, "logits_per_char": -0.7942875623703003, "num_chars": 2}, {"sum_logits": -1.6325445175170898, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6325445175170898, "logits_per_char": -0.8162722587585449, "num_chars": 2}, {"sum_logits": -1.857914686203003, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.857914686203003, "logits_per_char": -0.9289573431015015, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 904, "native_id": "dea70fe40fac9ad03bf319bf8a480efa", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.612662434577942, "incorrect_loss_raw": 1.6351059675216675, "correct_loss_per_char": 0.806331217288971, "incorrect_loss_per_char": 0.8175529837608337, "correct_loss_per_token": 1.612662434577942, "incorrect_loss_per_token": 1.6351059675216675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3240844011306763, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3240844011306763, "logits_per_char": -0.6620422005653381, "num_chars": 2}, {"sum_logits": -1.6650809049606323, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6650809049606323, "logits_per_char": -0.8325404524803162, "num_chars": 2}, {"sum_logits": -1.656915545463562, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.656915545463562, "logits_per_char": -0.828457772731781, "num_chars": 2}, {"sum_logits": -1.612662434577942, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.612662434577942, "logits_per_char": -0.806331217288971, "num_chars": 2}, {"sum_logits": -1.8943430185317993, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8943430185317993, "logits_per_char": -0.9471715092658997, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 905, "native_id": "2f1680da0d388a8453150ff3637e4689", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1431853771209717, "incorrect_loss_raw": 1.5217625200748444, "correct_loss_per_char": 1.0715926885604858, "incorrect_loss_per_char": 0.7608812600374222, "correct_loss_per_token": 2.1431853771209717, "incorrect_loss_per_token": 1.5217625200748444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3853657245635986, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3853657245635986, "logits_per_char": -0.6926828622817993, "num_chars": 2}, {"sum_logits": -1.4909871816635132, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4909871816635132, "logits_per_char": -0.7454935908317566, "num_chars": 2}, {"sum_logits": -1.5274085998535156, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5274085998535156, "logits_per_char": -0.7637042999267578, "num_chars": 2}, {"sum_logits": -1.68328857421875, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.68328857421875, "logits_per_char": -0.841644287109375, "num_chars": 2}, {"sum_logits": -2.1431853771209717, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.1431853771209717, "logits_per_char": -1.0715926885604858, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 906, "native_id": "8369adc4b4710d00f917d80a75d844d7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6759644746780396, "incorrect_loss_raw": 1.6393976211547852, "correct_loss_per_char": 0.8379822373390198, "incorrect_loss_per_char": 0.8196988105773926, "correct_loss_per_token": 1.6759644746780396, "incorrect_loss_per_token": 1.6393976211547852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2922484874725342, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.2922484874725342, "logits_per_char": -0.6461242437362671, "num_chars": 2}, {"sum_logits": -1.5501824617385864, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5501824617385864, "logits_per_char": -0.7750912308692932, "num_chars": 2}, {"sum_logits": -1.6759644746780396, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6759644746780396, "logits_per_char": -0.8379822373390198, "num_chars": 2}, {"sum_logits": -1.6294618844985962, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6294618844985962, "logits_per_char": -0.8147309422492981, "num_chars": 2}, {"sum_logits": -2.085697650909424, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.085697650909424, "logits_per_char": -1.042848825454712, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 907, "native_id": "20a3bb788cf408d9a3e25e610fe60905", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5655747652053833, "incorrect_loss_raw": 1.6618292331695557, "correct_loss_per_char": 0.7827873826026917, "incorrect_loss_per_char": 0.8309146165847778, "correct_loss_per_token": 1.5655747652053833, "incorrect_loss_per_token": 1.6618292331695557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4884289503097534, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4884289503097534, "logits_per_char": -0.7442144751548767, "num_chars": 2}, {"sum_logits": -1.6033207178115845, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6033207178115845, "logits_per_char": -0.8016603589057922, "num_chars": 2}, {"sum_logits": -1.4286224842071533, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4286224842071533, "logits_per_char": -0.7143112421035767, "num_chars": 2}, {"sum_logits": -1.5655747652053833, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5655747652053833, "logits_per_char": -0.7827873826026917, "num_chars": 2}, {"sum_logits": -2.1269447803497314, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.1269447803497314, "logits_per_char": -1.0634723901748657, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 908, "native_id": "36c1f50eec01c287b8ef6ffe69fe0528", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.522739052772522, "incorrect_loss_raw": 1.6944469511508942, "correct_loss_per_char": 0.761369526386261, "incorrect_loss_per_char": 0.8472234755754471, "correct_loss_per_token": 1.522739052772522, "incorrect_loss_per_token": 1.6944469511508942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2466577291488647, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.2466577291488647, "logits_per_char": -0.6233288645744324, "num_chars": 2}, {"sum_logits": -1.522739052772522, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.522739052772522, "logits_per_char": -0.761369526386261, "num_chars": 2}, {"sum_logits": -1.644370675086975, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.644370675086975, "logits_per_char": -0.8221853375434875, "num_chars": 2}, {"sum_logits": -1.6771517992019653, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6771517992019653, "logits_per_char": -0.8385758996009827, "num_chars": 2}, {"sum_logits": -2.2096076011657715, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -2.2096076011657715, "logits_per_char": -1.1048038005828857, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 909, "native_id": "5f4825137a27f369fe859e85dfe1793f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4074856042861938, "incorrect_loss_raw": 1.6862327754497528, "correct_loss_per_char": 0.7037428021430969, "incorrect_loss_per_char": 0.8431163877248764, "correct_loss_per_token": 1.4074856042861938, "incorrect_loss_per_token": 1.6862327754497528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4074856042861938, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.4074856042861938, "logits_per_char": -0.7037428021430969, "num_chars": 2}, {"sum_logits": -1.4412310123443604, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.4412310123443604, "logits_per_char": -0.7206155061721802, "num_chars": 2}, {"sum_logits": -1.6867598295211792, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6867598295211792, "logits_per_char": -0.8433799147605896, "num_chars": 2}, {"sum_logits": -1.7878392934799194, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.7878392934799194, "logits_per_char": -0.8939196467399597, "num_chars": 2}, {"sum_logits": -1.8291009664535522, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.8291009664535522, "logits_per_char": -0.9145504832267761, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 910, "native_id": "b3dc6d6a5e2f9d7da8eb72816c80b3f8_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3984121084213257, "incorrect_loss_raw": 1.6905212104320526, "correct_loss_per_char": 0.6992060542106628, "incorrect_loss_per_char": 0.8452606052160263, "correct_loss_per_token": 1.3984121084213257, "incorrect_loss_per_token": 1.6905212104320526, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3984121084213257, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.3984121084213257, "logits_per_char": -0.6992060542106628, "num_chars": 2}, {"sum_logits": -1.5854859352111816, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5854859352111816, "logits_per_char": -0.7927429676055908, "num_chars": 2}, {"sum_logits": -1.569935917854309, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.569935917854309, "logits_per_char": -0.7849679589271545, "num_chars": 2}, {"sum_logits": -1.613147258758545, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.613147258758545, "logits_per_char": -0.8065736293792725, "num_chars": 2}, {"sum_logits": -1.9935157299041748, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.9935157299041748, "logits_per_char": -0.9967578649520874, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 911, "native_id": "63bb6128026ce24209583d0eea75fc27", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6821192502975464, "incorrect_loss_raw": 1.6058789193630219, "correct_loss_per_char": 0.8410596251487732, "incorrect_loss_per_char": 0.8029394596815109, "correct_loss_per_token": 1.6821192502975464, "incorrect_loss_per_token": 1.6058789193630219, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4500741958618164, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4500741958618164, "logits_per_char": -0.7250370979309082, "num_chars": 2}, {"sum_logits": -1.5918408632278442, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5918408632278442, "logits_per_char": -0.7959204316139221, "num_chars": 2}, {"sum_logits": -1.5851024389266968, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5851024389266968, "logits_per_char": -0.7925512194633484, "num_chars": 2}, {"sum_logits": -1.6821192502975464, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6821192502975464, "logits_per_char": -0.8410596251487732, "num_chars": 2}, {"sum_logits": -1.79649817943573, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.79649817943573, "logits_per_char": -0.898249089717865, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 912, "native_id": "e8a9142d2402f818273dd62cf5a7b559_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6875942945480347, "incorrect_loss_raw": 1.5976455509662628, "correct_loss_per_char": 0.8437971472740173, "incorrect_loss_per_char": 0.7988227754831314, "correct_loss_per_token": 1.6875942945480347, "incorrect_loss_per_token": 1.5976455509662628, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5349355936050415, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.5349355936050415, "logits_per_char": -0.7674677968025208, "num_chars": 2}, {"sum_logits": -1.5630446672439575, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5630446672439575, "logits_per_char": -0.7815223336219788, "num_chars": 2}, {"sum_logits": -1.6875942945480347, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6875942945480347, "logits_per_char": -0.8437971472740173, "num_chars": 2}, {"sum_logits": -1.6488767862319946, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6488767862319946, "logits_per_char": -0.8244383931159973, "num_chars": 2}, {"sum_logits": -1.6437251567840576, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6437251567840576, "logits_per_char": -0.8218625783920288, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 913, "native_id": "ead9c9744aee08678759158efe005175", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2890346050262451, "incorrect_loss_raw": 1.7258274853229523, "correct_loss_per_char": 0.6445173025131226, "incorrect_loss_per_char": 0.8629137426614761, "correct_loss_per_token": 1.2890346050262451, "incorrect_loss_per_token": 1.7258274853229523, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2890346050262451, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.2890346050262451, "logits_per_char": -0.6445173025131226, "num_chars": 2}, {"sum_logits": -1.5378942489624023, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5378942489624023, "logits_per_char": -0.7689471244812012, "num_chars": 2}, {"sum_logits": -1.6882537603378296, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6882537603378296, "logits_per_char": -0.8441268801689148, "num_chars": 2}, {"sum_logits": -1.7826439142227173, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7826439142227173, "logits_per_char": -0.8913219571113586, "num_chars": 2}, {"sum_logits": -1.8945180177688599, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.8945180177688599, "logits_per_char": -0.9472590088844299, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 914, "native_id": "ab8bf60f76bc6119459271140ccae781", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.605621576309204, "incorrect_loss_raw": 1.6251420676708221, "correct_loss_per_char": 0.802810788154602, "incorrect_loss_per_char": 0.8125710338354111, "correct_loss_per_token": 1.605621576309204, "incorrect_loss_per_token": 1.6251420676708221, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.625415563583374, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.625415563583374, "logits_per_char": -0.812707781791687, "num_chars": 2}, {"sum_logits": -1.4689749479293823, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.4689749479293823, "logits_per_char": -0.7344874739646912, "num_chars": 2}, {"sum_logits": -1.567845106124878, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.567845106124878, "logits_per_char": -0.783922553062439, "num_chars": 2}, {"sum_logits": -1.605621576309204, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.605621576309204, "logits_per_char": -0.802810788154602, "num_chars": 2}, {"sum_logits": -1.8383326530456543, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.8383326530456543, "logits_per_char": -0.9191663265228271, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 915, "native_id": "3c6e2d95a63316b31986e8c7979582c9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6214238405227661, "incorrect_loss_raw": 1.685044527053833, "correct_loss_per_char": 0.8107119202613831, "incorrect_loss_per_char": 0.8425222635269165, "correct_loss_per_token": 1.6214238405227661, "incorrect_loss_per_token": 1.685044527053833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2584755420684814, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2584755420684814, "logits_per_char": -0.6292377710342407, "num_chars": 2}, {"sum_logits": -1.4196120500564575, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4196120500564575, "logits_per_char": -0.7098060250282288, "num_chars": 2}, {"sum_logits": -1.6214238405227661, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6214238405227661, "logits_per_char": -0.8107119202613831, "num_chars": 2}, {"sum_logits": -1.7740532159805298, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7740532159805298, "logits_per_char": -0.8870266079902649, "num_chars": 2}, {"sum_logits": -2.2880373001098633, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.2880373001098633, "logits_per_char": -1.1440186500549316, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 916, "native_id": "5c171b9837af49211891ce40e4a10204", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5974586009979248, "incorrect_loss_raw": 1.6270373165607452, "correct_loss_per_char": 0.7987293004989624, "incorrect_loss_per_char": 0.8135186582803726, "correct_loss_per_token": 1.5974586009979248, "incorrect_loss_per_token": 1.6270373165607452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4625154733657837, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4625154733657837, "logits_per_char": -0.7312577366828918, "num_chars": 2}, {"sum_logits": -1.5814979076385498, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5814979076385498, "logits_per_char": -0.7907489538192749, "num_chars": 2}, {"sum_logits": -1.6457059383392334, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6457059383392334, "logits_per_char": -0.8228529691696167, "num_chars": 2}, {"sum_logits": -1.5974586009979248, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5974586009979248, "logits_per_char": -0.7987293004989624, "num_chars": 2}, {"sum_logits": -1.818429946899414, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.818429946899414, "logits_per_char": -0.909214973449707, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 917, "native_id": "56d0fc282a144565f2c852415c6fa92c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.586209774017334, "incorrect_loss_raw": 1.7014143466949463, "correct_loss_per_char": 0.793104887008667, "incorrect_loss_per_char": 0.8507071733474731, "correct_loss_per_token": 1.586209774017334, "incorrect_loss_per_token": 1.7014143466949463, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2212048768997192, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2212048768997192, "logits_per_char": -0.6106024384498596, "num_chars": 2}, {"sum_logits": -1.4647282361984253, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4647282361984253, "logits_per_char": -0.7323641180992126, "num_chars": 2}, {"sum_logits": -1.586209774017334, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.586209774017334, "logits_per_char": -0.793104887008667, "num_chars": 2}, {"sum_logits": -1.7876629829406738, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.7876629829406738, "logits_per_char": -0.8938314914703369, "num_chars": 2}, {"sum_logits": -2.332061290740967, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.332061290740967, "logits_per_char": -1.1660306453704834, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 918, "native_id": "5b8a3081c3235d62bc77e2d15f3ad454", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4587159156799316, "incorrect_loss_raw": 1.6724501550197601, "correct_loss_per_char": 0.7293579578399658, "incorrect_loss_per_char": 0.8362250775098801, "correct_loss_per_token": 1.4587159156799316, "incorrect_loss_per_token": 1.6724501550197601, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4587159156799316, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4587159156799316, "logits_per_char": -0.7293579578399658, "num_chars": 2}, {"sum_logits": -1.6546854972839355, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6546854972839355, "logits_per_char": -0.8273427486419678, "num_chars": 2}, {"sum_logits": -1.5631792545318604, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5631792545318604, "logits_per_char": -0.7815896272659302, "num_chars": 2}, {"sum_logits": -1.5085965394973755, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5085965394973755, "logits_per_char": -0.7542982697486877, "num_chars": 2}, {"sum_logits": -1.9633393287658691, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9633393287658691, "logits_per_char": -0.9816696643829346, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 919, "native_id": "e43c4eaa04243ddee30f29171718eb92", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.540872573852539, "incorrect_loss_raw": 1.6362978219985962, "correct_loss_per_char": 0.7704362869262695, "incorrect_loss_per_char": 0.8181489109992981, "correct_loss_per_token": 1.540872573852539, "incorrect_loss_per_token": 1.6362978219985962, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.540872573852539, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.540872573852539, "logits_per_char": -0.7704362869262695, "num_chars": 2}, {"sum_logits": -1.5675389766693115, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.5675389766693115, "logits_per_char": -0.7837694883346558, "num_chars": 2}, {"sum_logits": -1.6986706256866455, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.6986706256866455, "logits_per_char": -0.8493353128433228, "num_chars": 2}, {"sum_logits": -1.579695701599121, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.579695701599121, "logits_per_char": -0.7898478507995605, "num_chars": 2}, {"sum_logits": -1.6992859840393066, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.6992859840393066, "logits_per_char": -0.8496429920196533, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 920, "native_id": "84a736d4b702a6869d8fa8523aee6f1b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4057303667068481, "incorrect_loss_raw": 1.7014826238155365, "correct_loss_per_char": 0.7028651833534241, "incorrect_loss_per_char": 0.8507413119077682, "correct_loss_per_token": 1.4057303667068481, "incorrect_loss_per_token": 1.7014826238155365, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4057303667068481, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4057303667068481, "logits_per_char": -0.7028651833534241, "num_chars": 2}, {"sum_logits": -1.5524121522903442, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5524121522903442, "logits_per_char": -0.7762060761451721, "num_chars": 2}, {"sum_logits": -1.5241485834121704, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5241485834121704, "logits_per_char": -0.7620742917060852, "num_chars": 2}, {"sum_logits": -1.614237904548645, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.614237904548645, "logits_per_char": -0.8071189522743225, "num_chars": 2}, {"sum_logits": -2.1151318550109863, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.1151318550109863, "logits_per_char": -1.0575659275054932, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 921, "native_id": "72611791cdcb040f2d699827fb9cebc4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6620354652404785, "incorrect_loss_raw": 1.6627496480941772, "correct_loss_per_char": 0.8310177326202393, "incorrect_loss_per_char": 0.8313748240470886, "correct_loss_per_token": 1.6620354652404785, "incorrect_loss_per_token": 1.6627496480941772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2158021926879883, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.2158021926879883, "logits_per_char": -0.6079010963439941, "num_chars": 2}, {"sum_logits": -1.4668388366699219, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4668388366699219, "logits_per_char": -0.7334194183349609, "num_chars": 2}, {"sum_logits": -1.6620354652404785, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6620354652404785, "logits_per_char": -0.8310177326202393, "num_chars": 2}, {"sum_logits": -1.8314220905303955, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.8314220905303955, "logits_per_char": -0.9157110452651978, "num_chars": 2}, {"sum_logits": -2.1369354724884033, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.1369354724884033, "logits_per_char": -1.0684677362442017, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 922, "native_id": "4477fb61fde4bb8695c241dfc366b554", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.632506251335144, "incorrect_loss_raw": 1.643190622329712, "correct_loss_per_char": 0.816253125667572, "incorrect_loss_per_char": 0.821595311164856, "correct_loss_per_token": 1.632506251335144, "incorrect_loss_per_token": 1.643190622329712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4476356506347656, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4476356506347656, "logits_per_char": -0.7238178253173828, "num_chars": 2}, {"sum_logits": -1.448452115058899, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.448452115058899, "logits_per_char": -0.7242260575294495, "num_chars": 2}, {"sum_logits": -1.5609134435653687, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5609134435653687, "logits_per_char": -0.7804567217826843, "num_chars": 2}, {"sum_logits": -1.632506251335144, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.632506251335144, "logits_per_char": -0.816253125667572, "num_chars": 2}, {"sum_logits": -2.1157612800598145, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.1157612800598145, "logits_per_char": -1.0578806400299072, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 923, "native_id": "ce246bc94a54431b9c0530e71d2456b5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0342135429382324, "incorrect_loss_raw": 1.5414568781852722, "correct_loss_per_char": 1.0171067714691162, "incorrect_loss_per_char": 0.7707284390926361, "correct_loss_per_token": 2.0342135429382324, "incorrect_loss_per_token": 1.5414568781852722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3310896158218384, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3310896158218384, "logits_per_char": -0.6655448079109192, "num_chars": 2}, {"sum_logits": -1.5174821615219116, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5174821615219116, "logits_per_char": -0.7587410807609558, "num_chars": 2}, {"sum_logits": -1.6162419319152832, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6162419319152832, "logits_per_char": -0.8081209659576416, "num_chars": 2}, {"sum_logits": -1.7010138034820557, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7010138034820557, "logits_per_char": -0.8505069017410278, "num_chars": 2}, {"sum_logits": -2.0342135429382324, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.0342135429382324, "logits_per_char": -1.0171067714691162, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 924, "native_id": "2eef2d255fe629414f4d24ade8590102", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6147149801254272, "incorrect_loss_raw": 1.6279785335063934, "correct_loss_per_char": 0.8073574900627136, "incorrect_loss_per_char": 0.8139892667531967, "correct_loss_per_token": 1.6147149801254272, "incorrect_loss_per_token": 1.6279785335063934, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4772549867630005, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4772549867630005, "logits_per_char": -0.7386274933815002, "num_chars": 2}, {"sum_logits": -1.5926603078842163, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5926603078842163, "logits_per_char": -0.7963301539421082, "num_chars": 2}, {"sum_logits": -1.6147149801254272, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6147149801254272, "logits_per_char": -0.8073574900627136, "num_chars": 2}, {"sum_logits": -1.5322080850601196, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5322080850601196, "logits_per_char": -0.7661040425300598, "num_chars": 2}, {"sum_logits": -1.9097907543182373, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9097907543182373, "logits_per_char": -0.9548953771591187, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 925, "native_id": "2f85d53721ccc8b3fa4cfc184186d124", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.440824031829834, "incorrect_loss_raw": 1.6752954125404358, "correct_loss_per_char": 0.720412015914917, "incorrect_loss_per_char": 0.8376477062702179, "correct_loss_per_token": 1.440824031829834, "incorrect_loss_per_token": 1.6752954125404358, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.440824031829834, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.440824031829834, "logits_per_char": -0.720412015914917, "num_chars": 2}, {"sum_logits": -1.5334311723709106, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.5334311723709106, "logits_per_char": -0.7667155861854553, "num_chars": 2}, {"sum_logits": -1.5823819637298584, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.5823819637298584, "logits_per_char": -0.7911909818649292, "num_chars": 2}, {"sum_logits": -1.652487874031067, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.652487874031067, "logits_per_char": -0.8262439370155334, "num_chars": 2}, {"sum_logits": -1.9328806400299072, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.9328806400299072, "logits_per_char": -0.9664403200149536, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 926, "native_id": "2192c5c2145a6e03755ad89a02e64055", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5861780643463135, "incorrect_loss_raw": 1.6313989758491516, "correct_loss_per_char": 0.7930890321731567, "incorrect_loss_per_char": 0.8156994879245758, "correct_loss_per_token": 1.5861780643463135, "incorrect_loss_per_token": 1.6313989758491516, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4899089336395264, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.4899089336395264, "logits_per_char": -0.7449544668197632, "num_chars": 2}, {"sum_logits": -1.5861780643463135, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5861780643463135, "logits_per_char": -0.7930890321731567, "num_chars": 2}, {"sum_logits": -1.576585292816162, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.576585292816162, "logits_per_char": -0.788292646408081, "num_chars": 2}, {"sum_logits": -1.5747673511505127, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5747673511505127, "logits_per_char": -0.7873836755752563, "num_chars": 2}, {"sum_logits": -1.8843343257904053, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.8843343257904053, "logits_per_char": -0.9421671628952026, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 927, "native_id": "bea07406aaadeef50110883b6932d86a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2632859945297241, "incorrect_loss_raw": 1.7463878691196442, "correct_loss_per_char": 0.6316429972648621, "incorrect_loss_per_char": 0.8731939345598221, "correct_loss_per_token": 1.2632859945297241, "incorrect_loss_per_token": 1.7463878691196442, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2632859945297241, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.2632859945297241, "logits_per_char": -0.6316429972648621, "num_chars": 2}, {"sum_logits": -1.6050812005996704, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6050812005996704, "logits_per_char": -0.8025406002998352, "num_chars": 2}, {"sum_logits": -1.5537611246109009, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5537611246109009, "logits_per_char": -0.7768805623054504, "num_chars": 2}, {"sum_logits": -1.728509783744812, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.728509783744812, "logits_per_char": -0.864254891872406, "num_chars": 2}, {"sum_logits": -2.0981993675231934, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0981993675231934, "logits_per_char": -1.0490996837615967, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 928, "native_id": "7a58e7e7bf76658751e850f790922aba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2487120628356934, "incorrect_loss_raw": 1.504579871892929, "correct_loss_per_char": 1.1243560314178467, "incorrect_loss_per_char": 0.7522899359464645, "correct_loss_per_token": 2.2487120628356934, "incorrect_loss_per_token": 1.504579871892929, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.387239694595337, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.387239694595337, "logits_per_char": -0.6936198472976685, "num_chars": 2}, {"sum_logits": -1.5545822381973267, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5545822381973267, "logits_per_char": -0.7772911190986633, "num_chars": 2}, {"sum_logits": -1.5319980382919312, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5319980382919312, "logits_per_char": -0.7659990191459656, "num_chars": 2}, {"sum_logits": -1.5444995164871216, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5444995164871216, "logits_per_char": -0.7722497582435608, "num_chars": 2}, {"sum_logits": -2.2487120628356934, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.2487120628356934, "logits_per_char": -1.1243560314178467, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 929, "native_id": "76b2c6d254f9127b4fd66d90e1a330e7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6056373119354248, "incorrect_loss_raw": 1.6638071835041046, "correct_loss_per_char": 0.8028186559677124, "incorrect_loss_per_char": 0.8319035917520523, "correct_loss_per_token": 1.6056373119354248, "incorrect_loss_per_token": 1.6638071835041046, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3315157890319824, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3315157890319824, "logits_per_char": -0.6657578945159912, "num_chars": 2}, {"sum_logits": -1.6056373119354248, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6056373119354248, "logits_per_char": -0.8028186559677124, "num_chars": 2}, {"sum_logits": -1.584462285041809, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.584462285041809, "logits_per_char": -0.7922311425209045, "num_chars": 2}, {"sum_logits": -1.5603137016296387, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5603137016296387, "logits_per_char": -0.7801568508148193, "num_chars": 2}, {"sum_logits": -2.1789369583129883, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.1789369583129883, "logits_per_char": -1.0894684791564941, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 930, "native_id": "cdd3d074031fbd3efeb4f9408abef04e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.512286901473999, "incorrect_loss_raw": 1.6560164391994476, "correct_loss_per_char": 0.7561434507369995, "incorrect_loss_per_char": 0.8280082195997238, "correct_loss_per_token": 1.512286901473999, "incorrect_loss_per_token": 1.6560164391994476, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4680495262145996, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4680495262145996, "logits_per_char": -0.7340247631072998, "num_chars": 2}, {"sum_logits": -1.7094837427139282, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7094837427139282, "logits_per_char": -0.8547418713569641, "num_chars": 2}, {"sum_logits": -1.512286901473999, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.512286901473999, "logits_per_char": -0.7561434507369995, "num_chars": 2}, {"sum_logits": -1.5419420003890991, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5419420003890991, "logits_per_char": -0.7709710001945496, "num_chars": 2}, {"sum_logits": -1.9045904874801636, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9045904874801636, "logits_per_char": -0.9522952437400818, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 931, "native_id": "359aed918343d228e67cef329b693904", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6488415002822876, "incorrect_loss_raw": 1.6340495645999908, "correct_loss_per_char": 0.8244207501411438, "incorrect_loss_per_char": 0.8170247822999954, "correct_loss_per_token": 1.6488415002822876, "incorrect_loss_per_token": 1.6340495645999908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.330021858215332, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.330021858215332, "logits_per_char": -0.665010929107666, "num_chars": 2}, {"sum_logits": -1.5322307348251343, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5322307348251343, "logits_per_char": -0.7661153674125671, "num_chars": 2}, {"sum_logits": -1.6488415002822876, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6488415002822876, "logits_per_char": -0.8244207501411438, "num_chars": 2}, {"sum_logits": -1.6760178804397583, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6760178804397583, "logits_per_char": -0.8380089402198792, "num_chars": 2}, {"sum_logits": -1.9979277849197388, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9979277849197388, "logits_per_char": -0.9989638924598694, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 932, "native_id": "cf02cca40a47c2deefd8b2e5a5ff2f70", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4417657852172852, "incorrect_loss_raw": 1.6916621923446655, "correct_loss_per_char": 0.7208828926086426, "incorrect_loss_per_char": 0.8458310961723328, "correct_loss_per_token": 1.4417657852172852, "incorrect_loss_per_token": 1.6916621923446655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4417657852172852, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4417657852172852, "logits_per_char": -0.7208828926086426, "num_chars": 2}, {"sum_logits": -1.4616291522979736, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4616291522979736, "logits_per_char": -0.7308145761489868, "num_chars": 2}, {"sum_logits": -1.5557887554168701, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5557887554168701, "logits_per_char": -0.7778943777084351, "num_chars": 2}, {"sum_logits": -1.6387288570404053, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6387288570404053, "logits_per_char": -0.8193644285202026, "num_chars": 2}, {"sum_logits": -2.110502004623413, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.110502004623413, "logits_per_char": -1.0552510023117065, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 933, "native_id": "ac1abecdbbd7bcde6592ca645c2ecb1e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3265167474746704, "incorrect_loss_raw": 1.7147176861763, "correct_loss_per_char": 0.6632583737373352, "incorrect_loss_per_char": 0.85735884308815, "correct_loss_per_token": 1.3265167474746704, "incorrect_loss_per_token": 1.7147176861763, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3265167474746704, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3265167474746704, "logits_per_char": -0.6632583737373352, "num_chars": 2}, {"sum_logits": -1.5992050170898438, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5992050170898438, "logits_per_char": -0.7996025085449219, "num_chars": 2}, {"sum_logits": -1.5467760562896729, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5467760562896729, "logits_per_char": -0.7733880281448364, "num_chars": 2}, {"sum_logits": -1.7260754108428955, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7260754108428955, "logits_per_char": -0.8630377054214478, "num_chars": 2}, {"sum_logits": -1.986814260482788, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.986814260482788, "logits_per_char": -0.993407130241394, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 934, "native_id": "2adbb4fc0d5249dc411dda433f378591", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7534538507461548, "incorrect_loss_raw": 1.5969405770301819, "correct_loss_per_char": 0.8767269253730774, "incorrect_loss_per_char": 0.7984702885150909, "correct_loss_per_token": 1.7534538507461548, "incorrect_loss_per_token": 1.5969405770301819, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3598910570144653, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3598910570144653, "logits_per_char": -0.6799455285072327, "num_chars": 2}, {"sum_logits": -1.5655699968338013, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5655699968338013, "logits_per_char": -0.7827849984169006, "num_chars": 2}, {"sum_logits": -1.623555064201355, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.623555064201355, "logits_per_char": -0.8117775321006775, "num_chars": 2}, {"sum_logits": -1.7534538507461548, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7534538507461548, "logits_per_char": -0.8767269253730774, "num_chars": 2}, {"sum_logits": -1.838746190071106, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.838746190071106, "logits_per_char": -0.919373095035553, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 935, "native_id": "5a1c8a9dbbb60e523cc1ba14a370729c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8992946147918701, "incorrect_loss_raw": 1.5629205107688904, "correct_loss_per_char": 0.9496473073959351, "incorrect_loss_per_char": 0.7814602553844452, "correct_loss_per_token": 1.8992946147918701, "incorrect_loss_per_token": 1.5629205107688904, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3494300842285156, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3494300842285156, "logits_per_char": -0.6747150421142578, "num_chars": 2}, {"sum_logits": -1.5816285610198975, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5816285610198975, "logits_per_char": -0.7908142805099487, "num_chars": 2}, {"sum_logits": -1.6111361980438232, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6111361980438232, "logits_per_char": -0.8055680990219116, "num_chars": 2}, {"sum_logits": -1.7094871997833252, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7094871997833252, "logits_per_char": -0.8547435998916626, "num_chars": 2}, {"sum_logits": -1.8992946147918701, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.8992946147918701, "logits_per_char": -0.9496473073959351, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 936, "native_id": "3665b329f93f7c84edeabe394140f8d2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.595693588256836, "incorrect_loss_raw": 1.6528989970684052, "correct_loss_per_char": 0.797846794128418, "incorrect_loss_per_char": 0.8264494985342026, "correct_loss_per_token": 1.595693588256836, "incorrect_loss_per_token": 1.6528989970684052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5470213890075684, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5470213890075684, "logits_per_char": -0.7735106945037842, "num_chars": 2}, {"sum_logits": -1.595693588256836, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.595693588256836, "logits_per_char": -0.797846794128418, "num_chars": 2}, {"sum_logits": -1.4416723251342773, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4416723251342773, "logits_per_char": -0.7208361625671387, "num_chars": 2}, {"sum_logits": -1.5002845525741577, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5002845525741577, "logits_per_char": -0.7501422762870789, "num_chars": 2}, {"sum_logits": -2.122617721557617, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.122617721557617, "logits_per_char": -1.0613088607788086, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 937, "native_id": "dbcedaa6a6f1f68bc8f2bf7aef23294e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.082090377807617, "incorrect_loss_raw": 1.5331832468509674, "correct_loss_per_char": 1.0410451889038086, "incorrect_loss_per_char": 0.7665916234254837, "correct_loss_per_token": 2.082090377807617, "incorrect_loss_per_token": 1.5331832468509674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3456013202667236, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3456013202667236, "logits_per_char": -0.6728006601333618, "num_chars": 2}, {"sum_logits": -1.530479073524475, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.530479073524475, "logits_per_char": -0.7652395367622375, "num_chars": 2}, {"sum_logits": -1.700250506401062, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.700250506401062, "logits_per_char": -0.850125253200531, "num_chars": 2}, {"sum_logits": -1.5564020872116089, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5564020872116089, "logits_per_char": -0.7782010436058044, "num_chars": 2}, {"sum_logits": -2.082090377807617, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.082090377807617, "logits_per_char": -1.0410451889038086, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 938, "native_id": "ba3a2b9ff289c106051163f840a6f5ba", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6243635416030884, "incorrect_loss_raw": 1.6331702172756195, "correct_loss_per_char": 0.8121817708015442, "incorrect_loss_per_char": 0.8165851086378098, "correct_loss_per_token": 1.6243635416030884, "incorrect_loss_per_token": 1.6331702172756195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4842597246170044, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4842597246170044, "logits_per_char": -0.7421298623085022, "num_chars": 2}, {"sum_logits": -1.45109224319458, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.45109224319458, "logits_per_char": -0.72554612159729, "num_chars": 2}, {"sum_logits": -1.6243635416030884, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6243635416030884, "logits_per_char": -0.8121817708015442, "num_chars": 2}, {"sum_logits": -1.6099152565002441, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6099152565002441, "logits_per_char": -0.8049576282501221, "num_chars": 2}, {"sum_logits": -1.9874136447906494, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9874136447906494, "logits_per_char": -0.9937068223953247, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 939, "native_id": "13fc28f53423a9b3a656c9431df1b3b5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0665087699890137, "incorrect_loss_raw": 1.5486976206302643, "correct_loss_per_char": 1.0332543849945068, "incorrect_loss_per_char": 0.7743488103151321, "correct_loss_per_token": 2.0665087699890137, "incorrect_loss_per_token": 1.5486976206302643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2420947551727295, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2420947551727295, "logits_per_char": -0.6210473775863647, "num_chars": 2}, {"sum_logits": -1.508745789527893, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.508745789527893, "logits_per_char": -0.7543728947639465, "num_chars": 2}, {"sum_logits": -1.6321064233779907, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6321064233779907, "logits_per_char": -0.8160532116889954, "num_chars": 2}, {"sum_logits": -1.8118435144424438, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.8118435144424438, "logits_per_char": -0.9059217572212219, "num_chars": 2}, {"sum_logits": -2.0665087699890137, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0665087699890137, "logits_per_char": -1.0332543849945068, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 940, "native_id": "3f4b48708d08f8bf7bec796531023f9c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6044319868087769, "incorrect_loss_raw": 1.623552143573761, "correct_loss_per_char": 0.8022159934043884, "incorrect_loss_per_char": 0.8117760717868805, "correct_loss_per_token": 1.6044319868087769, "incorrect_loss_per_token": 1.623552143573761, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6044319868087769, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.6044319868087769, "logits_per_char": -0.8022159934043884, "num_chars": 2}, {"sum_logits": -1.5181618928909302, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.5181618928909302, "logits_per_char": -0.7590809464454651, "num_chars": 2}, {"sum_logits": -1.6104038953781128, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.6104038953781128, "logits_per_char": -0.8052019476890564, "num_chars": 2}, {"sum_logits": -1.552075982093811, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.552075982093811, "logits_per_char": -0.7760379910469055, "num_chars": 2}, {"sum_logits": -1.81356680393219, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.81356680393219, "logits_per_char": -0.906783401966095, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 941, "native_id": "c61790eb63ff6652b878ca051493c07d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4523708820343018, "incorrect_loss_raw": 1.6813026070594788, "correct_loss_per_char": 0.7261854410171509, "incorrect_loss_per_char": 0.8406513035297394, "correct_loss_per_token": 1.4523708820343018, "incorrect_loss_per_token": 1.6813026070594788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4692778587341309, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4692778587341309, "logits_per_char": -0.7346389293670654, "num_chars": 2}, {"sum_logits": -1.5343611240386963, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5343611240386963, "logits_per_char": -0.7671805620193481, "num_chars": 2}, {"sum_logits": -1.4523708820343018, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4523708820343018, "logits_per_char": -0.7261854410171509, "num_chars": 2}, {"sum_logits": -1.6866626739501953, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6866626739501953, "logits_per_char": -0.8433313369750977, "num_chars": 2}, {"sum_logits": -2.0349087715148926, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0349087715148926, "logits_per_char": -1.0174543857574463, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 942, "native_id": "e5ebbe0ea4097bb197ac525b49108362", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6382436752319336, "incorrect_loss_raw": 1.6151533722877502, "correct_loss_per_char": 0.8191218376159668, "incorrect_loss_per_char": 0.8075766861438751, "correct_loss_per_token": 1.6382436752319336, "incorrect_loss_per_token": 1.6151533722877502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4564919471740723, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4564919471740723, "logits_per_char": -0.7282459735870361, "num_chars": 2}, {"sum_logits": -1.6952745914459229, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6952745914459229, "logits_per_char": -0.8476372957229614, "num_chars": 2}, {"sum_logits": -1.6382436752319336, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6382436752319336, "logits_per_char": -0.8191218376159668, "num_chars": 2}, {"sum_logits": -1.6319694519042969, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6319694519042969, "logits_per_char": -0.8159847259521484, "num_chars": 2}, {"sum_logits": -1.676877498626709, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.676877498626709, "logits_per_char": -0.8384387493133545, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 943, "native_id": "029e36d8f65982b142c319064dc5e32f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4340068101882935, "incorrect_loss_raw": 1.7538962662220001, "correct_loss_per_char": 0.7170034050941467, "incorrect_loss_per_char": 0.8769481331110001, "correct_loss_per_token": 1.4340068101882935, "incorrect_loss_per_token": 1.7538962662220001, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.157732367515564, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.157732367515564, "logits_per_char": -0.578866183757782, "num_chars": 2}, {"sum_logits": -1.4340068101882935, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4340068101882935, "logits_per_char": -0.7170034050941467, "num_chars": 2}, {"sum_logits": -1.707375407218933, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.707375407218933, "logits_per_char": -0.8536877036094666, "num_chars": 2}, {"sum_logits": -1.8009690046310425, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8009690046310425, "logits_per_char": -0.9004845023155212, "num_chars": 2}, {"sum_logits": -2.349508285522461, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.349508285522461, "logits_per_char": -1.1747541427612305, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 944, "native_id": "3d1a67f87b34303f97549ba83e5521c2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3915736675262451, "incorrect_loss_raw": 1.7033635675907135, "correct_loss_per_char": 0.6957868337631226, "incorrect_loss_per_char": 0.8516817837953568, "correct_loss_per_token": 1.3915736675262451, "incorrect_loss_per_token": 1.7033635675907135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3915736675262451, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3915736675262451, "logits_per_char": -0.6957868337631226, "num_chars": 2}, {"sum_logits": -1.5923832654953003, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5923832654953003, "logits_per_char": -0.7961916327476501, "num_chars": 2}, {"sum_logits": -1.576933741569519, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.576933741569519, "logits_per_char": -0.7884668707847595, "num_chars": 2}, {"sum_logits": -1.5391215085983276, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5391215085983276, "logits_per_char": -0.7695607542991638, "num_chars": 2}, {"sum_logits": -2.105015754699707, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.105015754699707, "logits_per_char": -1.0525078773498535, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 945, "native_id": "e050bce7048da1b3743a54153e91694e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4169840812683105, "incorrect_loss_raw": 1.689856767654419, "correct_loss_per_char": 0.7084920406341553, "incorrect_loss_per_char": 0.8449283838272095, "correct_loss_per_token": 1.4169840812683105, "incorrect_loss_per_token": 1.689856767654419, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4169840812683105, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4169840812683105, "logits_per_char": -0.7084920406341553, "num_chars": 2}, {"sum_logits": -1.4511122703552246, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4511122703552246, "logits_per_char": -0.7255561351776123, "num_chars": 2}, {"sum_logits": -1.5904464721679688, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5904464721679688, "logits_per_char": -0.7952232360839844, "num_chars": 2}, {"sum_logits": -1.7132055759429932, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7132055759429932, "logits_per_char": -0.8566027879714966, "num_chars": 2}, {"sum_logits": -2.0046627521514893, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.0046627521514893, "logits_per_char": -1.0023313760757446, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 946, "native_id": "8233ccb60dd0c0ff3b7ca5d73e5681f2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.556598424911499, "incorrect_loss_raw": 1.6700472831726074, "correct_loss_per_char": 0.7782992124557495, "incorrect_loss_per_char": 0.8350236415863037, "correct_loss_per_token": 1.556598424911499, "incorrect_loss_per_token": 1.6700472831726074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2729597091674805, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2729597091674805, "logits_per_char": -0.6364798545837402, "num_chars": 2}, {"sum_logits": -1.556598424911499, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.556598424911499, "logits_per_char": -0.7782992124557495, "num_chars": 2}, {"sum_logits": -1.5698120594024658, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5698120594024658, "logits_per_char": -0.7849060297012329, "num_chars": 2}, {"sum_logits": -1.8266081809997559, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.8266081809997559, "logits_per_char": -0.9133040904998779, "num_chars": 2}, {"sum_logits": -2.0108091831207275, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0108091831207275, "logits_per_char": -1.0054045915603638, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 947, "native_id": "eb4b2cd0f2a69686e5a82250c5806b84", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.020911455154419, "incorrect_loss_raw": 1.5456904172897339, "correct_loss_per_char": 1.0104557275772095, "incorrect_loss_per_char": 0.7728452086448669, "correct_loss_per_token": 2.020911455154419, "incorrect_loss_per_token": 1.5456904172897339, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3251774311065674, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3251774311065674, "logits_per_char": -0.6625887155532837, "num_chars": 2}, {"sum_logits": -1.5302860736846924, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5302860736846924, "logits_per_char": -0.7651430368423462, "num_chars": 2}, {"sum_logits": -1.5532562732696533, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5532562732696533, "logits_per_char": -0.7766281366348267, "num_chars": 2}, {"sum_logits": -1.7740418910980225, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7740418910980225, "logits_per_char": -0.8870209455490112, "num_chars": 2}, {"sum_logits": -2.020911455154419, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.020911455154419, "logits_per_char": -1.0104557275772095, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 948, "native_id": "d0bda97a087904320216e4d0b8a08a8d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7042038440704346, "incorrect_loss_raw": 1.6408186256885529, "correct_loss_per_char": 0.8521019220352173, "incorrect_loss_per_char": 0.8204093128442764, "correct_loss_per_token": 1.7042038440704346, "incorrect_loss_per_token": 1.6408186256885529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3255213499069214, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.3255213499069214, "logits_per_char": -0.6627606749534607, "num_chars": 2}, {"sum_logits": -1.4600656032562256, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.4600656032562256, "logits_per_char": -0.7300328016281128, "num_chars": 2}, {"sum_logits": -1.6016411781311035, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6016411781311035, "logits_per_char": -0.8008205890655518, "num_chars": 2}, {"sum_logits": -1.7042038440704346, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7042038440704346, "logits_per_char": -0.8521019220352173, "num_chars": 2}, {"sum_logits": -2.176046371459961, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.176046371459961, "logits_per_char": -1.0880231857299805, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 949, "native_id": "e216381e9f0ddd1d248ee25fccca2b1f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.273730754852295, "incorrect_loss_raw": 1.536931186914444, "correct_loss_per_char": 1.1368653774261475, "incorrect_loss_per_char": 0.768465593457222, "correct_loss_per_token": 2.273730754852295, "incorrect_loss_per_token": 1.536931186914444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1425129175186157, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.1425129175186157, "logits_per_char": -0.5712564587593079, "num_chars": 2}, {"sum_logits": -1.4708282947540283, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4708282947540283, "logits_per_char": -0.7354141473770142, "num_chars": 2}, {"sum_logits": -1.6767899990081787, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6767899990081787, "logits_per_char": -0.8383949995040894, "num_chars": 2}, {"sum_logits": -1.8575935363769531, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8575935363769531, "logits_per_char": -0.9287967681884766, "num_chars": 2}, {"sum_logits": -2.273730754852295, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.273730754852295, "logits_per_char": -1.1368653774261475, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 950, "native_id": "b1fba9ad6193c6751ddb3f58f7f39b35", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.405409574508667, "incorrect_loss_raw": 1.684333860874176, "correct_loss_per_char": 0.7027047872543335, "incorrect_loss_per_char": 0.842166930437088, "correct_loss_per_token": 1.405409574508667, "incorrect_loss_per_token": 1.684333860874176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.405409574508667, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.405409574508667, "logits_per_char": -0.7027047872543335, "num_chars": 2}, {"sum_logits": -1.5125489234924316, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5125489234924316, "logits_per_char": -0.7562744617462158, "num_chars": 2}, {"sum_logits": -1.6646814346313477, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6646814346313477, "logits_per_char": -0.8323407173156738, "num_chars": 2}, {"sum_logits": -1.653482437133789, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.653482437133789, "logits_per_char": -0.8267412185668945, "num_chars": 2}, {"sum_logits": -1.9066226482391357, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9066226482391357, "logits_per_char": -0.9533113241195679, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 951, "native_id": "3ceae7a18073050bd2c0448abef1f393", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7116807699203491, "incorrect_loss_raw": 1.600169986486435, "correct_loss_per_char": 0.8558403849601746, "incorrect_loss_per_char": 0.8000849932432175, "correct_loss_per_token": 1.7116807699203491, "incorrect_loss_per_token": 1.600169986486435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.399815559387207, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.399815559387207, "logits_per_char": -0.6999077796936035, "num_chars": 2}, {"sum_logits": -1.7116807699203491, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7116807699203491, "logits_per_char": -0.8558403849601746, "num_chars": 2}, {"sum_logits": -1.5856910943984985, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5856910943984985, "logits_per_char": -0.7928455471992493, "num_chars": 2}, {"sum_logits": -1.6716524362564087, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6716524362564087, "logits_per_char": -0.8358262181282043, "num_chars": 2}, {"sum_logits": -1.7435208559036255, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7435208559036255, "logits_per_char": -0.8717604279518127, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 952, "native_id": "f1182e3a070f5a1be529843aa6e5c20c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.605462670326233, "incorrect_loss_raw": 1.6729693710803986, "correct_loss_per_char": 0.8027313351631165, "incorrect_loss_per_char": 0.8364846855401993, "correct_loss_per_token": 1.605462670326233, "incorrect_loss_per_token": 1.6729693710803986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.353037714958191, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.353037714958191, "logits_per_char": -0.6765188574790955, "num_chars": 2}, {"sum_logits": -1.5195776224136353, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5195776224136353, "logits_per_char": -0.7597888112068176, "num_chars": 2}, {"sum_logits": -1.5300594568252563, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5300594568252563, "logits_per_char": -0.7650297284126282, "num_chars": 2}, {"sum_logits": -1.605462670326233, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.605462670326233, "logits_per_char": -0.8027313351631165, "num_chars": 2}, {"sum_logits": -2.2892026901245117, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.2892026901245117, "logits_per_char": -1.1446013450622559, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 953, "native_id": "5799089c131e26473697afc54d5f6964", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9089852571487427, "incorrect_loss_raw": 1.5614306330680847, "correct_loss_per_char": 0.9544926285743713, "incorrect_loss_per_char": 0.7807153165340424, "correct_loss_per_token": 1.9089852571487427, "incorrect_loss_per_token": 1.5614306330680847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3482221364974976, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3482221364974976, "logits_per_char": -0.6741110682487488, "num_chars": 2}, {"sum_logits": -1.568869709968567, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.568869709968567, "logits_per_char": -0.7844348549842834, "num_chars": 2}, {"sum_logits": -1.6129776239395142, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6129776239395142, "logits_per_char": -0.8064888119697571, "num_chars": 2}, {"sum_logits": -1.7156530618667603, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7156530618667603, "logits_per_char": -0.8578265309333801, "num_chars": 2}, {"sum_logits": -1.9089852571487427, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9089852571487427, "logits_per_char": -0.9544926285743713, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 954, "native_id": "7ce1f99e8185489a7113e6d18c71abb0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4992939233779907, "incorrect_loss_raw": 1.6613940596580505, "correct_loss_per_char": 0.7496469616889954, "incorrect_loss_per_char": 0.8306970298290253, "correct_loss_per_token": 1.4992939233779907, "incorrect_loss_per_token": 1.6613940596580505, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5076922178268433, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5076922178268433, "logits_per_char": -0.7538461089134216, "num_chars": 2}, {"sum_logits": -1.578987717628479, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.578987717628479, "logits_per_char": -0.7894938588142395, "num_chars": 2}, {"sum_logits": -1.576119065284729, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.576119065284729, "logits_per_char": -0.7880595326423645, "num_chars": 2}, {"sum_logits": -1.4992939233779907, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.4992939233779907, "logits_per_char": -0.7496469616889954, "num_chars": 2}, {"sum_logits": -1.9827772378921509, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.9827772378921509, "logits_per_char": -0.9913886189460754, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 955, "native_id": "69425fb4cd2dc034e9ff223d2d5676ec", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5480352640151978, "incorrect_loss_raw": 1.6456401944160461, "correct_loss_per_char": 0.7740176320075989, "incorrect_loss_per_char": 0.8228200972080231, "correct_loss_per_token": 1.5480352640151978, "incorrect_loss_per_token": 1.6456401944160461, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4069455862045288, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": true, "logits_per_token": -1.4069455862045288, "logits_per_char": -0.7034727931022644, "num_chars": 2}, {"sum_logits": -1.5480352640151978, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.5480352640151978, "logits_per_char": -0.7740176320075989, "num_chars": 2}, {"sum_logits": -1.6023000478744507, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6023000478744507, "logits_per_char": -0.8011500239372253, "num_chars": 2}, {"sum_logits": -1.752320647239685, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.752320647239685, "logits_per_char": -0.8761603236198425, "num_chars": 2}, {"sum_logits": -1.82099449634552, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.82099449634552, "logits_per_char": -0.91049724817276, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 956, "native_id": "f75b22d5b88ac56ae7df030c1ebeded5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8906069993972778, "incorrect_loss_raw": 1.5556910932064056, "correct_loss_per_char": 0.9453034996986389, "incorrect_loss_per_char": 0.7778455466032028, "correct_loss_per_token": 1.8906069993972778, "incorrect_loss_per_token": 1.5556910932064056, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4948251247406006, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4948251247406006, "logits_per_char": -0.7474125623703003, "num_chars": 2}, {"sum_logits": -1.5907305479049683, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5907305479049683, "logits_per_char": -0.7953652739524841, "num_chars": 2}, {"sum_logits": -1.5673037767410278, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5673037767410278, "logits_per_char": -0.7836518883705139, "num_chars": 2}, {"sum_logits": -1.5699049234390259, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5699049234390259, "logits_per_char": -0.7849524617195129, "num_chars": 2}, {"sum_logits": -1.8906069993972778, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.8906069993972778, "logits_per_char": -0.9453034996986389, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 957, "native_id": "4eb3e69c0d42a2287692d2b9d2cb5979", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4198740720748901, "incorrect_loss_raw": 1.6950806379318237, "correct_loss_per_char": 0.7099370360374451, "incorrect_loss_per_char": 0.8475403189659119, "correct_loss_per_token": 1.4198740720748901, "incorrect_loss_per_token": 1.6950806379318237, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4518156051635742, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4518156051635742, "logits_per_char": -0.7259078025817871, "num_chars": 2}, {"sum_logits": -1.4198740720748901, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4198740720748901, "logits_per_char": -0.7099370360374451, "num_chars": 2}, {"sum_logits": -1.5565873384475708, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5565873384475708, "logits_per_char": -0.7782936692237854, "num_chars": 2}, {"sum_logits": -1.69493567943573, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.69493567943573, "logits_per_char": -0.847467839717865, "num_chars": 2}, {"sum_logits": -2.07698392868042, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.07698392868042, "logits_per_char": -1.03849196434021, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 958, "native_id": "7d937233b4a9043da0b976dbd42d141b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6366366147994995, "incorrect_loss_raw": 1.620700627565384, "correct_loss_per_char": 0.8183183073997498, "incorrect_loss_per_char": 0.810350313782692, "correct_loss_per_token": 1.6366366147994995, "incorrect_loss_per_token": 1.620700627565384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4739634990692139, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.4739634990692139, "logits_per_char": -0.7369817495346069, "num_chars": 2}, {"sum_logits": -1.569930911064148, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.569930911064148, "logits_per_char": -0.784965455532074, "num_chars": 2}, {"sum_logits": -1.5489423274993896, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5489423274993896, "logits_per_char": -0.7744711637496948, "num_chars": 2}, {"sum_logits": -1.6366366147994995, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.6366366147994995, "logits_per_char": -0.8183183073997498, "num_chars": 2}, {"sum_logits": -1.8899657726287842, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.8899657726287842, "logits_per_char": -0.9449828863143921, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 959, "native_id": "6bd176cc91a2a2088807ec446c008856", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3633084297180176, "incorrect_loss_raw": 1.696705162525177, "correct_loss_per_char": 0.6816542148590088, "incorrect_loss_per_char": 0.8483525812625885, "correct_loss_per_token": 1.3633084297180176, "incorrect_loss_per_token": 1.696705162525177, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3633084297180176, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.3633084297180176, "logits_per_char": -0.6816542148590088, "num_chars": 2}, {"sum_logits": -1.6526094675064087, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6526094675064087, "logits_per_char": -0.8263047337532043, "num_chars": 2}, {"sum_logits": -1.6280287504196167, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6280287504196167, "logits_per_char": -0.8140143752098083, "num_chars": 2}, {"sum_logits": -1.638705849647522, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.638705849647522, "logits_per_char": -0.819352924823761, "num_chars": 2}, {"sum_logits": -1.8674765825271606, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.8674765825271606, "logits_per_char": -0.9337382912635803, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 960, "native_id": "c3890d43b84635d9e61c007ca2521d5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.814697027206421, "incorrect_loss_raw": 1.6837342083454132, "correct_loss_per_char": 0.9073485136032104, "incorrect_loss_per_char": 0.8418671041727066, "correct_loss_per_token": 1.814697027206421, "incorrect_loss_per_token": 1.6837342083454132, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1097126007080078, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.1097126007080078, "logits_per_char": -0.5548563003540039, "num_chars": 2}, {"sum_logits": -1.4299002885818481, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4299002885818481, "logits_per_char": -0.7149501442909241, "num_chars": 2}, {"sum_logits": -1.7253594398498535, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7253594398498535, "logits_per_char": -0.8626797199249268, "num_chars": 2}, {"sum_logits": -1.814697027206421, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.814697027206421, "logits_per_char": -0.9073485136032104, "num_chars": 2}, {"sum_logits": -2.4699645042419434, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.4699645042419434, "logits_per_char": -1.2349822521209717, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 961, "native_id": "6195ed74cf445cb5d991e1076a080dde", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.695255160331726, "incorrect_loss_raw": 1.626312404870987, "correct_loss_per_char": 0.847627580165863, "incorrect_loss_per_char": 0.8131562024354935, "correct_loss_per_token": 1.695255160331726, "incorrect_loss_per_token": 1.626312404870987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3321815729141235, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3321815729141235, "logits_per_char": -0.6660907864570618, "num_chars": 2}, {"sum_logits": -1.5109714269638062, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5109714269638062, "logits_per_char": -0.7554857134819031, "num_chars": 2}, {"sum_logits": -1.6277943849563599, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6277943849563599, "logits_per_char": -0.8138971924781799, "num_chars": 2}, {"sum_logits": -1.695255160331726, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.695255160331726, "logits_per_char": -0.847627580165863, "num_chars": 2}, {"sum_logits": -2.034302234649658, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.034302234649658, "logits_per_char": -1.017151117324829, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 962, "native_id": "37644422df4bcd28b3f54bbf3fc2c0f8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.566718578338623, "incorrect_loss_raw": 1.6386320292949677, "correct_loss_per_char": 0.7833592891693115, "incorrect_loss_per_char": 0.8193160146474838, "correct_loss_per_token": 1.566718578338623, "incorrect_loss_per_token": 1.6386320292949677, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4068745374679565, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4068745374679565, "logits_per_char": -0.7034372687339783, "num_chars": 2}, {"sum_logits": -1.566718578338623, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.566718578338623, "logits_per_char": -0.7833592891693115, "num_chars": 2}, {"sum_logits": -1.6474394798278809, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6474394798278809, "logits_per_char": -0.8237197399139404, "num_chars": 2}, {"sum_logits": -1.6839990615844727, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6839990615844727, "logits_per_char": -0.8419995307922363, "num_chars": 2}, {"sum_logits": -1.8162150382995605, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.8162150382995605, "logits_per_char": -0.9081075191497803, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 963, "native_id": "23d97480fe45bace231503f8fc367a5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.3566951751708984, "incorrect_loss_raw": 1.5135513544082642, "correct_loss_per_char": 1.1783475875854492, "incorrect_loss_per_char": 0.7567756772041321, "correct_loss_per_token": 2.3566951751708984, "incorrect_loss_per_token": 1.5135513544082642, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1959785223007202, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.1959785223007202, "logits_per_char": -0.5979892611503601, "num_chars": 2}, {"sum_logits": -1.462830901145935, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.462830901145935, "logits_per_char": -0.7314154505729675, "num_chars": 2}, {"sum_logits": -1.699702262878418, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.699702262878418, "logits_per_char": -0.849851131439209, "num_chars": 2}, {"sum_logits": -1.6956937313079834, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6956937313079834, "logits_per_char": -0.8478468656539917, "num_chars": 2}, {"sum_logits": -2.3566951751708984, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.3566951751708984, "logits_per_char": -1.1783475875854492, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 964, "native_id": "15556e26feaa5a8a29c9f30896e535d4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4108130931854248, "incorrect_loss_raw": 1.704131931066513, "correct_loss_per_char": 0.7054065465927124, "incorrect_loss_per_char": 0.8520659655332565, "correct_loss_per_token": 1.4108130931854248, "incorrect_loss_per_token": 1.704131931066513, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4108130931854248, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4108130931854248, "logits_per_char": -0.7054065465927124, "num_chars": 2}, {"sum_logits": -1.482953429222107, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.482953429222107, "logits_per_char": -0.7414767146110535, "num_chars": 2}, {"sum_logits": -1.5774818658828735, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5774818658828735, "logits_per_char": -0.7887409329414368, "num_chars": 2}, {"sum_logits": -1.59211003780365, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.59211003780365, "logits_per_char": -0.796055018901825, "num_chars": 2}, {"sum_logits": -2.163982391357422, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.163982391357422, "logits_per_char": -1.081991195678711, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 965, "native_id": "6be05d227f4f6fe727218fc8be9df340", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0473392009735107, "incorrect_loss_raw": 1.5458128154277802, "correct_loss_per_char": 1.0236696004867554, "incorrect_loss_per_char": 0.7729064077138901, "correct_loss_per_token": 2.0473392009735107, "incorrect_loss_per_token": 1.5458128154277802, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2752124071121216, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.2752124071121216, "logits_per_char": -0.6376062035560608, "num_chars": 2}, {"sum_logits": -1.5328187942504883, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5328187942504883, "logits_per_char": -0.7664093971252441, "num_chars": 2}, {"sum_logits": -1.665327548980713, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.665327548980713, "logits_per_char": -0.8326637744903564, "num_chars": 2}, {"sum_logits": -1.7098925113677979, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7098925113677979, "logits_per_char": -0.8549462556838989, "num_chars": 2}, {"sum_logits": -2.0473392009735107, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0473392009735107, "logits_per_char": -1.0236696004867554, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 966, "native_id": "3f3ba1d9a3bfe63df11247a968eaddce", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5344353914260864, "incorrect_loss_raw": 1.656334936618805, "correct_loss_per_char": 0.7672176957130432, "incorrect_loss_per_char": 0.8281674683094025, "correct_loss_per_token": 1.5344353914260864, "incorrect_loss_per_token": 1.656334936618805, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4017935991287231, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4017935991287231, "logits_per_char": -0.7008967995643616, "num_chars": 2}, {"sum_logits": -1.5344353914260864, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5344353914260864, "logits_per_char": -0.7672176957130432, "num_chars": 2}, {"sum_logits": -1.569546103477478, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.569546103477478, "logits_per_char": -0.784773051738739, "num_chars": 2}, {"sum_logits": -1.6970218420028687, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6970218420028687, "logits_per_char": -0.8485109210014343, "num_chars": 2}, {"sum_logits": -1.95697820186615, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.95697820186615, "logits_per_char": -0.978489100933075, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 967, "native_id": "ca9a3ccfb140aa66816f96ac983b6d9f_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6074604988098145, "incorrect_loss_raw": 1.6377083659172058, "correct_loss_per_char": 0.8037302494049072, "incorrect_loss_per_char": 0.8188541829586029, "correct_loss_per_token": 1.6074604988098145, "incorrect_loss_per_token": 1.6377083659172058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.432525634765625, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.432525634765625, "logits_per_char": -0.7162628173828125, "num_chars": 2}, {"sum_logits": -1.5105888843536377, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5105888843536377, "logits_per_char": -0.7552944421768188, "num_chars": 2}, {"sum_logits": -1.6074604988098145, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6074604988098145, "logits_per_char": -0.8037302494049072, "num_chars": 2}, {"sum_logits": -1.607215404510498, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.607215404510498, "logits_per_char": -0.803607702255249, "num_chars": 2}, {"sum_logits": -2.0005035400390625, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.0005035400390625, "logits_per_char": -1.0002517700195312, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 968, "native_id": "487cabfcd776d89748ee7e7bb681ad59", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7034138441085815, "incorrect_loss_raw": 1.6362292170524597, "correct_loss_per_char": 0.8517069220542908, "incorrect_loss_per_char": 0.8181146085262299, "correct_loss_per_token": 1.7034138441085815, "incorrect_loss_per_token": 1.6362292170524597, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2583080530166626, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2583080530166626, "logits_per_char": -0.6291540265083313, "num_chars": 2}, {"sum_logits": -1.5232092142105103, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5232092142105103, "logits_per_char": -0.7616046071052551, "num_chars": 2}, {"sum_logits": -1.7034138441085815, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7034138441085815, "logits_per_char": -0.8517069220542908, "num_chars": 2}, {"sum_logits": -1.8306922912597656, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.8306922912597656, "logits_per_char": -0.9153461456298828, "num_chars": 2}, {"sum_logits": -1.9327073097229004, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9327073097229004, "logits_per_char": -0.9663536548614502, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 969, "native_id": "6915dfdefe3b1cd5fd8886c8bb84929a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3563764095306396, "incorrect_loss_raw": 1.7108406722545624, "correct_loss_per_char": 0.6781882047653198, "incorrect_loss_per_char": 0.8554203361272812, "correct_loss_per_token": 1.3563764095306396, "incorrect_loss_per_token": 1.7108406722545624, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3563764095306396, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3563764095306396, "logits_per_char": -0.6781882047653198, "num_chars": 2}, {"sum_logits": -1.5277472734451294, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5277472734451294, "logits_per_char": -0.7638736367225647, "num_chars": 2}, {"sum_logits": -1.568914771080017, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.568914771080017, "logits_per_char": -0.7844573855400085, "num_chars": 2}, {"sum_logits": -1.6784647703170776, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6784647703170776, "logits_per_char": -0.8392323851585388, "num_chars": 2}, {"sum_logits": -2.0682358741760254, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.0682358741760254, "logits_per_char": -1.0341179370880127, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 970, "native_id": "ec224c1dbfb569cce7ec317fe987ae68", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4625877141952515, "incorrect_loss_raw": 1.6952909529209137, "correct_loss_per_char": 0.7312938570976257, "incorrect_loss_per_char": 0.8476454764604568, "correct_loss_per_token": 1.4625877141952515, "incorrect_loss_per_token": 1.6952909529209137, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3206945657730103, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3206945657730103, "logits_per_char": -0.6603472828865051, "num_chars": 2}, {"sum_logits": -1.4625877141952515, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4625877141952515, "logits_per_char": -0.7312938570976257, "num_chars": 2}, {"sum_logits": -1.6094660758972168, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6094660758972168, "logits_per_char": -0.8047330379486084, "num_chars": 2}, {"sum_logits": -1.7579231262207031, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7579231262207031, "logits_per_char": -0.8789615631103516, "num_chars": 2}, {"sum_logits": -2.0930800437927246, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.0930800437927246, "logits_per_char": -1.0465400218963623, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 971, "native_id": "0cba8ddda21e29c8c53482e131d741cd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5731405019760132, "incorrect_loss_raw": 1.6440095007419586, "correct_loss_per_char": 0.7865702509880066, "incorrect_loss_per_char": 0.8220047503709793, "correct_loss_per_token": 1.5731405019760132, "incorrect_loss_per_token": 1.6440095007419586, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3771002292633057, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.3771002292633057, "logits_per_char": -0.6885501146316528, "num_chars": 2}, {"sum_logits": -1.5731405019760132, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5731405019760132, "logits_per_char": -0.7865702509880066, "num_chars": 2}, {"sum_logits": -1.6393488645553589, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6393488645553589, "logits_per_char": -0.8196744322776794, "num_chars": 2}, {"sum_logits": -1.6235400438308716, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6235400438308716, "logits_per_char": -0.8117700219154358, "num_chars": 2}, {"sum_logits": -1.9360488653182983, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.9360488653182983, "logits_per_char": -0.9680244326591492, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 972, "native_id": "e65559cd9f5d96b577caeb78d9033502", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4763853549957275, "incorrect_loss_raw": 1.6712203621864319, "correct_loss_per_char": 0.7381926774978638, "incorrect_loss_per_char": 0.8356101810932159, "correct_loss_per_token": 1.4763853549957275, "incorrect_loss_per_token": 1.6712203621864319, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4182583093643188, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4182583093643188, "logits_per_char": -0.7091291546821594, "num_chars": 2}, {"sum_logits": -1.4763853549957275, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4763853549957275, "logits_per_char": -0.7381926774978638, "num_chars": 2}, {"sum_logits": -1.584774136543274, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.584774136543274, "logits_per_char": -0.792387068271637, "num_chars": 2}, {"sum_logits": -1.7315372228622437, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7315372228622437, "logits_per_char": -0.8657686114311218, "num_chars": 2}, {"sum_logits": -1.9503117799758911, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9503117799758911, "logits_per_char": -0.9751558899879456, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 973, "native_id": "b8937a30f25093910c040f4e63e1d352", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6554657220840454, "incorrect_loss_raw": 1.6663084924221039, "correct_loss_per_char": 0.8277328610420227, "incorrect_loss_per_char": 0.8331542462110519, "correct_loss_per_token": 1.6554657220840454, "incorrect_loss_per_token": 1.6663084924221039, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2428250312805176, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2428250312805176, "logits_per_char": -0.6214125156402588, "num_chars": 2}, {"sum_logits": -1.4645633697509766, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4645633697509766, "logits_per_char": -0.7322816848754883, "num_chars": 2}, {"sum_logits": -1.6554657220840454, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6554657220840454, "logits_per_char": -0.8277328610420227, "num_chars": 2}, {"sum_logits": -1.7434033155441284, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7434033155441284, "logits_per_char": -0.8717016577720642, "num_chars": 2}, {"sum_logits": -2.214442253112793, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.214442253112793, "logits_per_char": -1.1072211265563965, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 974, "native_id": "aabe8eb218468fc63b6c9aa6d428c951", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5428357124328613, "incorrect_loss_raw": 1.6391724348068237, "correct_loss_per_char": 0.7714178562164307, "incorrect_loss_per_char": 0.8195862174034119, "correct_loss_per_token": 1.5428357124328613, "incorrect_loss_per_token": 1.6391724348068237, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5363638401031494, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.5363638401031494, "logits_per_char": -0.7681819200515747, "num_chars": 2}, {"sum_logits": -1.5428357124328613, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5428357124328613, "logits_per_char": -0.7714178562164307, "num_chars": 2}, {"sum_logits": -1.6213421821594238, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6213421821594238, "logits_per_char": -0.8106710910797119, "num_chars": 2}, {"sum_logits": -1.561152696609497, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.561152696609497, "logits_per_char": -0.7805763483047485, "num_chars": 2}, {"sum_logits": -1.8378310203552246, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.8378310203552246, "logits_per_char": -0.9189155101776123, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 975, "native_id": "43ba9669564217f2f909f33acbedaf95", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5638242959976196, "incorrect_loss_raw": 1.6914914846420288, "correct_loss_per_char": 0.7819121479988098, "incorrect_loss_per_char": 0.8457457423210144, "correct_loss_per_token": 1.5638242959976196, "incorrect_loss_per_token": 1.6914914846420288, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2016406059265137, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.2016406059265137, "logits_per_char": -0.6008203029632568, "num_chars": 2}, {"sum_logits": -1.5638242959976196, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5638242959976196, "logits_per_char": -0.7819121479988098, "num_chars": 2}, {"sum_logits": -1.6778637170791626, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6778637170791626, "logits_per_char": -0.8389318585395813, "num_chars": 2}, {"sum_logits": -1.74234139919281, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.74234139919281, "logits_per_char": -0.871170699596405, "num_chars": 2}, {"sum_logits": -2.144120216369629, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.144120216369629, "logits_per_char": -1.0720601081848145, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 976, "native_id": "2b9b625c788584b8d41f1a74d740e126", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6895719766616821, "incorrect_loss_raw": 1.6143154203891754, "correct_loss_per_char": 0.8447859883308411, "incorrect_loss_per_char": 0.8071577101945877, "correct_loss_per_token": 1.6895719766616821, "incorrect_loss_per_token": 1.6143154203891754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4566919803619385, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4566919803619385, "logits_per_char": -0.7283459901809692, "num_chars": 2}, {"sum_logits": -1.5712456703186035, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5712456703186035, "logits_per_char": -0.7856228351593018, "num_chars": 2}, {"sum_logits": -1.5133850574493408, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5133850574493408, "logits_per_char": -0.7566925287246704, "num_chars": 2}, {"sum_logits": -1.6895719766616821, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6895719766616821, "logits_per_char": -0.8447859883308411, "num_chars": 2}, {"sum_logits": -1.9159389734268188, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.9159389734268188, "logits_per_char": -0.9579694867134094, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 977, "native_id": "eb6807290df71b040e2c7bcc5d11fdea", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.424324631690979, "incorrect_loss_raw": 1.6922760009765625, "correct_loss_per_char": 0.7121623158454895, "incorrect_loss_per_char": 0.8461380004882812, "correct_loss_per_token": 1.424324631690979, "incorrect_loss_per_token": 1.6922760009765625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.424324631690979, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.424324631690979, "logits_per_char": -0.7121623158454895, "num_chars": 2}, {"sum_logits": -1.4282207489013672, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4282207489013672, "logits_per_char": -0.7141103744506836, "num_chars": 2}, {"sum_logits": -1.6230525970458984, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6230525970458984, "logits_per_char": -0.8115262985229492, "num_chars": 2}, {"sum_logits": -1.6595609188079834, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6595609188079834, "logits_per_char": -0.8297804594039917, "num_chars": 2}, {"sum_logits": -2.058269739151001, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.058269739151001, "logits_per_char": -1.0291348695755005, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 978, "native_id": "f06852fb4bb2764dc208a991d037f211", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.630154013633728, "incorrect_loss_raw": 1.6190223693847656, "correct_loss_per_char": 0.815077006816864, "incorrect_loss_per_char": 0.8095111846923828, "correct_loss_per_token": 1.630154013633728, "incorrect_loss_per_token": 1.6190223693847656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4630690813064575, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4630690813064575, "logits_per_char": -0.7315345406532288, "num_chars": 2}, {"sum_logits": -1.5644983053207397, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5644983053207397, "logits_per_char": -0.7822491526603699, "num_chars": 2}, {"sum_logits": -1.630154013633728, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.630154013633728, "logits_per_char": -0.815077006816864, "num_chars": 2}, {"sum_logits": -1.6499316692352295, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6499316692352295, "logits_per_char": -0.8249658346176147, "num_chars": 2}, {"sum_logits": -1.7985904216766357, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7985904216766357, "logits_per_char": -0.8992952108383179, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 979, "native_id": "5efadabaf61b5174916e3ab659bcd283", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4273180961608887, "incorrect_loss_raw": 1.6713732481002808, "correct_loss_per_char": 0.7136590480804443, "incorrect_loss_per_char": 0.8356866240501404, "correct_loss_per_token": 1.4273180961608887, "incorrect_loss_per_token": 1.6713732481002808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7222031354904175, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.7222031354904175, "logits_per_char": -0.8611015677452087, "num_chars": 2}, {"sum_logits": -1.6436692476272583, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.6436692476272583, "logits_per_char": -0.8218346238136292, "num_chars": 2}, {"sum_logits": -1.6220999956130981, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.6220999956130981, "logits_per_char": -0.8110499978065491, "num_chars": 2}, {"sum_logits": -1.6975206136703491, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.6975206136703491, "logits_per_char": -0.8487603068351746, "num_chars": 2}, {"sum_logits": -1.4273180961608887, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.4273180961608887, "logits_per_char": -0.7136590480804443, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 980, "native_id": "e9d4c747018ff81b8c0aefb5abc3c539", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2006607055664062, "incorrect_loss_raw": 1.7822440564632416, "correct_loss_per_char": 0.6003303527832031, "incorrect_loss_per_char": 0.8911220282316208, "correct_loss_per_token": 1.2006607055664062, "incorrect_loss_per_token": 1.7822440564632416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2006607055664062, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.2006607055664062, "logits_per_char": -0.6003303527832031, "num_chars": 2}, {"sum_logits": -1.4538627862930298, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4538627862930298, "logits_per_char": -0.7269313931465149, "num_chars": 2}, {"sum_logits": -1.7140237092971802, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7140237092971802, "logits_per_char": -0.8570118546485901, "num_chars": 2}, {"sum_logits": -1.821147084236145, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.821147084236145, "logits_per_char": -0.9105735421180725, "num_chars": 2}, {"sum_logits": -2.1399426460266113, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.1399426460266113, "logits_per_char": -1.0699713230133057, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 981, "native_id": "30a8cfd186f1aae5acd425a52d058863", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.570013403892517, "incorrect_loss_raw": 1.6426788568496704, "correct_loss_per_char": 0.7850067019462585, "incorrect_loss_per_char": 0.8213394284248352, "correct_loss_per_token": 1.570013403892517, "incorrect_loss_per_token": 1.6426788568496704, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3804032802581787, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3804032802581787, "logits_per_char": -0.6902016401290894, "num_chars": 2}, {"sum_logits": -1.570013403892517, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.570013403892517, "logits_per_char": -0.7850067019462585, "num_chars": 2}, {"sum_logits": -1.6393120288848877, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6393120288848877, "logits_per_char": -0.8196560144424438, "num_chars": 2}, {"sum_logits": -1.6450475454330444, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6450475454330444, "logits_per_char": -0.8225237727165222, "num_chars": 2}, {"sum_logits": -1.9059525728225708, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9059525728225708, "logits_per_char": -0.9529762864112854, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 982, "native_id": "9e7805871c8a276300a89fe910a90949", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3369656801223755, "incorrect_loss_raw": 1.718528836965561, "correct_loss_per_char": 0.6684828400611877, "incorrect_loss_per_char": 0.8592644184827805, "correct_loss_per_token": 1.3369656801223755, "incorrect_loss_per_token": 1.718528836965561, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3369656801223755, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3369656801223755, "logits_per_char": -0.6684828400611877, "num_chars": 2}, {"sum_logits": -1.5165570974349976, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5165570974349976, "logits_per_char": -0.7582785487174988, "num_chars": 2}, {"sum_logits": -1.6012927293777466, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6012927293777466, "logits_per_char": -0.8006463646888733, "num_chars": 2}, {"sum_logits": -1.6853736639022827, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6853736639022827, "logits_per_char": -0.8426868319511414, "num_chars": 2}, {"sum_logits": -2.070891857147217, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.070891857147217, "logits_per_char": -1.0354459285736084, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 983, "native_id": "047c2d8c65d297b39aa42821c1ca76a9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.528031349182129, "incorrect_loss_raw": 1.6873573064804077, "correct_loss_per_char": 0.7640156745910645, "incorrect_loss_per_char": 0.8436786532402039, "correct_loss_per_token": 1.528031349182129, "incorrect_loss_per_token": 1.6873573064804077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1967432498931885, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.1967432498931885, "logits_per_char": -0.5983716249465942, "num_chars": 2}, {"sum_logits": -1.528031349182129, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.528031349182129, "logits_per_char": -0.7640156745910645, "num_chars": 2}, {"sum_logits": -1.7150633335113525, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.7150633335113525, "logits_per_char": -0.8575316667556763, "num_chars": 2}, {"sum_logits": -1.8418021202087402, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.8418021202087402, "logits_per_char": -0.9209010601043701, "num_chars": 2}, {"sum_logits": -1.9958205223083496, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.9958205223083496, "logits_per_char": -0.9979102611541748, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 984, "native_id": "0bed77da54b6c54facd0ee6614aad72e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4052330255508423, "incorrect_loss_raw": 1.6810139417648315, "correct_loss_per_char": 0.7026165127754211, "incorrect_loss_per_char": 0.8405069708824158, "correct_loss_per_token": 1.4052330255508423, "incorrect_loss_per_token": 1.6810139417648315, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4052330255508423, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.4052330255508423, "logits_per_char": -0.7026165127754211, "num_chars": 2}, {"sum_logits": -1.5065804719924927, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5065804719924927, "logits_per_char": -0.7532902359962463, "num_chars": 2}, {"sum_logits": -1.6805611848831177, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6805611848831177, "logits_per_char": -0.8402805924415588, "num_chars": 2}, {"sum_logits": -1.8261312246322632, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.8261312246322632, "logits_per_char": -0.9130656123161316, "num_chars": 2}, {"sum_logits": -1.7107828855514526, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.7107828855514526, "logits_per_char": -0.8553914427757263, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 985, "native_id": "32e2adee67aace0a98c830fb39463015", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6457375288009644, "incorrect_loss_raw": 1.6187565624713898, "correct_loss_per_char": 0.8228687644004822, "incorrect_loss_per_char": 0.8093782812356949, "correct_loss_per_token": 1.6457375288009644, "incorrect_loss_per_token": 1.6187565624713898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4306676387786865, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4306676387786865, "logits_per_char": -0.7153338193893433, "num_chars": 2}, {"sum_logits": -1.5792205333709717, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5792205333709717, "logits_per_char": -0.7896102666854858, "num_chars": 2}, {"sum_logits": -1.6193326711654663, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6193326711654663, "logits_per_char": -0.8096663355827332, "num_chars": 2}, {"sum_logits": -1.6457375288009644, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6457375288009644, "logits_per_char": -0.8228687644004822, "num_chars": 2}, {"sum_logits": -1.8458054065704346, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.8458054065704346, "logits_per_char": -0.9229027032852173, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 986, "native_id": "8272f08792b873885f93d4c148e307e5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5296111106872559, "incorrect_loss_raw": 1.6987221539020538, "correct_loss_per_char": 0.7648055553436279, "incorrect_loss_per_char": 0.8493610769510269, "correct_loss_per_token": 1.5296111106872559, "incorrect_loss_per_token": 1.6987221539020538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.311653971672058, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.311653971672058, "logits_per_char": -0.655826985836029, "num_chars": 2}, {"sum_logits": -1.5296111106872559, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5296111106872559, "logits_per_char": -0.7648055553436279, "num_chars": 2}, {"sum_logits": -1.5667259693145752, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5667259693145752, "logits_per_char": -0.7833629846572876, "num_chars": 2}, {"sum_logits": -1.6015470027923584, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6015470027923584, "logits_per_char": -0.8007735013961792, "num_chars": 2}, {"sum_logits": -2.3149616718292236, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.3149616718292236, "logits_per_char": -1.1574808359146118, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 987, "native_id": "bc05bc6b4df7a3d25a361515fe8912ad", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7020467519760132, "incorrect_loss_raw": 1.607992023229599, "correct_loss_per_char": 0.8510233759880066, "incorrect_loss_per_char": 0.8039960116147995, "correct_loss_per_token": 1.7020467519760132, "incorrect_loss_per_token": 1.607992023229599, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4068814516067505, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4068814516067505, "logits_per_char": -0.7034407258033752, "num_chars": 2}, {"sum_logits": -1.8389308452606201, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8389308452606201, "logits_per_char": -0.9194654226303101, "num_chars": 2}, {"sum_logits": -1.6612448692321777, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6612448692321777, "logits_per_char": -0.8306224346160889, "num_chars": 2}, {"sum_logits": -1.7020467519760132, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7020467519760132, "logits_per_char": -0.8510233759880066, "num_chars": 2}, {"sum_logits": -1.5249109268188477, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5249109268188477, "logits_per_char": -0.7624554634094238, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 988, "native_id": "b893a6e7a2b172bd71f03c9dbee4f960", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6390968561172485, "incorrect_loss_raw": 1.6292597353458405, "correct_loss_per_char": 0.8195484280586243, "incorrect_loss_per_char": 0.8146298676729202, "correct_loss_per_token": 1.6390968561172485, "incorrect_loss_per_token": 1.6292597353458405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.425478219985962, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.425478219985962, "logits_per_char": -0.712739109992981, "num_chars": 2}, {"sum_logits": -1.5705112218856812, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5705112218856812, "logits_per_char": -0.7852556109428406, "num_chars": 2}, {"sum_logits": -1.5361430644989014, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5361430644989014, "logits_per_char": -0.7680715322494507, "num_chars": 2}, {"sum_logits": -1.6390968561172485, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6390968561172485, "logits_per_char": -0.8195484280586243, "num_chars": 2}, {"sum_logits": -1.9849064350128174, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.9849064350128174, "logits_per_char": -0.9924532175064087, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 989, "native_id": "cf8e30dd6956d03e3f0f0397112a8696", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4153262376785278, "incorrect_loss_raw": 1.6861997842788696, "correct_loss_per_char": 0.7076631188392639, "incorrect_loss_per_char": 0.8430998921394348, "correct_loss_per_token": 1.4153262376785278, "incorrect_loss_per_token": 1.6861997842788696, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4153262376785278, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4153262376785278, "logits_per_char": -0.7076631188392639, "num_chars": 2}, {"sum_logits": -1.4797645807266235, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4797645807266235, "logits_per_char": -0.7398822903633118, "num_chars": 2}, {"sum_logits": -1.5984033346176147, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5984033346176147, "logits_per_char": -0.7992016673088074, "num_chars": 2}, {"sum_logits": -1.714807152748108, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.714807152748108, "logits_per_char": -0.857403576374054, "num_chars": 2}, {"sum_logits": -1.9518240690231323, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.9518240690231323, "logits_per_char": -0.9759120345115662, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 990, "native_id": "159d50e325b59c6d29ec371500e173b4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.123106002807617, "incorrect_loss_raw": 1.521075576543808, "correct_loss_per_char": 1.0615530014038086, "incorrect_loss_per_char": 0.760537788271904, "correct_loss_per_token": 2.123106002807617, "incorrect_loss_per_token": 1.521075576543808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5881472826004028, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5881472826004028, "logits_per_char": -0.7940736413002014, "num_chars": 2}, {"sum_logits": -1.5743485689163208, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5743485689163208, "logits_per_char": -0.7871742844581604, "num_chars": 2}, {"sum_logits": -1.4634212255477905, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4634212255477905, "logits_per_char": -0.7317106127738953, "num_chars": 2}, {"sum_logits": -1.4583852291107178, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4583852291107178, "logits_per_char": -0.7291926145553589, "num_chars": 2}, {"sum_logits": -2.123106002807617, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.123106002807617, "logits_per_char": -1.0615530014038086, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 991, "native_id": "17eafc807b198236faf06a66f4c05313", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5002121925354004, "incorrect_loss_raw": 1.666731059551239, "correct_loss_per_char": 0.7501060962677002, "incorrect_loss_per_char": 0.8333655297756195, "correct_loss_per_token": 1.5002121925354004, "incorrect_loss_per_token": 1.666731059551239, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3671480417251587, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.3671480417251587, "logits_per_char": -0.6835740208625793, "num_chars": 2}, {"sum_logits": -1.6755783557891846, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6755783557891846, "logits_per_char": -0.8377891778945923, "num_chars": 2}, {"sum_logits": -1.5002121925354004, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5002121925354004, "logits_per_char": -0.7501060962677002, "num_chars": 2}, {"sum_logits": -1.7137547731399536, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.7137547731399536, "logits_per_char": -0.8568773865699768, "num_chars": 2}, {"sum_logits": -1.9104430675506592, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.9104430675506592, "logits_per_char": -0.9552215337753296, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 992, "native_id": "24eebfa678112100803da16dde148b2d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9314064979553223, "incorrect_loss_raw": 1.549730658531189, "correct_loss_per_char": 0.9657032489776611, "incorrect_loss_per_char": 0.7748653292655945, "correct_loss_per_token": 1.9314064979553223, "incorrect_loss_per_token": 1.549730658531189, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5641605854034424, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5641605854034424, "logits_per_char": -0.7820802927017212, "num_chars": 2}, {"sum_logits": -1.6470756530761719, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6470756530761719, "logits_per_char": -0.8235378265380859, "num_chars": 2}, {"sum_logits": -1.4821147918701172, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4821147918701172, "logits_per_char": -0.7410573959350586, "num_chars": 2}, {"sum_logits": -1.5055716037750244, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5055716037750244, "logits_per_char": -0.7527858018875122, "num_chars": 2}, {"sum_logits": -1.9314064979553223, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9314064979553223, "logits_per_char": -0.9657032489776611, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 993, "native_id": "ec882fc3a9bfaeae2a26fe31c2ef2c07", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5008625984191895, "incorrect_loss_raw": 1.6584153175354004, "correct_loss_per_char": 0.7504312992095947, "incorrect_loss_per_char": 0.8292076587677002, "correct_loss_per_token": 1.5008625984191895, "incorrect_loss_per_token": 1.6584153175354004, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4760351181030273, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4760351181030273, "logits_per_char": -0.7380175590515137, "num_chars": 2}, {"sum_logits": -1.5008625984191895, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5008625984191895, "logits_per_char": -0.7504312992095947, "num_chars": 2}, {"sum_logits": -1.6299290657043457, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6299290657043457, "logits_per_char": -0.8149645328521729, "num_chars": 2}, {"sum_logits": -1.5935609340667725, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5935609340667725, "logits_per_char": -0.7967804670333862, "num_chars": 2}, {"sum_logits": -1.934136152267456, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.934136152267456, "logits_per_char": -0.967068076133728, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 994, "native_id": "0a006d16d9042e0c170935e5fbf7f9af", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.009305477142334, "incorrect_loss_raw": 1.541269063949585, "correct_loss_per_char": 1.004652738571167, "incorrect_loss_per_char": 0.7706345319747925, "correct_loss_per_token": 2.009305477142334, "incorrect_loss_per_token": 1.541269063949585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4137670993804932, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4137670993804932, "logits_per_char": -0.7068835496902466, "num_chars": 2}, {"sum_logits": -1.496356725692749, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.496356725692749, "logits_per_char": -0.7481783628463745, "num_chars": 2}, {"sum_logits": -1.7093945741653442, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7093945741653442, "logits_per_char": -0.8546972870826721, "num_chars": 2}, {"sum_logits": -1.5455578565597534, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5455578565597534, "logits_per_char": -0.7727789282798767, "num_chars": 2}, {"sum_logits": -2.009305477142334, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.009305477142334, "logits_per_char": -1.004652738571167, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 995, "native_id": "d33a81660058e570a18fb2eafa284a78", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3866904973983765, "incorrect_loss_raw": 1.7249193489551544, "correct_loss_per_char": 0.6933452486991882, "incorrect_loss_per_char": 0.8624596744775772, "correct_loss_per_token": 1.3866904973983765, "incorrect_loss_per_token": 1.7249193489551544, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3866904973983765, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3866904973983765, "logits_per_char": -0.6933452486991882, "num_chars": 2}, {"sum_logits": -1.4125293493270874, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4125293493270874, "logits_per_char": -0.7062646746635437, "num_chars": 2}, {"sum_logits": -1.5791354179382324, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5791354179382324, "logits_per_char": -0.7895677089691162, "num_chars": 2}, {"sum_logits": -1.650221824645996, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.650221824645996, "logits_per_char": -0.825110912322998, "num_chars": 2}, {"sum_logits": -2.2577908039093018, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.2577908039093018, "logits_per_char": -1.1288954019546509, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 996, "native_id": "1e09c3136a743b862e783700b7667028", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0725321769714355, "incorrect_loss_raw": 1.5278289020061493, "correct_loss_per_char": 1.0362660884857178, "incorrect_loss_per_char": 0.7639144510030746, "correct_loss_per_token": 2.0725321769714355, "incorrect_loss_per_token": 1.5278289020061493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4399547576904297, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4399547576904297, "logits_per_char": -0.7199773788452148, "num_chars": 2}, {"sum_logits": -1.544442057609558, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.544442057609558, "logits_per_char": -0.772221028804779, "num_chars": 2}, {"sum_logits": -1.5318406820297241, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5318406820297241, "logits_per_char": -0.7659203410148621, "num_chars": 2}, {"sum_logits": -1.5950781106948853, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5950781106948853, "logits_per_char": -0.7975390553474426, "num_chars": 2}, {"sum_logits": -2.0725321769714355, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0725321769714355, "logits_per_char": -1.0362660884857178, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 997, "native_id": "5e851c47682bdf79ec7c139ecf124c9a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5856379270553589, "incorrect_loss_raw": 1.6303166151046753, "correct_loss_per_char": 0.7928189635276794, "incorrect_loss_per_char": 0.8151583075523376, "correct_loss_per_token": 1.5856379270553589, "incorrect_loss_per_token": 1.6303166151046753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.589272141456604, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.589272141456604, "logits_per_char": -0.794636070728302, "num_chars": 2}, {"sum_logits": -1.5130618810653687, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.5130618810653687, "logits_per_char": -0.7565309405326843, "num_chars": 2}, {"sum_logits": -1.5856379270553589, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5856379270553589, "logits_per_char": -0.7928189635276794, "num_chars": 2}, {"sum_logits": -1.5660630464553833, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5660630464553833, "logits_per_char": -0.7830315232276917, "num_chars": 2}, {"sum_logits": -1.8528693914413452, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.8528693914413452, "logits_per_char": -0.9264346957206726, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 998, "native_id": "b148f18fb8b5a504b67078ef6ac29717", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7216819524765015, "incorrect_loss_raw": 1.6222862005233765, "correct_loss_per_char": 0.8608409762382507, "incorrect_loss_per_char": 0.8111431002616882, "correct_loss_per_token": 1.7216819524765015, "incorrect_loss_per_token": 1.6222862005233765, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.352627158164978, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.352627158164978, "logits_per_char": -0.676313579082489, "num_chars": 2}, {"sum_logits": -1.4977208375930786, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4977208375930786, "logits_per_char": -0.7488604187965393, "num_chars": 2}, {"sum_logits": -1.595536708831787, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.595536708831787, "logits_per_char": -0.7977683544158936, "num_chars": 2}, {"sum_logits": -1.7216819524765015, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7216819524765015, "logits_per_char": -0.8608409762382507, "num_chars": 2}, {"sum_logits": -2.043260097503662, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.043260097503662, "logits_per_char": -1.021630048751831, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 999, "native_id": "b6bbe013995fdb5def3d504319af0791", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6025267839431763, "incorrect_loss_raw": 1.6235666275024414, "correct_loss_per_char": 0.8012633919715881, "incorrect_loss_per_char": 0.8117833137512207, "correct_loss_per_token": 1.6025267839431763, "incorrect_loss_per_token": 1.6235666275024414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4367283582687378, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.4367283582687378, "logits_per_char": -0.7183641791343689, "num_chars": 2}, {"sum_logits": -1.7084418535232544, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7084418535232544, "logits_per_char": -0.8542209267616272, "num_chars": 2}, {"sum_logits": -1.6025267839431763, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6025267839431763, "logits_per_char": -0.8012633919715881, "num_chars": 2}, {"sum_logits": -1.6304792165756226, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6304792165756226, "logits_per_char": -0.8152396082878113, "num_chars": 2}, {"sum_logits": -1.7186170816421509, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7186170816421509, "logits_per_char": -0.8593085408210754, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1000, "native_id": "0c2fa15a02d0b6ca6707e98fac7589e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3155014514923096, "incorrect_loss_raw": 1.7407133281230927, "correct_loss_per_char": 0.6577507257461548, "incorrect_loss_per_char": 0.8703566640615463, "correct_loss_per_token": 1.3155014514923096, "incorrect_loss_per_token": 1.7407133281230927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3155014514923096, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3155014514923096, "logits_per_char": -0.6577507257461548, "num_chars": 2}, {"sum_logits": -1.4298053979873657, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4298053979873657, "logits_per_char": -0.7149026989936829, "num_chars": 2}, {"sum_logits": -1.6046757698059082, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6046757698059082, "logits_per_char": -0.8023378849029541, "num_chars": 2}, {"sum_logits": -1.7543795108795166, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7543795108795166, "logits_per_char": -0.8771897554397583, "num_chars": 2}, {"sum_logits": -2.17399263381958, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.17399263381958, "logits_per_char": -1.08699631690979, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1001, "native_id": "a656e74a943f9e2698a25bbcfb4e96db", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7510790824890137, "incorrect_loss_raw": 1.6035617887973785, "correct_loss_per_char": 0.8755395412445068, "incorrect_loss_per_char": 0.8017808943986893, "correct_loss_per_token": 1.7510790824890137, "incorrect_loss_per_token": 1.6035617887973785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.302219033241272, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.302219033241272, "logits_per_char": -0.651109516620636, "num_chars": 2}, {"sum_logits": -1.559849739074707, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.559849739074707, "logits_per_char": -0.7799248695373535, "num_chars": 2}, {"sum_logits": -1.7510790824890137, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7510790824890137, "logits_per_char": -0.8755395412445068, "num_chars": 2}, {"sum_logits": -1.7607781887054443, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7607781887054443, "logits_per_char": -0.8803890943527222, "num_chars": 2}, {"sum_logits": -1.7914001941680908, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7914001941680908, "logits_per_char": -0.8957000970840454, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1002, "native_id": "8086f022f2d4a4888ae1f8c7e4541ab9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5741946697235107, "incorrect_loss_raw": 1.6532637178897858, "correct_loss_per_char": 0.7870973348617554, "incorrect_loss_per_char": 0.8266318589448929, "correct_loss_per_token": 1.5741946697235107, "incorrect_loss_per_token": 1.6532637178897858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.374071717262268, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.374071717262268, "logits_per_char": -0.687035858631134, "num_chars": 2}, {"sum_logits": -1.5598742961883545, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5598742961883545, "logits_per_char": -0.7799371480941772, "num_chars": 2}, {"sum_logits": -1.5741946697235107, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5741946697235107, "logits_per_char": -0.7870973348617554, "num_chars": 2}, {"sum_logits": -1.6403930187225342, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6403930187225342, "logits_per_char": -0.8201965093612671, "num_chars": 2}, {"sum_logits": -2.0387158393859863, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0387158393859863, "logits_per_char": -1.0193579196929932, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1003, "native_id": "5655a3002dd9a6b7dabede1dd26a5893", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9266135692596436, "incorrect_loss_raw": 1.5517849922180176, "correct_loss_per_char": 0.9633067846298218, "incorrect_loss_per_char": 0.7758924961090088, "correct_loss_per_token": 1.9266135692596436, "incorrect_loss_per_token": 1.5517849922180176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.554933786392212, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.554933786392212, "logits_per_char": -0.777466893196106, "num_chars": 2}, {"sum_logits": -1.6803922653198242, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6803922653198242, "logits_per_char": -0.8401961326599121, "num_chars": 2}, {"sum_logits": -1.4570252895355225, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4570252895355225, "logits_per_char": -0.7285126447677612, "num_chars": 2}, {"sum_logits": -1.5147886276245117, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5147886276245117, "logits_per_char": -0.7573943138122559, "num_chars": 2}, {"sum_logits": -1.9266135692596436, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.9266135692596436, "logits_per_char": -0.9633067846298218, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1004, "native_id": "17d9bfaee1efac51b1ca240125bc5977", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8340890407562256, "incorrect_loss_raw": 1.6713004410266876, "correct_loss_per_char": 0.9170445203781128, "incorrect_loss_per_char": 0.8356502205133438, "correct_loss_per_token": 1.8340890407562256, "incorrect_loss_per_token": 1.6713004410266876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0779528617858887, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.0779528617858887, "logits_per_char": -0.5389764308929443, "num_chars": 2}, {"sum_logits": -1.5119446516036987, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5119446516036987, "logits_per_char": -0.7559723258018494, "num_chars": 2}, {"sum_logits": -1.7015771865844727, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7015771865844727, "logits_per_char": -0.8507885932922363, "num_chars": 2}, {"sum_logits": -1.8340890407562256, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.8340890407562256, "logits_per_char": -0.9170445203781128, "num_chars": 2}, {"sum_logits": -2.3937270641326904, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.3937270641326904, "logits_per_char": -1.1968635320663452, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1005, "native_id": "801431167b8bff06b9870abe9721536b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6930664777755737, "incorrect_loss_raw": 1.6423626244068146, "correct_loss_per_char": 0.8465332388877869, "incorrect_loss_per_char": 0.8211813122034073, "correct_loss_per_token": 1.6930664777755737, "incorrect_loss_per_token": 1.6423626244068146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3201943635940552, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.3201943635940552, "logits_per_char": -0.6600971817970276, "num_chars": 2}, {"sum_logits": -1.44969642162323, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.44969642162323, "logits_per_char": -0.724848210811615, "num_chars": 2}, {"sum_logits": -1.6341086626052856, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6341086626052856, "logits_per_char": -0.8170543313026428, "num_chars": 2}, {"sum_logits": -1.6930664777755737, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6930664777755737, "logits_per_char": -0.8465332388877869, "num_chars": 2}, {"sum_logits": -2.1654510498046875, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.1654510498046875, "logits_per_char": -1.0827255249023438, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1006, "native_id": "85ebdd4f1a3c2ac900eee8e75e48ccaa", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.201638698577881, "incorrect_loss_raw": 1.5343887209892273, "correct_loss_per_char": 1.1008193492889404, "incorrect_loss_per_char": 0.7671943604946136, "correct_loss_per_token": 2.201638698577881, "incorrect_loss_per_token": 1.5343887209892273, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1974339485168457, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.1974339485168457, "logits_per_char": -0.5987169742584229, "num_chars": 2}, {"sum_logits": -1.4748659133911133, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4748659133911133, "logits_per_char": -0.7374329566955566, "num_chars": 2}, {"sum_logits": -1.6576284170150757, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6576284170150757, "logits_per_char": -0.8288142085075378, "num_chars": 2}, {"sum_logits": -1.8076266050338745, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.8076266050338745, "logits_per_char": -0.9038133025169373, "num_chars": 2}, {"sum_logits": -2.201638698577881, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.201638698577881, "logits_per_char": -1.1008193492889404, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1007, "native_id": "db1eb157671109bbb9113b0f71a6b957", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4947681427001953, "incorrect_loss_raw": 1.6596794128417969, "correct_loss_per_char": 0.7473840713500977, "incorrect_loss_per_char": 0.8298397064208984, "correct_loss_per_token": 1.4947681427001953, "incorrect_loss_per_token": 1.6596794128417969, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4947681427001953, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4947681427001953, "logits_per_char": -0.7473840713500977, "num_chars": 2}, {"sum_logits": -1.4803736209869385, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4803736209869385, "logits_per_char": -0.7401868104934692, "num_chars": 2}, {"sum_logits": -1.6125054359436035, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6125054359436035, "logits_per_char": -0.8062527179718018, "num_chars": 2}, {"sum_logits": -1.5881602764129639, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5881602764129639, "logits_per_char": -0.7940801382064819, "num_chars": 2}, {"sum_logits": -1.9576783180236816, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9576783180236816, "logits_per_char": -0.9788391590118408, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1008, "native_id": "c02a3c2d4f726b9e1be99533a24a6ab4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6125022172927856, "incorrect_loss_raw": 1.6456514298915863, "correct_loss_per_char": 0.8062511086463928, "incorrect_loss_per_char": 0.8228257149457932, "correct_loss_per_token": 1.6125022172927856, "incorrect_loss_per_token": 1.6456514298915863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4221686124801636, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4221686124801636, "logits_per_char": -0.7110843062400818, "num_chars": 2}, {"sum_logits": -1.4544357061386108, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4544357061386108, "logits_per_char": -0.7272178530693054, "num_chars": 2}, {"sum_logits": -1.6309067010879517, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6309067010879517, "logits_per_char": -0.8154533505439758, "num_chars": 2}, {"sum_logits": -1.6125022172927856, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6125022172927856, "logits_per_char": -0.8062511086463928, "num_chars": 2}, {"sum_logits": -2.075094699859619, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.075094699859619, "logits_per_char": -1.0375473499298096, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1009, "native_id": "3ed6391c539e6daa5b5fdb1b6d5d8ace", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2044767141342163, "incorrect_loss_raw": 1.7842081785202026, "correct_loss_per_char": 0.6022383570671082, "incorrect_loss_per_char": 0.8921040892601013, "correct_loss_per_token": 1.2044767141342163, "incorrect_loss_per_token": 1.7842081785202026, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2044767141342163, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.2044767141342163, "logits_per_char": -0.6022383570671082, "num_chars": 2}, {"sum_logits": -1.4509624242782593, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4509624242782593, "logits_per_char": -0.7254812121391296, "num_chars": 2}, {"sum_logits": -1.698467493057251, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.698467493057251, "logits_per_char": -0.8492337465286255, "num_chars": 2}, {"sum_logits": -1.8076790571212769, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.8076790571212769, "logits_per_char": -0.9038395285606384, "num_chars": 2}, {"sum_logits": -2.1797237396240234, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -2.1797237396240234, "logits_per_char": -1.0898618698120117, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1010, "native_id": "1db19a32a3edbff9981976dc9ec800ce", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.730302333831787, "incorrect_loss_raw": 1.5936636328697205, "correct_loss_per_char": 0.8651511669158936, "incorrect_loss_per_char": 0.7968318164348602, "correct_loss_per_token": 1.730302333831787, "incorrect_loss_per_token": 1.5936636328697205, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.730302333831787, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.730302333831787, "logits_per_char": -0.8651511669158936, "num_chars": 2}, {"sum_logits": -1.5274600982666016, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5274600982666016, "logits_per_char": -0.7637300491333008, "num_chars": 2}, {"sum_logits": -1.5192062854766846, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.5192062854766846, "logits_per_char": -0.7596031427383423, "num_chars": 2}, {"sum_logits": -1.5275784730911255, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5275784730911255, "logits_per_char": -0.7637892365455627, "num_chars": 2}, {"sum_logits": -1.8004096746444702, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8004096746444702, "logits_per_char": -0.9002048373222351, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1011, "native_id": "1e5a138b4c7d456c37abf4990b402bbe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3814046382904053, "incorrect_loss_raw": 1.6883728504180908, "correct_loss_per_char": 0.6907023191452026, "incorrect_loss_per_char": 0.8441864252090454, "correct_loss_per_token": 1.3814046382904053, "incorrect_loss_per_token": 1.6883728504180908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3814046382904053, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3814046382904053, "logits_per_char": -0.6907023191452026, "num_chars": 2}, {"sum_logits": -1.5700205564498901, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5700205564498901, "logits_per_char": -0.7850102782249451, "num_chars": 2}, {"sum_logits": -1.6652692556381226, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6652692556381226, "logits_per_char": -0.8326346278190613, "num_chars": 2}, {"sum_logits": -1.6404458284378052, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6404458284378052, "logits_per_char": -0.8202229142189026, "num_chars": 2}, {"sum_logits": -1.8777557611465454, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.8777557611465454, "logits_per_char": -0.9388778805732727, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1012, "native_id": "9402864beae075392d2ee6c10115fc21", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5991013050079346, "incorrect_loss_raw": 1.6500205993652344, "correct_loss_per_char": 0.7995506525039673, "incorrect_loss_per_char": 0.8250102996826172, "correct_loss_per_token": 1.5991013050079346, "incorrect_loss_per_token": 1.6500205993652344, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4655176401138306, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4655176401138306, "logits_per_char": -0.7327588200569153, "num_chars": 2}, {"sum_logits": -1.4893145561218262, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4893145561218262, "logits_per_char": -0.7446572780609131, "num_chars": 2}, {"sum_logits": -1.5299752950668335, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5299752950668335, "logits_per_char": -0.7649876475334167, "num_chars": 2}, {"sum_logits": -1.5991013050079346, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5991013050079346, "logits_per_char": -0.7995506525039673, "num_chars": 2}, {"sum_logits": -2.1152749061584473, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.1152749061584473, "logits_per_char": -1.0576374530792236, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1013, "native_id": "25136807f7b2e78b115698daa1677b4a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.680005669593811, "incorrect_loss_raw": 1.6203340590000153, "correct_loss_per_char": 0.8400028347969055, "incorrect_loss_per_char": 0.8101670295000076, "correct_loss_per_token": 1.680005669593811, "incorrect_loss_per_token": 1.6203340590000153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3811712265014648, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3811712265014648, "logits_per_char": -0.6905856132507324, "num_chars": 2}, {"sum_logits": -1.577734351158142, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.577734351158142, "logits_per_char": -0.788867175579071, "num_chars": 2}, {"sum_logits": -1.5487726926803589, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5487726926803589, "logits_per_char": -0.7743863463401794, "num_chars": 2}, {"sum_logits": -1.680005669593811, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.680005669593811, "logits_per_char": -0.8400028347969055, "num_chars": 2}, {"sum_logits": -1.9736579656600952, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9736579656600952, "logits_per_char": -0.9868289828300476, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1014, "native_id": "bc10bf2bfae26a2226823d42956f6cf0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.482265591621399, "incorrect_loss_raw": 1.667600393295288, "correct_loss_per_char": 0.7411327958106995, "incorrect_loss_per_char": 0.833800196647644, "correct_loss_per_token": 1.482265591621399, "incorrect_loss_per_token": 1.667600393295288, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4069315195083618, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4069315195083618, "logits_per_char": -0.7034657597541809, "num_chars": 2}, {"sum_logits": -1.482265591621399, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.482265591621399, "logits_per_char": -0.7411327958106995, "num_chars": 2}, {"sum_logits": -1.6493993997573853, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6493993997573853, "logits_per_char": -0.8246996998786926, "num_chars": 2}, {"sum_logits": -1.672242283821106, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.672242283821106, "logits_per_char": -0.836121141910553, "num_chars": 2}, {"sum_logits": -1.9418283700942993, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9418283700942993, "logits_per_char": -0.9709141850471497, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1015, "native_id": "5a6559db6bae37e3a8af7350be212219", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7323381900787354, "incorrect_loss_raw": 1.6019320487976074, "correct_loss_per_char": 0.8661690950393677, "incorrect_loss_per_char": 0.8009660243988037, "correct_loss_per_token": 1.7323381900787354, "incorrect_loss_per_token": 1.6019320487976074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.353557825088501, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": true, "logits_per_token": -1.353557825088501, "logits_per_char": -0.6767789125442505, "num_chars": 2}, {"sum_logits": -1.5948164463043213, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.5948164463043213, "logits_per_char": -0.7974082231521606, "num_chars": 2}, {"sum_logits": -1.6590628623962402, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6590628623962402, "logits_per_char": -0.8295314311981201, "num_chars": 2}, {"sum_logits": -1.7323381900787354, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.7323381900787354, "logits_per_char": -0.8661690950393677, "num_chars": 2}, {"sum_logits": -1.8002910614013672, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.8002910614013672, "logits_per_char": -0.9001455307006836, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1016, "native_id": "7ae17f5aecacf18c94a47cc48deb6c36", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.73395836353302, "incorrect_loss_raw": 1.6138823330402374, "correct_loss_per_char": 0.86697918176651, "incorrect_loss_per_char": 0.8069411665201187, "correct_loss_per_token": 1.73395836353302, "incorrect_loss_per_token": 1.6138823330402374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.371829628944397, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.371829628944397, "logits_per_char": -0.6859148144721985, "num_chars": 2}, {"sum_logits": -1.4571582078933716, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.4571582078933716, "logits_per_char": -0.7285791039466858, "num_chars": 2}, {"sum_logits": -1.6046308279037476, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.6046308279037476, "logits_per_char": -0.8023154139518738, "num_chars": 2}, {"sum_logits": -1.73395836353302, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.73395836353302, "logits_per_char": -0.86697918176651, "num_chars": 2}, {"sum_logits": -2.0219106674194336, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -2.0219106674194336, "logits_per_char": -1.0109553337097168, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1017, "native_id": "5d809e0ee19badc66071653630ea7c51", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4608607292175293, "incorrect_loss_raw": 1.6683107614517212, "correct_loss_per_char": 0.7304303646087646, "incorrect_loss_per_char": 0.8341553807258606, "correct_loss_per_token": 1.4608607292175293, "incorrect_loss_per_token": 1.6683107614517212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4608607292175293, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.4608607292175293, "logits_per_char": -0.7304303646087646, "num_chars": 2}, {"sum_logits": -1.4929900169372559, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.4929900169372559, "logits_per_char": -0.7464950084686279, "num_chars": 2}, {"sum_logits": -1.572943925857544, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.572943925857544, "logits_per_char": -0.786471962928772, "num_chars": 2}, {"sum_logits": -1.716904878616333, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.716904878616333, "logits_per_char": -0.8584524393081665, "num_chars": 2}, {"sum_logits": -1.890404224395752, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.890404224395752, "logits_per_char": -0.945202112197876, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1018, "native_id": "ad0943fc37034cd2b7e485021f8b1b8c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7708503007888794, "incorrect_loss_raw": 1.6139385402202606, "correct_loss_per_char": 0.8854251503944397, "incorrect_loss_per_char": 0.8069692701101303, "correct_loss_per_token": 1.7708503007888794, "incorrect_loss_per_token": 1.6139385402202606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3208056688308716, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3208056688308716, "logits_per_char": -0.6604028344154358, "num_chars": 2}, {"sum_logits": -1.5147587060928345, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5147587060928345, "logits_per_char": -0.7573793530464172, "num_chars": 2}, {"sum_logits": -1.5444241762161255, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5444241762161255, "logits_per_char": -0.7722120881080627, "num_chars": 2}, {"sum_logits": -1.7708503007888794, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7708503007888794, "logits_per_char": -0.8854251503944397, "num_chars": 2}, {"sum_logits": -2.075765609741211, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.075765609741211, "logits_per_char": -1.0378828048706055, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1019, "native_id": "c2a8c6814ed3e207771cfc23b3b42cf1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5661698579788208, "incorrect_loss_raw": 1.659537136554718, "correct_loss_per_char": 0.7830849289894104, "incorrect_loss_per_char": 0.829768568277359, "correct_loss_per_token": 1.5661698579788208, "incorrect_loss_per_token": 1.659537136554718, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4164354801177979, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4164354801177979, "logits_per_char": -0.7082177400588989, "num_chars": 2}, {"sum_logits": -1.5661698579788208, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5661698579788208, "logits_per_char": -0.7830849289894104, "num_chars": 2}, {"sum_logits": -1.6000314950942993, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6000314950942993, "logits_per_char": -0.8000157475471497, "num_chars": 2}, {"sum_logits": -1.5104221105575562, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5104221105575562, "logits_per_char": -0.7552110552787781, "num_chars": 2}, {"sum_logits": -2.1112594604492188, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.1112594604492188, "logits_per_char": -1.0556297302246094, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1020, "native_id": "0b52cc905fff0ca69a45e6353d10e401", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.651058316230774, "incorrect_loss_raw": 1.6408122777938843, "correct_loss_per_char": 0.825529158115387, "incorrect_loss_per_char": 0.8204061388969421, "correct_loss_per_token": 1.651058316230774, "incorrect_loss_per_token": 1.6408122777938843, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2727398872375488, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.2727398872375488, "logits_per_char": -0.6363699436187744, "num_chars": 2}, {"sum_logits": -1.6190534830093384, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6190534830093384, "logits_per_char": -0.8095267415046692, "num_chars": 2}, {"sum_logits": -1.6301010847091675, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6301010847091675, "logits_per_char": -0.8150505423545837, "num_chars": 2}, {"sum_logits": -1.651058316230774, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.651058316230774, "logits_per_char": -0.825529158115387, "num_chars": 2}, {"sum_logits": -2.0413546562194824, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -2.0413546562194824, "logits_per_char": -1.0206773281097412, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1021, "native_id": "30d0c2006613eec41ae814d76c17a798", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.899855613708496, "incorrect_loss_raw": 1.5613327622413635, "correct_loss_per_char": 0.949927806854248, "incorrect_loss_per_char": 0.7806663811206818, "correct_loss_per_token": 1.899855613708496, "incorrect_loss_per_token": 1.5613327622413635, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.348773717880249, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.348773717880249, "logits_per_char": -0.6743868589401245, "num_chars": 2}, {"sum_logits": -1.6393051147460938, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6393051147460938, "logits_per_char": -0.8196525573730469, "num_chars": 2}, {"sum_logits": -1.6110267639160156, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6110267639160156, "logits_per_char": -0.8055133819580078, "num_chars": 2}, {"sum_logits": -1.6462254524230957, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6462254524230957, "logits_per_char": -0.8231127262115479, "num_chars": 2}, {"sum_logits": -1.899855613708496, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.899855613708496, "logits_per_char": -0.949927806854248, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1022, "native_id": "f7a6d0d816d14210f3af5dabe21bf804", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4737335443496704, "incorrect_loss_raw": 1.6620363593101501, "correct_loss_per_char": 0.7368667721748352, "incorrect_loss_per_char": 0.8310181796550751, "correct_loss_per_token": 1.4737335443496704, "incorrect_loss_per_token": 1.6620363593101501, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4737335443496704, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.4737335443496704, "logits_per_char": -0.7368667721748352, "num_chars": 2}, {"sum_logits": -1.559358835220337, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.559358835220337, "logits_per_char": -0.7796794176101685, "num_chars": 2}, {"sum_logits": -1.6664750576019287, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6664750576019287, "logits_per_char": -0.8332375288009644, "num_chars": 2}, {"sum_logits": -1.5468628406524658, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5468628406524658, "logits_per_char": -0.7734314203262329, "num_chars": 2}, {"sum_logits": -1.8754487037658691, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.8754487037658691, "logits_per_char": -0.9377243518829346, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1023, "native_id": "c306ab28498b67c53decb9dde1d78bd5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7914352416992188, "incorrect_loss_raw": 1.5766960680484772, "correct_loss_per_char": 0.8957176208496094, "incorrect_loss_per_char": 0.7883480340242386, "correct_loss_per_token": 1.7914352416992188, "incorrect_loss_per_token": 1.5766960680484772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.598770022392273, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.598770022392273, "logits_per_char": -0.7993850111961365, "num_chars": 2}, {"sum_logits": -1.5646394491195679, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5646394491195679, "logits_per_char": -0.7823197245597839, "num_chars": 2}, {"sum_logits": -1.6608620882034302, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6608620882034302, "logits_per_char": -0.8304310441017151, "num_chars": 2}, {"sum_logits": -1.4825127124786377, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4825127124786377, "logits_per_char": -0.7412563562393188, "num_chars": 2}, {"sum_logits": -1.7914352416992188, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7914352416992188, "logits_per_char": -0.8957176208496094, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1024, "native_id": "637c710ec9582fd9b9e8eaa3f3fe83bb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5474841594696045, "incorrect_loss_raw": 1.6520050168037415, "correct_loss_per_char": 0.7737420797348022, "incorrect_loss_per_char": 0.8260025084018707, "correct_loss_per_token": 1.5474841594696045, "incorrect_loss_per_token": 1.6520050168037415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4400742053985596, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.4400742053985596, "logits_per_char": -0.7200371026992798, "num_chars": 2}, {"sum_logits": -1.5474841594696045, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5474841594696045, "logits_per_char": -0.7737420797348022, "num_chars": 2}, {"sum_logits": -1.581392765045166, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.581392765045166, "logits_per_char": -0.790696382522583, "num_chars": 2}, {"sum_logits": -1.5868072509765625, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5868072509765625, "logits_per_char": -0.7934036254882812, "num_chars": 2}, {"sum_logits": -1.9997458457946777, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.9997458457946777, "logits_per_char": -0.9998729228973389, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1025, "native_id": "9ae52783d8fdb5cc2e8caa01542c3341", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.242039442062378, "incorrect_loss_raw": 1.5309427976608276, "correct_loss_per_char": 1.121019721031189, "incorrect_loss_per_char": 0.7654713988304138, "correct_loss_per_token": 2.242039442062378, "incorrect_loss_per_token": 1.5309427976608276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.198122501373291, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.198122501373291, "logits_per_char": -0.5990612506866455, "num_chars": 2}, {"sum_logits": -1.4870378971099854, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4870378971099854, "logits_per_char": -0.7435189485549927, "num_chars": 2}, {"sum_logits": -1.6372389793395996, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6372389793395996, "logits_per_char": -0.8186194896697998, "num_chars": 2}, {"sum_logits": -1.8013718128204346, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8013718128204346, "logits_per_char": -0.9006859064102173, "num_chars": 2}, {"sum_logits": -2.242039442062378, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.242039442062378, "logits_per_char": -1.121019721031189, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1026, "native_id": "4f23829b96b38b5633ecc3325281726d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5509113073349, "incorrect_loss_raw": 1.6546657085418701, "correct_loss_per_char": 0.77545565366745, "incorrect_loss_per_char": 0.8273328542709351, "correct_loss_per_token": 1.5509113073349, "incorrect_loss_per_token": 1.6546657085418701, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5007926225662231, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.5007926225662231, "logits_per_char": -0.7503963112831116, "num_chars": 2}, {"sum_logits": -1.5231173038482666, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5231173038482666, "logits_per_char": -0.7615586519241333, "num_chars": 2}, {"sum_logits": -1.539998173713684, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.539998173713684, "logits_per_char": -0.769999086856842, "num_chars": 2}, {"sum_logits": -1.5509113073349, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5509113073349, "logits_per_char": -0.77545565366745, "num_chars": 2}, {"sum_logits": -2.0547547340393066, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.0547547340393066, "logits_per_char": -1.0273773670196533, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1027, "native_id": "3fcdc0b03e3c8b10692d642676931f4b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4504138231277466, "incorrect_loss_raw": 1.6721760034561157, "correct_loss_per_char": 0.7252069115638733, "incorrect_loss_per_char": 0.8360880017280579, "correct_loss_per_token": 1.4504138231277466, "incorrect_loss_per_token": 1.6721760034561157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4504138231277466, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4504138231277466, "logits_per_char": -0.7252069115638733, "num_chars": 2}, {"sum_logits": -1.4308289289474487, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4308289289474487, "logits_per_char": -0.7154144644737244, "num_chars": 2}, {"sum_logits": -1.6909772157669067, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6909772157669067, "logits_per_char": -0.8454886078834534, "num_chars": 2}, {"sum_logits": -1.7509900331497192, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7509900331497192, "logits_per_char": -0.8754950165748596, "num_chars": 2}, {"sum_logits": -1.8159078359603882, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8159078359603882, "logits_per_char": -0.9079539179801941, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1028, "native_id": "ddd606743cf71679438a85280f64593a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4062995910644531, "incorrect_loss_raw": 1.6948057115077972, "correct_loss_per_char": 0.7031497955322266, "incorrect_loss_per_char": 0.8474028557538986, "correct_loss_per_token": 1.4062995910644531, "incorrect_loss_per_token": 1.6948057115077972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4062995910644531, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4062995910644531, "logits_per_char": -0.7031497955322266, "num_chars": 2}, {"sum_logits": -1.5728206634521484, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5728206634521484, "logits_per_char": -0.7864103317260742, "num_chars": 2}, {"sum_logits": -1.6040672063827515, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6040672063827515, "logits_per_char": -0.8020336031913757, "num_chars": 2}, {"sum_logits": -1.532850742340088, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.532850742340088, "logits_per_char": -0.766425371170044, "num_chars": 2}, {"sum_logits": -2.069484233856201, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.069484233856201, "logits_per_char": -1.0347421169281006, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1029, "native_id": "420641003ba20b966887dfac684efb17", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3933377265930176, "incorrect_loss_raw": 1.7222633957862854, "correct_loss_per_char": 0.6966688632965088, "incorrect_loss_per_char": 0.8611316978931427, "correct_loss_per_token": 1.3933377265930176, "incorrect_loss_per_token": 1.7222633957862854, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.364759922027588, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.364759922027588, "logits_per_char": -0.682379961013794, "num_chars": 2}, {"sum_logits": -1.3933377265930176, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.3933377265930176, "logits_per_char": -0.6966688632965088, "num_chars": 2}, {"sum_logits": -1.6035475730895996, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6035475730895996, "logits_per_char": -0.8017737865447998, "num_chars": 2}, {"sum_logits": -1.6988341808319092, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6988341808319092, "logits_per_char": -0.8494170904159546, "num_chars": 2}, {"sum_logits": -2.221911907196045, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.221911907196045, "logits_per_char": -1.1109559535980225, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1030, "native_id": "064c3074a682893d49c3c5b4f1e89984", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5032451152801514, "incorrect_loss_raw": 1.6753025949001312, "correct_loss_per_char": 0.7516225576400757, "incorrect_loss_per_char": 0.8376512974500656, "correct_loss_per_token": 1.5032451152801514, "incorrect_loss_per_token": 1.6753025949001312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4112032651901245, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.4112032651901245, "logits_per_char": -0.7056016325950623, "num_chars": 2}, {"sum_logits": -1.6023619174957275, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.6023619174957275, "logits_per_char": -0.8011809587478638, "num_chars": 2}, {"sum_logits": -1.5032451152801514, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5032451152801514, "logits_per_char": -0.7516225576400757, "num_chars": 2}, {"sum_logits": -1.572049856185913, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.572049856185913, "logits_per_char": -0.7860249280929565, "num_chars": 2}, {"sum_logits": -2.1155953407287598, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -2.1155953407287598, "logits_per_char": -1.0577976703643799, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1031, "native_id": "c640116ca6905d5256edadb616b3f76e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7695341110229492, "incorrect_loss_raw": 1.611831158399582, "correct_loss_per_char": 0.8847670555114746, "incorrect_loss_per_char": 0.805915579199791, "correct_loss_per_token": 1.7695341110229492, "incorrect_loss_per_token": 1.611831158399582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3551095724105835, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3551095724105835, "logits_per_char": -0.6775547862052917, "num_chars": 2}, {"sum_logits": -1.4427722692489624, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4427722692489624, "logits_per_char": -0.7213861346244812, "num_chars": 2}, {"sum_logits": -1.6043823957443237, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6043823957443237, "logits_per_char": -0.8021911978721619, "num_chars": 2}, {"sum_logits": -1.7695341110229492, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7695341110229492, "logits_per_char": -0.8847670555114746, "num_chars": 2}, {"sum_logits": -2.045060396194458, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.045060396194458, "logits_per_char": -1.022530198097229, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1032, "native_id": "35ad89c198d5d6311a71c993bb7b6cba", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0827279090881348, "incorrect_loss_raw": 1.5259845852851868, "correct_loss_per_char": 1.0413639545440674, "incorrect_loss_per_char": 0.7629922926425934, "correct_loss_per_token": 2.0827279090881348, "incorrect_loss_per_token": 1.5259845852851868, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5926566123962402, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5926566123962402, "logits_per_char": -0.7963283061981201, "num_chars": 2}, {"sum_logits": -1.4850695133209229, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4850695133209229, "logits_per_char": -0.7425347566604614, "num_chars": 2}, {"sum_logits": -1.44516921043396, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.44516921043396, "logits_per_char": -0.72258460521698, "num_chars": 2}, {"sum_logits": -1.581043004989624, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.581043004989624, "logits_per_char": -0.790521502494812, "num_chars": 2}, {"sum_logits": -2.0827279090881348, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.0827279090881348, "logits_per_char": -1.0413639545440674, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1033, "native_id": "916bbd27545446ca5d83d07c10d013ea", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7194788455963135, "incorrect_loss_raw": 1.5914096236228943, "correct_loss_per_char": 0.8597394227981567, "incorrect_loss_per_char": 0.7957048118114471, "correct_loss_per_token": 1.7194788455963135, "incorrect_loss_per_token": 1.5914096236228943, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.495424509048462, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.495424509048462, "logits_per_char": -0.747712254524231, "num_chars": 2}, {"sum_logits": -1.6236541271209717, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.6236541271209717, "logits_per_char": -0.8118270635604858, "num_chars": 2}, {"sum_logits": -1.6797151565551758, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.6797151565551758, "logits_per_char": -0.8398575782775879, "num_chars": 2}, {"sum_logits": -1.5668447017669678, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.5668447017669678, "logits_per_char": -0.7834223508834839, "num_chars": 2}, {"sum_logits": -1.7194788455963135, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.7194788455963135, "logits_per_char": -0.8597394227981567, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1034, "native_id": "e40fd2c17fe2cde4bd4af540d35fd518", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8298453092575073, "incorrect_loss_raw": 1.5721562206745148, "correct_loss_per_char": 0.9149226546287537, "incorrect_loss_per_char": 0.7860781103372574, "correct_loss_per_token": 1.8298453092575073, "incorrect_loss_per_token": 1.5721562206745148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4202852249145508, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4202852249145508, "logits_per_char": -0.7101426124572754, "num_chars": 2}, {"sum_logits": -1.6077966690063477, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6077966690063477, "logits_per_char": -0.8038983345031738, "num_chars": 2}, {"sum_logits": -1.6927939653396606, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6927939653396606, "logits_per_char": -0.8463969826698303, "num_chars": 2}, {"sum_logits": -1.5677490234375, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5677490234375, "logits_per_char": -0.78387451171875, "num_chars": 2}, {"sum_logits": -1.8298453092575073, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8298453092575073, "logits_per_char": -0.9149226546287537, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1035, "native_id": "98a04457025f18c2287d5c610ff8000d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9699171781539917, "incorrect_loss_raw": 1.5454545319080353, "correct_loss_per_char": 0.9849585890769958, "incorrect_loss_per_char": 0.7727272659540176, "correct_loss_per_token": 1.9699171781539917, "incorrect_loss_per_token": 1.5454545319080353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.42538583278656, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.42538583278656, "logits_per_char": -0.71269291639328, "num_chars": 2}, {"sum_logits": -1.543302297592163, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.543302297592163, "logits_per_char": -0.7716511487960815, "num_chars": 2}, {"sum_logits": -1.5851572751998901, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5851572751998901, "logits_per_char": -0.7925786375999451, "num_chars": 2}, {"sum_logits": -1.6279727220535278, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6279727220535278, "logits_per_char": -0.8139863610267639, "num_chars": 2}, {"sum_logits": -1.9699171781539917, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.9699171781539917, "logits_per_char": -0.9849585890769958, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1036, "native_id": "f656a475f07d3adba9d1486eda8e834a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.16672420501709, "incorrect_loss_raw": 1.5195147693157196, "correct_loss_per_char": 1.083362102508545, "incorrect_loss_per_char": 0.7597573846578598, "correct_loss_per_token": 2.16672420501709, "incorrect_loss_per_token": 1.5195147693157196, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3350834846496582, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.3350834846496582, "logits_per_char": -0.6675417423248291, "num_chars": 2}, {"sum_logits": -1.6044436693191528, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6044436693191528, "logits_per_char": -0.8022218346595764, "num_chars": 2}, {"sum_logits": -1.5532876253128052, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5532876253128052, "logits_per_char": -0.7766438126564026, "num_chars": 2}, {"sum_logits": -1.5852442979812622, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5852442979812622, "logits_per_char": -0.7926221489906311, "num_chars": 2}, {"sum_logits": -2.16672420501709, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.16672420501709, "logits_per_char": -1.083362102508545, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1037, "native_id": "c865b3547c2a2e3c3916d7be6ab25752", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5621140003204346, "incorrect_loss_raw": 1.6376719772815704, "correct_loss_per_char": 0.7810570001602173, "incorrect_loss_per_char": 0.8188359886407852, "correct_loss_per_token": 1.5621140003204346, "incorrect_loss_per_token": 1.6376719772815704, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5462480783462524, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5462480783462524, "logits_per_char": -0.7731240391731262, "num_chars": 2}, {"sum_logits": -1.5348927974700928, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.5348927974700928, "logits_per_char": -0.7674463987350464, "num_chars": 2}, {"sum_logits": -1.5621140003204346, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5621140003204346, "logits_per_char": -0.7810570001602173, "num_chars": 2}, {"sum_logits": -1.6015207767486572, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6015207767486572, "logits_per_char": -0.8007603883743286, "num_chars": 2}, {"sum_logits": -1.8680262565612793, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.8680262565612793, "logits_per_char": -0.9340131282806396, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1038, "native_id": "abd30bab9b96f902fead5378d4f4a1e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5742297172546387, "incorrect_loss_raw": 1.6826676726341248, "correct_loss_per_char": 0.7871148586273193, "incorrect_loss_per_char": 0.8413338363170624, "correct_loss_per_token": 1.5742297172546387, "incorrect_loss_per_token": 1.6826676726341248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2629542350769043, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.2629542350769043, "logits_per_char": -0.6314771175384521, "num_chars": 2}, {"sum_logits": -1.5103347301483154, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5103347301483154, "logits_per_char": -0.7551673650741577, "num_chars": 2}, {"sum_logits": -1.5742297172546387, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5742297172546387, "logits_per_char": -0.7871148586273193, "num_chars": 2}, {"sum_logits": -1.7351810932159424, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7351810932159424, "logits_per_char": -0.8675905466079712, "num_chars": 2}, {"sum_logits": -2.222200632095337, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.222200632095337, "logits_per_char": -1.1111003160476685, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1039, "native_id": "a4b44a986e7f9045432e20ea75611df4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6850627660751343, "incorrect_loss_raw": 1.6286782920360565, "correct_loss_per_char": 0.8425313830375671, "incorrect_loss_per_char": 0.8143391460180283, "correct_loss_per_token": 1.6850627660751343, "incorrect_loss_per_token": 1.6286782920360565, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.302688479423523, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.302688479423523, "logits_per_char": -0.6513442397117615, "num_chars": 2}, {"sum_logits": -1.547094702720642, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.547094702720642, "logits_per_char": -0.773547351360321, "num_chars": 2}, {"sum_logits": -1.6850627660751343, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6850627660751343, "logits_per_char": -0.8425313830375671, "num_chars": 2}, {"sum_logits": -1.6556235551834106, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6556235551834106, "logits_per_char": -0.8278117775917053, "num_chars": 2}, {"sum_logits": -2.0093064308166504, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0093064308166504, "logits_per_char": -1.0046532154083252, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1040, "native_id": "1f492f556fae64f72ce36b6caa242dd0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7003849744796753, "incorrect_loss_raw": 1.6322448253631592, "correct_loss_per_char": 0.8501924872398376, "incorrect_loss_per_char": 0.8161224126815796, "correct_loss_per_token": 1.7003849744796753, "incorrect_loss_per_token": 1.6322448253631592, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2392444610595703, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.2392444610595703, "logits_per_char": -0.6196222305297852, "num_chars": 2}, {"sum_logits": -1.5556045770645142, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5556045770645142, "logits_per_char": -0.7778022885322571, "num_chars": 2}, {"sum_logits": -1.7003849744796753, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7003849744796753, "logits_per_char": -0.8501924872398376, "num_chars": 2}, {"sum_logits": -1.716429591178894, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.716429591178894, "logits_per_char": -0.858214795589447, "num_chars": 2}, {"sum_logits": -2.017700672149658, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.017700672149658, "logits_per_char": -1.008850336074829, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1041, "native_id": "d0c67c7ae6f2361fe237110455127866", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5779062509536743, "incorrect_loss_raw": 1.6378038823604584, "correct_loss_per_char": 0.7889531254768372, "incorrect_loss_per_char": 0.8189019411802292, "correct_loss_per_token": 1.5779062509536743, "incorrect_loss_per_token": 1.6378038823604584, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4725878238677979, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4725878238677979, "logits_per_char": -0.7362939119338989, "num_chars": 2}, {"sum_logits": -1.6310539245605469, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6310539245605469, "logits_per_char": -0.8155269622802734, "num_chars": 2}, {"sum_logits": -1.5779062509536743, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5779062509536743, "logits_per_char": -0.7889531254768372, "num_chars": 2}, {"sum_logits": -1.5623470544815063, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5623470544815063, "logits_per_char": -0.7811735272407532, "num_chars": 2}, {"sum_logits": -1.8852267265319824, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.8852267265319824, "logits_per_char": -0.9426133632659912, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1042, "native_id": "7bb279e38a1c9eb47a0c7af979a131a2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7457212209701538, "incorrect_loss_raw": 1.6203058362007141, "correct_loss_per_char": 0.8728606104850769, "incorrect_loss_per_char": 0.8101529181003571, "correct_loss_per_token": 1.7457212209701538, "incorrect_loss_per_token": 1.6203058362007141, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2936034202575684, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2936034202575684, "logits_per_char": -0.6468017101287842, "num_chars": 2}, {"sum_logits": -1.5383552312850952, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5383552312850952, "logits_per_char": -0.7691776156425476, "num_chars": 2}, {"sum_logits": -1.5935639142990112, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5935639142990112, "logits_per_char": -0.7967819571495056, "num_chars": 2}, {"sum_logits": -1.7457212209701538, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7457212209701538, "logits_per_char": -0.8728606104850769, "num_chars": 2}, {"sum_logits": -2.0557007789611816, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0557007789611816, "logits_per_char": -1.0278503894805908, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1043, "native_id": "3095078e4771053d9d5fa8d4f5f3dc38", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6599584817886353, "incorrect_loss_raw": 1.6873279213905334, "correct_loss_per_char": 0.8299792408943176, "incorrect_loss_per_char": 0.8436639606952667, "correct_loss_per_token": 1.6599584817886353, "incorrect_loss_per_token": 1.6873279213905334, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2187851667404175, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2187851667404175, "logits_per_char": -0.6093925833702087, "num_chars": 2}, {"sum_logits": -1.4178252220153809, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4178252220153809, "logits_per_char": -0.7089126110076904, "num_chars": 2}, {"sum_logits": -1.6599584817886353, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.6599584817886353, "logits_per_char": -0.8299792408943176, "num_chars": 2}, {"sum_logits": -1.7542275190353394, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7542275190353394, "logits_per_char": -0.8771137595176697, "num_chars": 2}, {"sum_logits": -2.358473777770996, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.358473777770996, "logits_per_char": -1.179236888885498, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1044, "native_id": "b23edb651e623e5d1e03e8ed3937e8fc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.520627498626709, "incorrect_loss_raw": 1.6746017932891846, "correct_loss_per_char": 0.7603137493133545, "incorrect_loss_per_char": 0.8373008966445923, "correct_loss_per_token": 1.520627498626709, "incorrect_loss_per_token": 1.6746017932891846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2827517986297607, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.2827517986297607, "logits_per_char": -0.6413758993148804, "num_chars": 2}, {"sum_logits": -1.520627498626709, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.520627498626709, "logits_per_char": -0.7603137493133545, "num_chars": 2}, {"sum_logits": -1.677401065826416, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.677401065826416, "logits_per_char": -0.838700532913208, "num_chars": 2}, {"sum_logits": -1.7272231578826904, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7272231578826904, "logits_per_char": -0.8636115789413452, "num_chars": 2}, {"sum_logits": -2.011031150817871, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.011031150817871, "logits_per_char": -1.0055155754089355, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1045, "native_id": "acf6b667e9353b1743b7c4f60a6a9017", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.693107008934021, "incorrect_loss_raw": 1.6636734306812286, "correct_loss_per_char": 0.8465535044670105, "incorrect_loss_per_char": 0.8318367153406143, "correct_loss_per_token": 1.693107008934021, "incorrect_loss_per_token": 1.6636734306812286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.219154715538025, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.219154715538025, "logits_per_char": -0.6095773577690125, "num_chars": 2}, {"sum_logits": -1.4590986967086792, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4590986967086792, "logits_per_char": -0.7295493483543396, "num_chars": 2}, {"sum_logits": -1.693107008934021, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.693107008934021, "logits_per_char": -0.8465535044670105, "num_chars": 2}, {"sum_logits": -1.7319320440292358, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7319320440292358, "logits_per_char": -0.8659660220146179, "num_chars": 2}, {"sum_logits": -2.2445082664489746, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.2445082664489746, "logits_per_char": -1.1222541332244873, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1046, "native_id": "15b090801256085ad465e74af47cbee9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6375066041946411, "incorrect_loss_raw": 1.6653913855552673, "correct_loss_per_char": 0.8187533020973206, "incorrect_loss_per_char": 0.8326956927776337, "correct_loss_per_token": 1.6375066041946411, "incorrect_loss_per_token": 1.6653913855552673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1912975311279297, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.1912975311279297, "logits_per_char": -0.5956487655639648, "num_chars": 2}, {"sum_logits": -1.54523503780365, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.54523503780365, "logits_per_char": -0.772617518901825, "num_chars": 2}, {"sum_logits": -1.6375066041946411, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6375066041946411, "logits_per_char": -0.8187533020973206, "num_chars": 2}, {"sum_logits": -1.8536490201950073, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.8536490201950073, "logits_per_char": -0.9268245100975037, "num_chars": 2}, {"sum_logits": -2.0713839530944824, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0713839530944824, "logits_per_char": -1.0356919765472412, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1047, "native_id": "790b3f583e9bc9424c771691ecc70c20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5840703248977661, "incorrect_loss_raw": 1.6400695145130157, "correct_loss_per_char": 0.7920351624488831, "incorrect_loss_per_char": 0.8200347572565079, "correct_loss_per_token": 1.5840703248977661, "incorrect_loss_per_token": 1.6400695145130157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4737858772277832, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4737858772277832, "logits_per_char": -0.7368929386138916, "num_chars": 2}, {"sum_logits": -1.5426864624023438, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5426864624023438, "logits_per_char": -0.7713432312011719, "num_chars": 2}, {"sum_logits": -1.583885908126831, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.583885908126831, "logits_per_char": -0.7919429540634155, "num_chars": 2}, {"sum_logits": -1.5840703248977661, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5840703248977661, "logits_per_char": -0.7920351624488831, "num_chars": 2}, {"sum_logits": -1.959919810295105, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.959919810295105, "logits_per_char": -0.9799599051475525, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1048, "native_id": "22b8219d43a38a1130e0a35ece152337", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.504094123840332, "incorrect_loss_raw": 1.6621641516685486, "correct_loss_per_char": 0.752047061920166, "incorrect_loss_per_char": 0.8310820758342743, "correct_loss_per_token": 1.504094123840332, "incorrect_loss_per_token": 1.6621641516685486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.504094123840332, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.504094123840332, "logits_per_char": -0.752047061920166, "num_chars": 2}, {"sum_logits": -1.5217671394348145, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5217671394348145, "logits_per_char": -0.7608835697174072, "num_chars": 2}, {"sum_logits": -1.4995887279510498, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.4995887279510498, "logits_per_char": -0.7497943639755249, "num_chars": 2}, {"sum_logits": -1.627471685409546, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.627471685409546, "logits_per_char": -0.813735842704773, "num_chars": 2}, {"sum_logits": -1.9998290538787842, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.9998290538787842, "logits_per_char": -0.9999145269393921, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1049, "native_id": "5d4233146435ab0ca211e8ac9bfce76f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3473269939422607, "incorrect_loss_raw": 1.7217110395431519, "correct_loss_per_char": 0.6736634969711304, "incorrect_loss_per_char": 0.8608555197715759, "correct_loss_per_token": 1.3473269939422607, "incorrect_loss_per_token": 1.7217110395431519, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3473269939422607, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3473269939422607, "logits_per_char": -0.6736634969711304, "num_chars": 2}, {"sum_logits": -1.4921035766601562, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4921035766601562, "logits_per_char": -0.7460517883300781, "num_chars": 2}, {"sum_logits": -1.5723521709442139, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5723521709442139, "logits_per_char": -0.7861760854721069, "num_chars": 2}, {"sum_logits": -1.7000067234039307, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.7000067234039307, "logits_per_char": -0.8500033617019653, "num_chars": 2}, {"sum_logits": -2.1223816871643066, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.1223816871643066, "logits_per_char": -1.0611908435821533, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1050, "native_id": "be737cd4db844574ef594442ce6c9453", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4308398962020874, "incorrect_loss_raw": 1.6705543398857117, "correct_loss_per_char": 0.7154199481010437, "incorrect_loss_per_char": 0.8352771699428558, "correct_loss_per_token": 1.4308398962020874, "incorrect_loss_per_token": 1.6705543398857117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4308398962020874, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4308398962020874, "logits_per_char": -0.7154199481010437, "num_chars": 2}, {"sum_logits": -1.6998045444488525, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6998045444488525, "logits_per_char": -0.8499022722244263, "num_chars": 2}, {"sum_logits": -1.5817372798919678, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5817372798919678, "logits_per_char": -0.7908686399459839, "num_chars": 2}, {"sum_logits": -1.6034348011016846, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6034348011016846, "logits_per_char": -0.8017174005508423, "num_chars": 2}, {"sum_logits": -1.7972407341003418, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.7972407341003418, "logits_per_char": -0.8986203670501709, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1051, "native_id": "550164b7cf4e03153484136f10122c70", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6830590963363647, "incorrect_loss_raw": 1.6155522167682648, "correct_loss_per_char": 0.8415295481681824, "incorrect_loss_per_char": 0.8077761083841324, "correct_loss_per_token": 1.6830590963363647, "incorrect_loss_per_token": 1.6155522167682648, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4046173095703125, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.4046173095703125, "logits_per_char": -0.7023086547851562, "num_chars": 2}, {"sum_logits": -1.5049632787704468, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5049632787704468, "logits_per_char": -0.7524816393852234, "num_chars": 2}, {"sum_logits": -1.6287261247634888, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6287261247634888, "logits_per_char": -0.8143630623817444, "num_chars": 2}, {"sum_logits": -1.6830590963363647, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6830590963363647, "logits_per_char": -0.8415295481681824, "num_chars": 2}, {"sum_logits": -1.923902153968811, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.923902153968811, "logits_per_char": -0.9619510769844055, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1052, "native_id": "a617eb4d27edea93e7fd630ce00c8219", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6251593828201294, "incorrect_loss_raw": 1.6636618077754974, "correct_loss_per_char": 0.8125796914100647, "incorrect_loss_per_char": 0.8318309038877487, "correct_loss_per_token": 1.6251593828201294, "incorrect_loss_per_token": 1.6636618077754974, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.214892029762268, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.214892029762268, "logits_per_char": -0.607446014881134, "num_chars": 2}, {"sum_logits": -1.5536361932754517, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5536361932754517, "logits_per_char": -0.7768180966377258, "num_chars": 2}, {"sum_logits": -1.6251593828201294, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6251593828201294, "logits_per_char": -0.8125796914100647, "num_chars": 2}, {"sum_logits": -1.7909013032913208, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7909013032913208, "logits_per_char": -0.8954506516456604, "num_chars": 2}, {"sum_logits": -2.095217704772949, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.095217704772949, "logits_per_char": -1.0476088523864746, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1053, "native_id": "bd47827418d5b8d7fb3502a398644435", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.791928768157959, "incorrect_loss_raw": 1.6076865792274475, "correct_loss_per_char": 0.8959643840789795, "incorrect_loss_per_char": 0.8038432896137238, "correct_loss_per_token": 1.791928768157959, "incorrect_loss_per_token": 1.6076865792274475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3164522647857666, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3164522647857666, "logits_per_char": -0.6582261323928833, "num_chars": 2}, {"sum_logits": -1.4511675834655762, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4511675834655762, "logits_per_char": -0.7255837917327881, "num_chars": 2}, {"sum_logits": -1.6404273509979248, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6404273509979248, "logits_per_char": -0.8202136754989624, "num_chars": 2}, {"sum_logits": -1.791928768157959, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.791928768157959, "logits_per_char": -0.8959643840789795, "num_chars": 2}, {"sum_logits": -2.0226991176605225, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.0226991176605225, "logits_per_char": -1.0113495588302612, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1054, "native_id": "31487ab8b1e8f12e252590cc58bd19c2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4865862131118774, "incorrect_loss_raw": 1.6602661609649658, "correct_loss_per_char": 0.7432931065559387, "incorrect_loss_per_char": 0.8301330804824829, "correct_loss_per_token": 1.4865862131118774, "incorrect_loss_per_token": 1.6602661609649658, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4865862131118774, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4865862131118774, "logits_per_char": -0.7432931065559387, "num_chars": 2}, {"sum_logits": -1.6067453622817993, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6067453622817993, "logits_per_char": -0.8033726811408997, "num_chars": 2}, {"sum_logits": -1.536980390548706, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.536980390548706, "logits_per_char": -0.768490195274353, "num_chars": 2}, {"sum_logits": -1.5889652967453003, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5889652967453003, "logits_per_char": -0.7944826483726501, "num_chars": 2}, {"sum_logits": -1.9083735942840576, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.9083735942840576, "logits_per_char": -0.9541867971420288, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1055, "native_id": "ce2fd94212243f843b3f357046051f57", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2887911796569824, "incorrect_loss_raw": 1.7431812584400177, "correct_loss_per_char": 0.6443955898284912, "incorrect_loss_per_char": 0.8715906292200089, "correct_loss_per_token": 1.2887911796569824, "incorrect_loss_per_token": 1.7431812584400177, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2887911796569824, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.2887911796569824, "logits_per_char": -0.6443955898284912, "num_chars": 2}, {"sum_logits": -1.4053093194961548, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4053093194961548, "logits_per_char": -0.7026546597480774, "num_chars": 2}, {"sum_logits": -1.746184229850769, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.746184229850769, "logits_per_char": -0.8730921149253845, "num_chars": 2}, {"sum_logits": -1.7394148111343384, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7394148111343384, "logits_per_char": -0.8697074055671692, "num_chars": 2}, {"sum_logits": -2.0818166732788086, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.0818166732788086, "logits_per_char": -1.0409083366394043, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1056, "native_id": "f87f40db71a56b5beda3194550202dc9_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5990418195724487, "incorrect_loss_raw": 1.6468139588832855, "correct_loss_per_char": 0.7995209097862244, "incorrect_loss_per_char": 0.8234069794416428, "correct_loss_per_token": 1.5990418195724487, "incorrect_loss_per_token": 1.6468139588832855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4342433214187622, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4342433214187622, "logits_per_char": -0.7171216607093811, "num_chars": 2}, {"sum_logits": -1.5207860469818115, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5207860469818115, "logits_per_char": -0.7603930234909058, "num_chars": 2}, {"sum_logits": -1.5990418195724487, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5990418195724487, "logits_per_char": -0.7995209097862244, "num_chars": 2}, {"sum_logits": -1.5608553886413574, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5608553886413574, "logits_per_char": -0.7804276943206787, "num_chars": 2}, {"sum_logits": -2.071371078491211, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.071371078491211, "logits_per_char": -1.0356855392456055, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1057, "native_id": "0b25bbd9e9aa976655e1975e31331709", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.555173397064209, "incorrect_loss_raw": 1.6532586514949799, "correct_loss_per_char": 0.7775866985321045, "incorrect_loss_per_char": 0.8266293257474899, "correct_loss_per_token": 1.555173397064209, "incorrect_loss_per_token": 1.6532586514949799, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3611537218093872, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.3611537218093872, "logits_per_char": -0.6805768609046936, "num_chars": 2}, {"sum_logits": -1.555173397064209, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.555173397064209, "logits_per_char": -0.7775866985321045, "num_chars": 2}, {"sum_logits": -1.5969703197479248, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5969703197479248, "logits_per_char": -0.7984851598739624, "num_chars": 2}, {"sum_logits": -1.6835932731628418, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6835932731628418, "logits_per_char": -0.8417966365814209, "num_chars": 2}, {"sum_logits": -1.9713172912597656, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.9713172912597656, "logits_per_char": -0.9856586456298828, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1058, "native_id": "925232b4c9bba945a38ac7ef0f15f8d0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6357930898666382, "incorrect_loss_raw": 1.620436191558838, "correct_loss_per_char": 0.8178965449333191, "incorrect_loss_per_char": 0.810218095779419, "correct_loss_per_token": 1.6357930898666382, "incorrect_loss_per_token": 1.620436191558838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5517226457595825, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5517226457595825, "logits_per_char": -0.7758613228797913, "num_chars": 2}, {"sum_logits": -1.632015347480774, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.632015347480774, "logits_per_char": -0.816007673740387, "num_chars": 2}, {"sum_logits": -1.6357930898666382, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6357930898666382, "logits_per_char": -0.8178965449333191, "num_chars": 2}, {"sum_logits": -1.4553464651107788, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.4553464651107788, "logits_per_char": -0.7276732325553894, "num_chars": 2}, {"sum_logits": -1.8426603078842163, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.8426603078842163, "logits_per_char": -0.9213301539421082, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1059, "native_id": "3338109fcafaaa370c8900a53e1b3ed8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4660475254058838, "incorrect_loss_raw": 1.6771153211593628, "correct_loss_per_char": 0.7330237627029419, "incorrect_loss_per_char": 0.8385576605796814, "correct_loss_per_token": 1.4660475254058838, "incorrect_loss_per_token": 1.6771153211593628, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3703267574310303, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.3703267574310303, "logits_per_char": -0.6851633787155151, "num_chars": 2}, {"sum_logits": -1.4660475254058838, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.4660475254058838, "logits_per_char": -0.7330237627029419, "num_chars": 2}, {"sum_logits": -1.649186611175537, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.649186611175537, "logits_per_char": -0.8245933055877686, "num_chars": 2}, {"sum_logits": -1.7139074802398682, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7139074802398682, "logits_per_char": -0.8569537401199341, "num_chars": 2}, {"sum_logits": -1.9750404357910156, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.9750404357910156, "logits_per_char": -0.9875202178955078, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1060, "native_id": "e172a93c72d305ee8262a8deb00d9fc3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2594256401062012, "incorrect_loss_raw": 1.7755905985832214, "correct_loss_per_char": 0.6297128200531006, "incorrect_loss_per_char": 0.8877952992916107, "correct_loss_per_token": 1.2594256401062012, "incorrect_loss_per_token": 1.7755905985832214, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2594256401062012, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.2594256401062012, "logits_per_char": -0.6297128200531006, "num_chars": 2}, {"sum_logits": -1.4445884227752686, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4445884227752686, "logits_per_char": -0.7222942113876343, "num_chars": 2}, {"sum_logits": -1.6238903999328613, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6238903999328613, "logits_per_char": -0.8119451999664307, "num_chars": 2}, {"sum_logits": -1.7207679748535156, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7207679748535156, "logits_per_char": -0.8603839874267578, "num_chars": 2}, {"sum_logits": -2.3131155967712402, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.3131155967712402, "logits_per_char": -1.1565577983856201, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1061, "native_id": "f1c2e37abf17d9e4ad16eb40f966c79f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.071255683898926, "incorrect_loss_raw": 1.52736097574234, "correct_loss_per_char": 1.035627841949463, "incorrect_loss_per_char": 0.76368048787117, "correct_loss_per_token": 2.071255683898926, "incorrect_loss_per_token": 1.52736097574234, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4937716722488403, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4937716722488403, "logits_per_char": -0.7468858361244202, "num_chars": 2}, {"sum_logits": -1.479887843132019, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.479887843132019, "logits_per_char": -0.7399439215660095, "num_chars": 2}, {"sum_logits": -1.5154979228973389, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5154979228973389, "logits_per_char": -0.7577489614486694, "num_chars": 2}, {"sum_logits": -1.620286464691162, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.620286464691162, "logits_per_char": -0.810143232345581, "num_chars": 2}, {"sum_logits": -2.071255683898926, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.071255683898926, "logits_per_char": -1.035627841949463, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1062, "native_id": "d29252ddaf7c7ef491abcce342d7bb98", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4822548627853394, "incorrect_loss_raw": 1.7048664689064026, "correct_loss_per_char": 0.7411274313926697, "incorrect_loss_per_char": 0.8524332344532013, "correct_loss_per_token": 1.4822548627853394, "incorrect_loss_per_token": 1.7048664689064026, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.259878158569336, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.259878158569336, "logits_per_char": -0.629939079284668, "num_chars": 2}, {"sum_logits": -1.4822548627853394, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4822548627853394, "logits_per_char": -0.7411274313926697, "num_chars": 2}, {"sum_logits": -1.5847257375717163, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5847257375717163, "logits_per_char": -0.7923628687858582, "num_chars": 2}, {"sum_logits": -1.808578372001648, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.808578372001648, "logits_per_char": -0.904289186000824, "num_chars": 2}, {"sum_logits": -2.16628360748291, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.16628360748291, "logits_per_char": -1.083141803741455, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1063, "native_id": "8c3c6b34bdb650a6517bca3786406c99", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2541580200195312, "incorrect_loss_raw": 1.5162582993507385, "correct_loss_per_char": 1.1270790100097656, "incorrect_loss_per_char": 0.7581291496753693, "correct_loss_per_token": 2.2541580200195312, "incorrect_loss_per_token": 1.5162582993507385, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3210490942001343, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3210490942001343, "logits_per_char": -0.6605245471000671, "num_chars": 2}, {"sum_logits": -1.3848103284835815, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.3848103284835815, "logits_per_char": -0.6924051642417908, "num_chars": 2}, {"sum_logits": -1.6365898847579956, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6365898847579956, "logits_per_char": -0.8182949423789978, "num_chars": 2}, {"sum_logits": -1.7225838899612427, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7225838899612427, "logits_per_char": -0.8612919449806213, "num_chars": 2}, {"sum_logits": -2.2541580200195312, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.2541580200195312, "logits_per_char": -1.1270790100097656, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1064, "native_id": "ff1bf2ec835c9df8695ae0cfb5281646", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.684887170791626, "incorrect_loss_raw": 1.6390261054039001, "correct_loss_per_char": 0.842443585395813, "incorrect_loss_per_char": 0.8195130527019501, "correct_loss_per_token": 1.684887170791626, "incorrect_loss_per_token": 1.6390261054039001, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3113386631011963, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3113386631011963, "logits_per_char": -0.6556693315505981, "num_chars": 2}, {"sum_logits": -1.5359601974487305, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5359601974487305, "logits_per_char": -0.7679800987243652, "num_chars": 2}, {"sum_logits": -1.584108829498291, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.584108829498291, "logits_per_char": -0.7920544147491455, "num_chars": 2}, {"sum_logits": -1.684887170791626, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.684887170791626, "logits_per_char": -0.842443585395813, "num_chars": 2}, {"sum_logits": -2.124696731567383, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.124696731567383, "logits_per_char": -1.0623483657836914, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1065, "native_id": "c7526b682e64f355384631b35cd78fc9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4974045753479004, "incorrect_loss_raw": 1.6711987555027008, "correct_loss_per_char": 0.7487022876739502, "incorrect_loss_per_char": 0.8355993777513504, "correct_loss_per_token": 1.4974045753479004, "incorrect_loss_per_token": 1.6711987555027008, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3804515600204468, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3804515600204468, "logits_per_char": -0.6902257800102234, "num_chars": 2}, {"sum_logits": -1.4974045753479004, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4974045753479004, "logits_per_char": -0.7487022876739502, "num_chars": 2}, {"sum_logits": -1.695397973060608, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.695397973060608, "logits_per_char": -0.847698986530304, "num_chars": 2}, {"sum_logits": -1.58950674533844, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.58950674533844, "logits_per_char": -0.79475337266922, "num_chars": 2}, {"sum_logits": -2.0194387435913086, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.0194387435913086, "logits_per_char": -1.0097193717956543, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1066, "native_id": "0fba83d3997f048adcc31937221af77e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5555263757705688, "incorrect_loss_raw": 1.6625750362873077, "correct_loss_per_char": 0.7777631878852844, "incorrect_loss_per_char": 0.8312875181436539, "correct_loss_per_token": 1.5555263757705688, "incorrect_loss_per_token": 1.6625750362873077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.353869080543518, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.353869080543518, "logits_per_char": -0.676934540271759, "num_chars": 2}, {"sum_logits": -1.5555263757705688, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5555263757705688, "logits_per_char": -0.7777631878852844, "num_chars": 2}, {"sum_logits": -1.6594732999801636, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6594732999801636, "logits_per_char": -0.8297366499900818, "num_chars": 2}, {"sum_logits": -1.5465322732925415, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5465322732925415, "logits_per_char": -0.7732661366462708, "num_chars": 2}, {"sum_logits": -2.090425491333008, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.090425491333008, "logits_per_char": -1.045212745666504, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1067, "native_id": "a5456dc611aa93b81d7ab6ed8e160f85", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7011646032333374, "incorrect_loss_raw": 1.6078230440616608, "correct_loss_per_char": 0.8505823016166687, "incorrect_loss_per_char": 0.8039115220308304, "correct_loss_per_token": 1.7011646032333374, "incorrect_loss_per_token": 1.6078230440616608, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.343665361404419, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.343665361404419, "logits_per_char": -0.6718326807022095, "num_chars": 2}, {"sum_logits": -1.6276038885116577, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6276038885116577, "logits_per_char": -0.8138019442558289, "num_chars": 2}, {"sum_logits": -1.6724663972854614, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6724663972854614, "logits_per_char": -0.8362331986427307, "num_chars": 2}, {"sum_logits": -1.7011646032333374, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7011646032333374, "logits_per_char": -0.8505823016166687, "num_chars": 2}, {"sum_logits": -1.787556529045105, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.787556529045105, "logits_per_char": -0.8937782645225525, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1068, "native_id": "11416df796f63d2f0dddc846b9c139d3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6315529346466064, "incorrect_loss_raw": 1.6547434031963348, "correct_loss_per_char": 0.8157764673233032, "incorrect_loss_per_char": 0.8273717015981674, "correct_loss_per_token": 1.6315529346466064, "incorrect_loss_per_token": 1.6547434031963348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3305314779281616, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3305314779281616, "logits_per_char": -0.6652657389640808, "num_chars": 2}, {"sum_logits": -1.5097458362579346, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5097458362579346, "logits_per_char": -0.7548729181289673, "num_chars": 2}, {"sum_logits": -1.5977871417999268, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5977871417999268, "logits_per_char": -0.7988935708999634, "num_chars": 2}, {"sum_logits": -1.6315529346466064, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6315529346466064, "logits_per_char": -0.8157764673233032, "num_chars": 2}, {"sum_logits": -2.1809091567993164, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.1809091567993164, "logits_per_char": -1.0904545783996582, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1069, "native_id": "c908d7c4633c5e6add9463bdd47cb27e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9707684516906738, "incorrect_loss_raw": 1.5455197989940643, "correct_loss_per_char": 0.9853842258453369, "incorrect_loss_per_char": 0.7727598994970322, "correct_loss_per_token": 1.9707684516906738, "incorrect_loss_per_token": 1.5455197989940643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.445491909980774, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.445491909980774, "logits_per_char": -0.722745954990387, "num_chars": 2}, {"sum_logits": -1.513669490814209, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.513669490814209, "logits_per_char": -0.7568347454071045, "num_chars": 2}, {"sum_logits": -1.6816818714141846, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6816818714141846, "logits_per_char": -0.8408409357070923, "num_chars": 2}, {"sum_logits": -1.5412359237670898, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5412359237670898, "logits_per_char": -0.7706179618835449, "num_chars": 2}, {"sum_logits": -1.9707684516906738, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9707684516906738, "logits_per_char": -0.9853842258453369, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1070, "native_id": "7e522a60756f854c5331125f998bc36b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.645882248878479, "incorrect_loss_raw": 1.6292016804218292, "correct_loss_per_char": 0.8229411244392395, "incorrect_loss_per_char": 0.8146008402109146, "correct_loss_per_token": 1.645882248878479, "incorrect_loss_per_token": 1.6292016804218292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3895156383514404, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3895156383514404, "logits_per_char": -0.6947578191757202, "num_chars": 2}, {"sum_logits": -1.592652440071106, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.592652440071106, "logits_per_char": -0.796326220035553, "num_chars": 2}, {"sum_logits": -1.5565904378890991, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5565904378890991, "logits_per_char": -0.7782952189445496, "num_chars": 2}, {"sum_logits": -1.645882248878479, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.645882248878479, "logits_per_char": -0.8229411244392395, "num_chars": 2}, {"sum_logits": -1.9780482053756714, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.9780482053756714, "logits_per_char": -0.9890241026878357, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1071, "native_id": "f4a75bf3f115b826a8097edfd0ff2781", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4719123840332031, "incorrect_loss_raw": 1.688034176826477, "correct_loss_per_char": 0.7359561920166016, "incorrect_loss_per_char": 0.8440170884132385, "correct_loss_per_token": 1.4719123840332031, "incorrect_loss_per_token": 1.688034176826477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4719123840332031, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4719123840332031, "logits_per_char": -0.7359561920166016, "num_chars": 2}, {"sum_logits": -1.4933793544769287, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4933793544769287, "logits_per_char": -0.7466896772384644, "num_chars": 2}, {"sum_logits": -1.4225759506225586, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4225759506225586, "logits_per_char": -0.7112879753112793, "num_chars": 2}, {"sum_logits": -1.7062654495239258, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7062654495239258, "logits_per_char": -0.8531327247619629, "num_chars": 2}, {"sum_logits": -2.129915952682495, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.129915952682495, "logits_per_char": -1.0649579763412476, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1072, "native_id": "02f43014a135cbd39f23b044c99de96e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5293560028076172, "incorrect_loss_raw": 1.6565470695495605, "correct_loss_per_char": 0.7646780014038086, "incorrect_loss_per_char": 0.8282735347747803, "correct_loss_per_token": 1.5293560028076172, "incorrect_loss_per_token": 1.6565470695495605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5293560028076172, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5293560028076172, "logits_per_char": -0.7646780014038086, "num_chars": 2}, {"sum_logits": -1.5537219047546387, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5537219047546387, "logits_per_char": -0.7768609523773193, "num_chars": 2}, {"sum_logits": -1.445246696472168, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.445246696472168, "logits_per_char": -0.722623348236084, "num_chars": 2}, {"sum_logits": -1.62709641456604, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.62709641456604, "logits_per_char": -0.81354820728302, "num_chars": 2}, {"sum_logits": -2.0001232624053955, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0001232624053955, "logits_per_char": -1.0000616312026978, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1073, "native_id": "8cf478192696744b3427f7c109019af5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1607961654663086, "incorrect_loss_raw": 1.5214515328407288, "correct_loss_per_char": 1.0803980827331543, "incorrect_loss_per_char": 0.7607257664203644, "correct_loss_per_token": 2.1607961654663086, "incorrect_loss_per_token": 1.5214515328407288, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3403706550598145, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3403706550598145, "logits_per_char": -0.6701853275299072, "num_chars": 2}, {"sum_logits": -1.5244157314300537, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5244157314300537, "logits_per_char": -0.7622078657150269, "num_chars": 2}, {"sum_logits": -1.549651861190796, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.549651861190796, "logits_per_char": -0.774825930595398, "num_chars": 2}, {"sum_logits": -1.671367883682251, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.671367883682251, "logits_per_char": -0.8356839418411255, "num_chars": 2}, {"sum_logits": -2.1607961654663086, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.1607961654663086, "logits_per_char": -1.0803980827331543, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1074, "native_id": "4ccd43cdff044bc4c644dadff1ff1e0b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.599289059638977, "incorrect_loss_raw": 1.675548642873764, "correct_loss_per_char": 0.7996445298194885, "incorrect_loss_per_char": 0.837774321436882, "correct_loss_per_token": 1.599289059638977, "incorrect_loss_per_token": 1.675548642873764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2638105154037476, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2638105154037476, "logits_per_char": -0.6319052577018738, "num_chars": 2}, {"sum_logits": -1.534103274345398, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.534103274345398, "logits_per_char": -0.767051637172699, "num_chars": 2}, {"sum_logits": -1.599289059638977, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.599289059638977, "logits_per_char": -0.7996445298194885, "num_chars": 2}, {"sum_logits": -1.6788994073867798, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6788994073867798, "logits_per_char": -0.8394497036933899, "num_chars": 2}, {"sum_logits": -2.225381374359131, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.225381374359131, "logits_per_char": -1.1126906871795654, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1075, "native_id": "7b7941b883328ad39048d4dfb1eb5623", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.103947877883911, "incorrect_loss_raw": 1.5257359147071838, "correct_loss_per_char": 1.0519739389419556, "incorrect_loss_per_char": 0.7628679573535919, "correct_loss_per_token": 2.103947877883911, "incorrect_loss_per_token": 1.5257359147071838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4393839836120605, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4393839836120605, "logits_per_char": -0.7196919918060303, "num_chars": 2}, {"sum_logits": -1.5219614505767822, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5219614505767822, "logits_per_char": -0.7609807252883911, "num_chars": 2}, {"sum_logits": -1.4550211429595947, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4550211429595947, "logits_per_char": -0.7275105714797974, "num_chars": 2}, {"sum_logits": -1.6865770816802979, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6865770816802979, "logits_per_char": -0.8432885408401489, "num_chars": 2}, {"sum_logits": -2.103947877883911, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.103947877883911, "logits_per_char": -1.0519739389419556, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1076, "native_id": "008b7ba0c039f6d0d542c6c90aae173c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7299493551254272, "incorrect_loss_raw": 1.5994510054588318, "correct_loss_per_char": 0.8649746775627136, "incorrect_loss_per_char": 0.7997255027294159, "correct_loss_per_token": 1.7299493551254272, "incorrect_loss_per_token": 1.5994510054588318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4108948707580566, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.4108948707580566, "logits_per_char": -0.7054474353790283, "num_chars": 2}, {"sum_logits": -1.5578100681304932, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.5578100681304932, "logits_per_char": -0.7789050340652466, "num_chars": 2}, {"sum_logits": -1.7299493551254272, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.7299493551254272, "logits_per_char": -0.8649746775627136, "num_chars": 2}, {"sum_logits": -1.597565770149231, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.597565770149231, "logits_per_char": -0.7987828850746155, "num_chars": 2}, {"sum_logits": -1.8315333127975464, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.8315333127975464, "logits_per_char": -0.9157666563987732, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1077, "native_id": "4c968fa73699a38639ba3ffa1745bc21", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5464428663253784, "incorrect_loss_raw": 1.6518020033836365, "correct_loss_per_char": 0.7732214331626892, "incorrect_loss_per_char": 0.8259010016918182, "correct_loss_per_token": 1.5464428663253784, "incorrect_loss_per_token": 1.6518020033836365, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.394389033317566, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.394389033317566, "logits_per_char": -0.697194516658783, "num_chars": 2}, {"sum_logits": -1.5464428663253784, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5464428663253784, "logits_per_char": -0.7732214331626892, "num_chars": 2}, {"sum_logits": -1.6493934392929077, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6493934392929077, "logits_per_char": -0.8246967196464539, "num_chars": 2}, {"sum_logits": -1.6055387258529663, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6055387258529663, "logits_per_char": -0.8027693629264832, "num_chars": 2}, {"sum_logits": -1.957886815071106, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.957886815071106, "logits_per_char": -0.978943407535553, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1078, "native_id": "b1d5cdbf8ef7b3954a6a352bd4df5866", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.32219398021698, "incorrect_loss_raw": 1.7260931432247162, "correct_loss_per_char": 0.66109699010849, "incorrect_loss_per_char": 0.8630465716123581, "correct_loss_per_token": 1.32219398021698, "incorrect_loss_per_token": 1.7260931432247162, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.32219398021698, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.32219398021698, "logits_per_char": -0.66109699010849, "num_chars": 2}, {"sum_logits": -1.4566842317581177, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4566842317581177, "logits_per_char": -0.7283421158790588, "num_chars": 2}, {"sum_logits": -1.717089056968689, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.717089056968689, "logits_per_char": -0.8585445284843445, "num_chars": 2}, {"sum_logits": -1.6562966108322144, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6562966108322144, "logits_per_char": -0.8281483054161072, "num_chars": 2}, {"sum_logits": -2.0743026733398438, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.0743026733398438, "logits_per_char": -1.0371513366699219, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1079, "native_id": "c3bc395561113c96ec43afd715da5061", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.338456630706787, "incorrect_loss_raw": 1.711616575717926, "correct_loss_per_char": 0.6692283153533936, "incorrect_loss_per_char": 0.855808287858963, "correct_loss_per_token": 1.338456630706787, "incorrect_loss_per_token": 1.711616575717926, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.338456630706787, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.338456630706787, "logits_per_char": -0.6692283153533936, "num_chars": 2}, {"sum_logits": -1.4699602127075195, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4699602127075195, "logits_per_char": -0.7349801063537598, "num_chars": 2}, {"sum_logits": -1.6588082313537598, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6588082313537598, "logits_per_char": -0.8294041156768799, "num_chars": 2}, {"sum_logits": -1.8111536502838135, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.8111536502838135, "logits_per_char": -0.9055768251419067, "num_chars": 2}, {"sum_logits": -1.9065442085266113, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9065442085266113, "logits_per_char": -0.9532721042633057, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1080, "native_id": "d0bd5b5ee7319d1c4727e38d429dd54e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9940016269683838, "incorrect_loss_raw": 1.540692925453186, "correct_loss_per_char": 0.9970008134841919, "incorrect_loss_per_char": 0.770346462726593, "correct_loss_per_token": 1.9940016269683838, "incorrect_loss_per_token": 1.540692925453186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5280922651290894, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5280922651290894, "logits_per_char": -0.7640461325645447, "num_chars": 2}, {"sum_logits": -1.5977635383605957, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5977635383605957, "logits_per_char": -0.7988817691802979, "num_chars": 2}, {"sum_logits": -1.4215651750564575, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4215651750564575, "logits_per_char": -0.7107825875282288, "num_chars": 2}, {"sum_logits": -1.6153507232666016, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6153507232666016, "logits_per_char": -0.8076753616333008, "num_chars": 2}, {"sum_logits": -1.9940016269683838, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.9940016269683838, "logits_per_char": -0.9970008134841919, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1081, "native_id": "81f5e741d970578867495ceea5a0c848", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6730903387069702, "incorrect_loss_raw": 1.6513776779174805, "correct_loss_per_char": 0.8365451693534851, "incorrect_loss_per_char": 0.8256888389587402, "correct_loss_per_token": 1.6730903387069702, "incorrect_loss_per_token": 1.6513776779174805, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2526187896728516, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.2526187896728516, "logits_per_char": -0.6263093948364258, "num_chars": 2}, {"sum_logits": -1.504952311515808, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.504952311515808, "logits_per_char": -0.752476155757904, "num_chars": 2}, {"sum_logits": -1.6730903387069702, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6730903387069702, "logits_per_char": -0.8365451693534851, "num_chars": 2}, {"sum_logits": -1.683580994606018, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.683580994606018, "logits_per_char": -0.841790497303009, "num_chars": 2}, {"sum_logits": -2.164358615875244, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.164358615875244, "logits_per_char": -1.082179307937622, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1082, "native_id": "6714593a8d1f8ae39930c1f0316e9ffc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7039450407028198, "incorrect_loss_raw": 1.6408225297927856, "correct_loss_per_char": 0.8519725203514099, "incorrect_loss_per_char": 0.8204112648963928, "correct_loss_per_token": 1.7039450407028198, "incorrect_loss_per_token": 1.6408225297927856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2748284339904785, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.2748284339904785, "logits_per_char": -0.6374142169952393, "num_chars": 2}, {"sum_logits": -1.5432523488998413, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5432523488998413, "logits_per_char": -0.7716261744499207, "num_chars": 2}, {"sum_logits": -1.5928746461868286, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5928746461868286, "logits_per_char": -0.7964373230934143, "num_chars": 2}, {"sum_logits": -1.7039450407028198, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.7039450407028198, "logits_per_char": -0.8519725203514099, "num_chars": 2}, {"sum_logits": -2.152334690093994, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -2.152334690093994, "logits_per_char": -1.076167345046997, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1083, "native_id": "75cb55aec7e64f592c01eee5d4578dcd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4079240560531616, "incorrect_loss_raw": 1.6920241713523865, "correct_loss_per_char": 0.7039620280265808, "incorrect_loss_per_char": 0.8460120856761932, "correct_loss_per_token": 1.4079240560531616, "incorrect_loss_per_token": 1.6920241713523865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4079240560531616, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4079240560531616, "logits_per_char": -0.7039620280265808, "num_chars": 2}, {"sum_logits": -1.5210782289505005, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5210782289505005, "logits_per_char": -0.7605391144752502, "num_chars": 2}, {"sum_logits": -1.556065320968628, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.556065320968628, "logits_per_char": -0.778032660484314, "num_chars": 2}, {"sum_logits": -1.6512445211410522, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6512445211410522, "logits_per_char": -0.8256222605705261, "num_chars": 2}, {"sum_logits": -2.0397086143493652, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0397086143493652, "logits_per_char": -1.0198543071746826, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1084, "native_id": "0b30831fb1862bc62339bdf930cbc447", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5151289701461792, "incorrect_loss_raw": 1.6759093701839447, "correct_loss_per_char": 0.7575644850730896, "incorrect_loss_per_char": 0.8379546850919724, "correct_loss_per_token": 1.5151289701461792, "incorrect_loss_per_token": 1.6759093701839447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4152928590774536, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4152928590774536, "logits_per_char": -0.7076464295387268, "num_chars": 2}, {"sum_logits": -1.5151289701461792, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5151289701461792, "logits_per_char": -0.7575644850730896, "num_chars": 2}, {"sum_logits": -1.5118330717086792, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5118330717086792, "logits_per_char": -0.7559165358543396, "num_chars": 2}, {"sum_logits": -1.6369928121566772, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6369928121566772, "logits_per_char": -0.8184964060783386, "num_chars": 2}, {"sum_logits": -2.1395187377929688, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.1395187377929688, "logits_per_char": -1.0697593688964844, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1085, "native_id": "29c194d032a266a7160bff6f546a4d9d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.589782953262329, "incorrect_loss_raw": 1.6430982947349548, "correct_loss_per_char": 0.7948914766311646, "incorrect_loss_per_char": 0.8215491473674774, "correct_loss_per_token": 1.589782953262329, "incorrect_loss_per_token": 1.6430982947349548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5170351266860962, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5170351266860962, "logits_per_char": -0.7585175633430481, "num_chars": 2}, {"sum_logits": -1.5169757604599, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5169757604599, "logits_per_char": -0.75848788022995, "num_chars": 2}, {"sum_logits": -1.5096781253814697, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.5096781253814697, "logits_per_char": -0.7548390626907349, "num_chars": 2}, {"sum_logits": -1.589782953262329, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.589782953262329, "logits_per_char": -0.7948914766311646, "num_chars": 2}, {"sum_logits": -2.0287041664123535, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0287041664123535, "logits_per_char": -1.0143520832061768, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1086, "native_id": "ea33206992fb7ad1c3476e9673bb4a9c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6504400968551636, "incorrect_loss_raw": 1.635126143693924, "correct_loss_per_char": 0.8252200484275818, "incorrect_loss_per_char": 0.817563071846962, "correct_loss_per_token": 1.6504400968551636, "incorrect_loss_per_token": 1.635126143693924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3678263425827026, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3678263425827026, "logits_per_char": -0.6839131712913513, "num_chars": 2}, {"sum_logits": -1.5027731657028198, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5027731657028198, "logits_per_char": -0.7513865828514099, "num_chars": 2}, {"sum_logits": -1.6253150701522827, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6253150701522827, "logits_per_char": -0.8126575350761414, "num_chars": 2}, {"sum_logits": -1.6504400968551636, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6504400968551636, "logits_per_char": -0.8252200484275818, "num_chars": 2}, {"sum_logits": -2.0445899963378906, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.0445899963378906, "logits_per_char": -1.0222949981689453, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1087, "native_id": "2b7dd91da5dde1560ace2cd82af926de", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0145792961120605, "incorrect_loss_raw": 1.5403202176094055, "correct_loss_per_char": 1.0072896480560303, "incorrect_loss_per_char": 0.7701601088047028, "correct_loss_per_token": 2.0145792961120605, "incorrect_loss_per_token": 1.5403202176094055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4156763553619385, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4156763553619385, "logits_per_char": -0.7078381776809692, "num_chars": 2}, {"sum_logits": -1.4682327508926392, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4682327508926392, "logits_per_char": -0.7341163754463196, "num_chars": 2}, {"sum_logits": -1.6070177555084229, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6070177555084229, "logits_per_char": -0.8035088777542114, "num_chars": 2}, {"sum_logits": -1.6703540086746216, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6703540086746216, "logits_per_char": -0.8351770043373108, "num_chars": 2}, {"sum_logits": -2.0145792961120605, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0145792961120605, "logits_per_char": -1.0072896480560303, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1088, "native_id": "eb50f536830ba18ab987c7ff652e2aba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2020034790039062, "incorrect_loss_raw": 1.770483136177063, "correct_loss_per_char": 0.6010017395019531, "incorrect_loss_per_char": 0.8852415680885315, "correct_loss_per_token": 1.2020034790039062, "incorrect_loss_per_token": 1.770483136177063, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2020034790039062, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.2020034790039062, "logits_per_char": -0.6010017395019531, "num_chars": 2}, {"sum_logits": -1.5834100246429443, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5834100246429443, "logits_per_char": -0.7917050123214722, "num_chars": 2}, {"sum_logits": -1.6216940879821777, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6216940879821777, "logits_per_char": -0.8108470439910889, "num_chars": 2}, {"sum_logits": -1.8067407608032227, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.8067407608032227, "logits_per_char": -0.9033703804016113, "num_chars": 2}, {"sum_logits": -2.0700876712799072, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.0700876712799072, "logits_per_char": -1.0350438356399536, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1089, "native_id": "6bc3ebcfd04965c25bde71339955746c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5131248235702515, "incorrect_loss_raw": 1.6552553176879883, "correct_loss_per_char": 0.7565624117851257, "incorrect_loss_per_char": 0.8276276588439941, "correct_loss_per_token": 1.5131248235702515, "incorrect_loss_per_token": 1.6552553176879883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4982649087905884, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4982649087905884, "logits_per_char": -0.7491324543952942, "num_chars": 2}, {"sum_logits": -1.5131248235702515, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5131248235702515, "logits_per_char": -0.7565624117851257, "num_chars": 2}, {"sum_logits": -1.5360337495803833, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5360337495803833, "logits_per_char": -0.7680168747901917, "num_chars": 2}, {"sum_logits": -1.6353780031204224, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6353780031204224, "logits_per_char": -0.8176890015602112, "num_chars": 2}, {"sum_logits": -1.951344609260559, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.951344609260559, "logits_per_char": -0.9756723046302795, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1090, "native_id": "163898952cb6baf3a6440696e1352e86", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5721067190170288, "incorrect_loss_raw": 1.6302081942558289, "correct_loss_per_char": 0.7860533595085144, "incorrect_loss_per_char": 0.8151040971279144, "correct_loss_per_token": 1.5721067190170288, "incorrect_loss_per_token": 1.6302081942558289, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5611151456832886, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5611151456832886, "logits_per_char": -0.7805575728416443, "num_chars": 2}, {"sum_logits": -1.5721067190170288, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5721067190170288, "logits_per_char": -0.7860533595085144, "num_chars": 2}, {"sum_logits": -1.551141619682312, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.551141619682312, "logits_per_char": -0.775570809841156, "num_chars": 2}, {"sum_logits": -1.6035844087600708, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6035844087600708, "logits_per_char": -0.8017922043800354, "num_chars": 2}, {"sum_logits": -1.804991602897644, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.804991602897644, "logits_per_char": -0.902495801448822, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1091, "native_id": "aa984e2b487d08889bc0c73bab5ac945", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6863877773284912, "incorrect_loss_raw": 1.6318065226078033, "correct_loss_per_char": 0.8431938886642456, "incorrect_loss_per_char": 0.8159032613039017, "correct_loss_per_token": 1.6863877773284912, "incorrect_loss_per_token": 1.6318065226078033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3104716539382935, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3104716539382935, "logits_per_char": -0.6552358269691467, "num_chars": 2}, {"sum_logits": -1.5251295566558838, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5251295566558838, "logits_per_char": -0.7625647783279419, "num_chars": 2}, {"sum_logits": -1.6309056282043457, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6309056282043457, "logits_per_char": -0.8154528141021729, "num_chars": 2}, {"sum_logits": -1.6863877773284912, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6863877773284912, "logits_per_char": -0.8431938886642456, "num_chars": 2}, {"sum_logits": -2.0607192516326904, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0607192516326904, "logits_per_char": -1.0303596258163452, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1092, "native_id": "d78baca23e0a636a8961e17119047e63", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6253007650375366, "incorrect_loss_raw": 1.6247060596942902, "correct_loss_per_char": 0.8126503825187683, "incorrect_loss_per_char": 0.8123530298471451, "correct_loss_per_token": 1.6253007650375366, "incorrect_loss_per_token": 1.6247060596942902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4140287637710571, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4140287637710571, "logits_per_char": -0.7070143818855286, "num_chars": 2}, {"sum_logits": -1.5728424787521362, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5728424787521362, "logits_per_char": -0.7864212393760681, "num_chars": 2}, {"sum_logits": -1.6253007650375366, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6253007650375366, "logits_per_char": -0.8126503825187683, "num_chars": 2}, {"sum_logits": -1.6415289640426636, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6415289640426636, "logits_per_char": -0.8207644820213318, "num_chars": 2}, {"sum_logits": -1.8704240322113037, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8704240322113037, "logits_per_char": -0.9352120161056519, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1093, "native_id": "ac6378b5e8462dc1bde1155d706213d8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7742555141448975, "incorrect_loss_raw": 1.6019684374332428, "correct_loss_per_char": 0.8871277570724487, "incorrect_loss_per_char": 0.8009842187166214, "correct_loss_per_token": 1.7742555141448975, "incorrect_loss_per_token": 1.6019684374332428, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3647055625915527, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3647055625915527, "logits_per_char": -0.6823527812957764, "num_chars": 2}, {"sum_logits": -1.5694619417190552, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5694619417190552, "logits_per_char": -0.7847309708595276, "num_chars": 2}, {"sum_logits": -1.7742555141448975, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7742555141448975, "logits_per_char": -0.8871277570724487, "num_chars": 2}, {"sum_logits": -1.5383925437927246, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5383925437927246, "logits_per_char": -0.7691962718963623, "num_chars": 2}, {"sum_logits": -1.9353137016296387, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9353137016296387, "logits_per_char": -0.9676568508148193, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1094, "native_id": "c1aebf059c5102f4e773f7fe4afe13f0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7399933338165283, "incorrect_loss_raw": 1.6079429984092712, "correct_loss_per_char": 0.8699966669082642, "incorrect_loss_per_char": 0.8039714992046356, "correct_loss_per_token": 1.7399933338165283, "incorrect_loss_per_token": 1.6079429984092712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2785518169403076, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2785518169403076, "logits_per_char": -0.6392759084701538, "num_chars": 2}, {"sum_logits": -1.635676622390747, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.635676622390747, "logits_per_char": -0.8178383111953735, "num_chars": 2}, {"sum_logits": -1.6330628395080566, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6330628395080566, "logits_per_char": -0.8165314197540283, "num_chars": 2}, {"sum_logits": -1.7399933338165283, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7399933338165283, "logits_per_char": -0.8699966669082642, "num_chars": 2}, {"sum_logits": -1.8844807147979736, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.8844807147979736, "logits_per_char": -0.9422403573989868, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1095, "native_id": "1017807310a25d3ea4a4ec305e91cba3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5978569984436035, "incorrect_loss_raw": 1.6263003945350647, "correct_loss_per_char": 0.7989284992218018, "incorrect_loss_per_char": 0.8131501972675323, "correct_loss_per_token": 1.5978569984436035, "incorrect_loss_per_token": 1.6263003945350647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.497434139251709, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.497434139251709, "logits_per_char": -0.7487170696258545, "num_chars": 2}, {"sum_logits": -1.5199742317199707, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5199742317199707, "logits_per_char": -0.7599871158599854, "num_chars": 2}, {"sum_logits": -1.5978569984436035, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5978569984436035, "logits_per_char": -0.7989284992218018, "num_chars": 2}, {"sum_logits": -1.6711475849151611, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6711475849151611, "logits_per_char": -0.8355737924575806, "num_chars": 2}, {"sum_logits": -1.816645622253418, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.816645622253418, "logits_per_char": -0.908322811126709, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1096, "native_id": "7192c9f5c513aac9042bad595ff5af9f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.4199204444885254, "incorrect_loss_raw": 1.5104557573795319, "correct_loss_per_char": 1.2099602222442627, "incorrect_loss_per_char": 0.7552278786897659, "correct_loss_per_token": 2.4199204444885254, "incorrect_loss_per_token": 1.5104557573795319, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2044687271118164, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2044687271118164, "logits_per_char": -0.6022343635559082, "num_chars": 2}, {"sum_logits": -1.3968523740768433, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.3968523740768433, "logits_per_char": -0.6984261870384216, "num_chars": 2}, {"sum_logits": -1.6442824602127075, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6442824602127075, "logits_per_char": -0.8221412301063538, "num_chars": 2}, {"sum_logits": -1.7962194681167603, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7962194681167603, "logits_per_char": -0.8981097340583801, "num_chars": 2}, {"sum_logits": -2.4199204444885254, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.4199204444885254, "logits_per_char": -1.2099602222442627, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1097, "native_id": "7c05e8d5a057085455eea243fbd1cd90", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6762614250183105, "incorrect_loss_raw": 1.6522248089313507, "correct_loss_per_char": 0.8381307125091553, "incorrect_loss_per_char": 0.8261124044656754, "correct_loss_per_token": 1.6762614250183105, "incorrect_loss_per_token": 1.6522248089313507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3009636402130127, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3009636402130127, "logits_per_char": -0.6504818201065063, "num_chars": 2}, {"sum_logits": -1.4773837327957153, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4773837327957153, "logits_per_char": -0.7386918663978577, "num_chars": 2}, {"sum_logits": -1.6323482990264893, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6323482990264893, "logits_per_char": -0.8161741495132446, "num_chars": 2}, {"sum_logits": -1.6762614250183105, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6762614250183105, "logits_per_char": -0.8381307125091553, "num_chars": 2}, {"sum_logits": -2.1982035636901855, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.1982035636901855, "logits_per_char": -1.0991017818450928, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1098, "native_id": "3cb91a71a6567da870eedf37becc97ef", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4381740093231201, "incorrect_loss_raw": 1.6900922060012817, "correct_loss_per_char": 0.7190870046615601, "incorrect_loss_per_char": 0.8450461030006409, "correct_loss_per_token": 1.4381740093231201, "incorrect_loss_per_token": 1.6900922060012817, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4381740093231201, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4381740093231201, "logits_per_char": -0.7190870046615601, "num_chars": 2}, {"sum_logits": -1.581596851348877, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.581596851348877, "logits_per_char": -0.7907984256744385, "num_chars": 2}, {"sum_logits": -1.4534597396850586, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4534597396850586, "logits_per_char": -0.7267298698425293, "num_chars": 2}, {"sum_logits": -1.6462864875793457, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6462864875793457, "logits_per_char": -0.8231432437896729, "num_chars": 2}, {"sum_logits": -2.0790257453918457, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0790257453918457, "logits_per_char": -1.0395128726959229, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1099, "native_id": "9b4bbf3c4d24ecdb4b27320afb706808", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.231048822402954, "incorrect_loss_raw": 1.7517785727977753, "correct_loss_per_char": 0.615524411201477, "incorrect_loss_per_char": 0.8758892863988876, "correct_loss_per_token": 1.231048822402954, "incorrect_loss_per_token": 1.7517785727977753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.231048822402954, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.231048822402954, "logits_per_char": -0.615524411201477, "num_chars": 2}, {"sum_logits": -1.5914579629898071, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5914579629898071, "logits_per_char": -0.7957289814949036, "num_chars": 2}, {"sum_logits": -1.6597199440002441, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6597199440002441, "logits_per_char": -0.8298599720001221, "num_chars": 2}, {"sum_logits": -1.7057337760925293, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7057337760925293, "logits_per_char": -0.8528668880462646, "num_chars": 2}, {"sum_logits": -2.0502026081085205, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.0502026081085205, "logits_per_char": -1.0251013040542603, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1100, "native_id": "43df3a316880d8bab346c06bd43b94dd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7281286716461182, "incorrect_loss_raw": 1.6125516295433044, "correct_loss_per_char": 0.8640643358230591, "incorrect_loss_per_char": 0.8062758147716522, "correct_loss_per_token": 1.7281286716461182, "incorrect_loss_per_token": 1.6125516295433044, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.374429702758789, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.374429702758789, "logits_per_char": -0.6872148513793945, "num_chars": 2}, {"sum_logits": -1.497755765914917, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.497755765914917, "logits_per_char": -0.7488778829574585, "num_chars": 2}, {"sum_logits": -1.5913455486297607, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5913455486297607, "logits_per_char": -0.7956727743148804, "num_chars": 2}, {"sum_logits": -1.7281286716461182, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7281286716461182, "logits_per_char": -0.8640643358230591, "num_chars": 2}, {"sum_logits": -1.986675500869751, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.986675500869751, "logits_per_char": -0.9933377504348755, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1101, "native_id": "858a5eaa587fe0e266722228671a6bd1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.443807601928711, "incorrect_loss_raw": 1.6789478957653046, "correct_loss_per_char": 0.7219038009643555, "incorrect_loss_per_char": 0.8394739478826523, "correct_loss_per_token": 1.443807601928711, "incorrect_loss_per_token": 1.6789478957653046, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.443807601928711, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.443807601928711, "logits_per_char": -0.7219038009643555, "num_chars": 2}, {"sum_logits": -1.6214646100997925, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6214646100997925, "logits_per_char": -0.8107323050498962, "num_chars": 2}, {"sum_logits": -1.4849659204483032, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4849659204483032, "logits_per_char": -0.7424829602241516, "num_chars": 2}, {"sum_logits": -1.630990982055664, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.630990982055664, "logits_per_char": -0.815495491027832, "num_chars": 2}, {"sum_logits": -1.9783700704574585, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.9783700704574585, "logits_per_char": -0.9891850352287292, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1102, "native_id": "34005ef0caafefc8585c9fcd50e94557", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.418149471282959, "incorrect_loss_raw": 1.7143506407737732, "correct_loss_per_char": 0.7090747356414795, "incorrect_loss_per_char": 0.8571753203868866, "correct_loss_per_token": 1.418149471282959, "incorrect_loss_per_token": 1.7143506407737732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.31882643699646, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.31882643699646, "logits_per_char": -0.65941321849823, "num_chars": 2}, {"sum_logits": -1.418149471282959, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.418149471282959, "logits_per_char": -0.7090747356414795, "num_chars": 2}, {"sum_logits": -1.608359694480896, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.608359694480896, "logits_per_char": -0.804179847240448, "num_chars": 2}, {"sum_logits": -1.783897042274475, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.783897042274475, "logits_per_char": -0.8919485211372375, "num_chars": 2}, {"sum_logits": -2.1463193893432617, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.1463193893432617, "logits_per_char": -1.0731596946716309, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1103, "native_id": "f61d83f90b92a8d537989e55ee70542d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6510032415390015, "incorrect_loss_raw": 1.631701022386551, "correct_loss_per_char": 0.8255016207695007, "incorrect_loss_per_char": 0.8158505111932755, "correct_loss_per_token": 1.6510032415390015, "incorrect_loss_per_token": 1.631701022386551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3791639804840088, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3791639804840088, "logits_per_char": -0.6895819902420044, "num_chars": 2}, {"sum_logits": -1.4682059288024902, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4682059288024902, "logits_per_char": -0.7341029644012451, "num_chars": 2}, {"sum_logits": -1.694952130317688, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.694952130317688, "logits_per_char": -0.847476065158844, "num_chars": 2}, {"sum_logits": -1.6510032415390015, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6510032415390015, "logits_per_char": -0.8255016207695007, "num_chars": 2}, {"sum_logits": -1.9844820499420166, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9844820499420166, "logits_per_char": -0.9922410249710083, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1104, "native_id": "3bf06235a537adc9d85431846595b800", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4382870197296143, "incorrect_loss_raw": 1.7019701600074768, "correct_loss_per_char": 0.7191435098648071, "incorrect_loss_per_char": 0.8509850800037384, "correct_loss_per_token": 1.4382870197296143, "incorrect_loss_per_token": 1.7019701600074768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3013215065002441, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3013215065002441, "logits_per_char": -0.6506607532501221, "num_chars": 2}, {"sum_logits": -1.4382870197296143, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4382870197296143, "logits_per_char": -0.7191435098648071, "num_chars": 2}, {"sum_logits": -1.6647356748580933, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6647356748580933, "logits_per_char": -0.8323678374290466, "num_chars": 2}, {"sum_logits": -1.7947291135787964, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.7947291135787964, "logits_per_char": -0.8973645567893982, "num_chars": 2}, {"sum_logits": -2.0470943450927734, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0470943450927734, "logits_per_char": -1.0235471725463867, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1105, "native_id": "79ec11d8072ce42779adfe0a19bd5374", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.083089828491211, "incorrect_loss_raw": 1.5381513237953186, "correct_loss_per_char": 1.0415449142456055, "incorrect_loss_per_char": 0.7690756618976593, "correct_loss_per_token": 2.083089828491211, "incorrect_loss_per_token": 1.5381513237953186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2728157043457031, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.2728157043457031, "logits_per_char": -0.6364078521728516, "num_chars": 2}, {"sum_logits": -1.547057867050171, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.547057867050171, "logits_per_char": -0.7735289335250854, "num_chars": 2}, {"sum_logits": -1.635484218597412, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.635484218597412, "logits_per_char": -0.817742109298706, "num_chars": 2}, {"sum_logits": -1.6972475051879883, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6972475051879883, "logits_per_char": -0.8486237525939941, "num_chars": 2}, {"sum_logits": -2.083089828491211, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.083089828491211, "logits_per_char": -1.0415449142456055, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1106, "native_id": "2982d0eae1bf880f5930341af7665716", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.557244896888733, "incorrect_loss_raw": 1.6570726335048676, "correct_loss_per_char": 0.7786224484443665, "incorrect_loss_per_char": 0.8285363167524338, "correct_loss_per_token": 1.557244896888733, "incorrect_loss_per_token": 1.6570726335048676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4782741069793701, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4782741069793701, "logits_per_char": -0.7391370534896851, "num_chars": 2}, {"sum_logits": -1.588362455368042, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.588362455368042, "logits_per_char": -0.794181227684021, "num_chars": 2}, {"sum_logits": -1.4886809587478638, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4886809587478638, "logits_per_char": -0.7443404793739319, "num_chars": 2}, {"sum_logits": -1.557244896888733, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.557244896888733, "logits_per_char": -0.7786224484443665, "num_chars": 2}, {"sum_logits": -2.0729730129241943, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.0729730129241943, "logits_per_char": -1.0364865064620972, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1107, "native_id": "ba9132ebf2bc3ad21e6a0631dc4e0a77", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9597843885421753, "incorrect_loss_raw": 1.5513363480567932, "correct_loss_per_char": 0.9798921942710876, "incorrect_loss_per_char": 0.7756681740283966, "correct_loss_per_token": 1.9597843885421753, "incorrect_loss_per_token": 1.5513363480567932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3419562578201294, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3419562578201294, "logits_per_char": -0.6709781289100647, "num_chars": 2}, {"sum_logits": -1.5608450174331665, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5608450174331665, "logits_per_char": -0.7804225087165833, "num_chars": 2}, {"sum_logits": -1.6308590173721313, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6308590173721313, "logits_per_char": -0.8154295086860657, "num_chars": 2}, {"sum_logits": -1.6716850996017456, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6716850996017456, "logits_per_char": -0.8358425498008728, "num_chars": 2}, {"sum_logits": -1.9597843885421753, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.9597843885421753, "logits_per_char": -0.9798921942710876, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1108, "native_id": "d06de16a4aaeaef32b398c1213257b4a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5866730213165283, "incorrect_loss_raw": 1.6724563837051392, "correct_loss_per_char": 0.7933365106582642, "incorrect_loss_per_char": 0.8362281918525696, "correct_loss_per_token": 1.5866730213165283, "incorrect_loss_per_token": 1.6724563837051392, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2257113456726074, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.2257113456726074, "logits_per_char": -0.6128556728363037, "num_chars": 2}, {"sum_logits": -1.5836596488952637, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5836596488952637, "logits_per_char": -0.7918298244476318, "num_chars": 2}, {"sum_logits": -1.5866730213165283, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5866730213165283, "logits_per_char": -0.7933365106582642, "num_chars": 2}, {"sum_logits": -1.7867207527160645, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7867207527160645, "logits_per_char": -0.8933603763580322, "num_chars": 2}, {"sum_logits": -2.093733787536621, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.093733787536621, "logits_per_char": -1.0468668937683105, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1109, "native_id": "eee9476bf29498b7d74b043afe316fc6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4952200651168823, "incorrect_loss_raw": 1.6827944815158844, "correct_loss_per_char": 0.7476100325584412, "incorrect_loss_per_char": 0.8413972407579422, "correct_loss_per_token": 1.4952200651168823, "incorrect_loss_per_token": 1.6827944815158844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4629842042922974, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4629842042922974, "logits_per_char": -0.7314921021461487, "num_chars": 2}, {"sum_logits": -1.5845561027526855, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5845561027526855, "logits_per_char": -0.7922780513763428, "num_chars": 2}, {"sum_logits": -1.518822193145752, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.518822193145752, "logits_per_char": -0.759411096572876, "num_chars": 2}, {"sum_logits": -1.4952200651168823, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4952200651168823, "logits_per_char": -0.7476100325584412, "num_chars": 2}, {"sum_logits": -2.1648154258728027, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.1648154258728027, "logits_per_char": -1.0824077129364014, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1110, "native_id": "a85441d6a0e3f871d81a9f19b31360b7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6146961450576782, "incorrect_loss_raw": 1.6411767601966858, "correct_loss_per_char": 0.8073480725288391, "incorrect_loss_per_char": 0.8205883800983429, "correct_loss_per_token": 1.6146961450576782, "incorrect_loss_per_token": 1.6411767601966858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3167839050292969, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3167839050292969, "logits_per_char": -0.6583919525146484, "num_chars": 2}, {"sum_logits": -1.5965993404388428, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5965993404388428, "logits_per_char": -0.7982996702194214, "num_chars": 2}, {"sum_logits": -1.6866213083267212, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6866213083267212, "logits_per_char": -0.8433106541633606, "num_chars": 2}, {"sum_logits": -1.6146961450576782, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6146961450576782, "logits_per_char": -0.8073480725288391, "num_chars": 2}, {"sum_logits": -1.9647024869918823, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.9647024869918823, "logits_per_char": -0.9823512434959412, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1111, "native_id": "f11a2975898033893d6a38f75d791fdf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1590099334716797, "incorrect_loss_raw": 1.529792070388794, "correct_loss_per_char": 1.0795049667358398, "incorrect_loss_per_char": 0.764896035194397, "correct_loss_per_token": 2.1590099334716797, "incorrect_loss_per_token": 1.529792070388794, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2626482248306274, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.2626482248306274, "logits_per_char": -0.6313241124153137, "num_chars": 2}, {"sum_logits": -1.5763238668441772, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5763238668441772, "logits_per_char": -0.7881619334220886, "num_chars": 2}, {"sum_logits": -1.5630732774734497, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5630732774734497, "logits_per_char": -0.7815366387367249, "num_chars": 2}, {"sum_logits": -1.7171229124069214, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7171229124069214, "logits_per_char": -0.8585614562034607, "num_chars": 2}, {"sum_logits": -2.1590099334716797, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.1590099334716797, "logits_per_char": -1.0795049667358398, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1112, "native_id": "a2977fd575faba162d04a490dabd1b9b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6294355392456055, "incorrect_loss_raw": 1.6372801065444946, "correct_loss_per_char": 0.8147177696228027, "incorrect_loss_per_char": 0.8186400532722473, "correct_loss_per_token": 1.6294355392456055, "incorrect_loss_per_token": 1.6372801065444946, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3524339199066162, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3524339199066162, "logits_per_char": -0.6762169599533081, "num_chars": 2}, {"sum_logits": -1.5921249389648438, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5921249389648438, "logits_per_char": -0.7960624694824219, "num_chars": 2}, {"sum_logits": -1.6294355392456055, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6294355392456055, "logits_per_char": -0.8147177696228027, "num_chars": 2}, {"sum_logits": -1.6254017353057861, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6254017353057861, "logits_per_char": -0.8127008676528931, "num_chars": 2}, {"sum_logits": -1.9791598320007324, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.9791598320007324, "logits_per_char": -0.9895799160003662, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1113, "native_id": "cd39e442204d3edf7acc185fd59c8a44", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6385133266448975, "incorrect_loss_raw": 1.6278765499591827, "correct_loss_per_char": 0.8192566633224487, "incorrect_loss_per_char": 0.8139382749795914, "correct_loss_per_token": 1.6385133266448975, "incorrect_loss_per_token": 1.6278765499591827, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3848930597305298, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.3848930597305298, "logits_per_char": -0.6924465298652649, "num_chars": 2}, {"sum_logits": -1.5078728199005127, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5078728199005127, "logits_per_char": -0.7539364099502563, "num_chars": 2}, {"sum_logits": -1.723095178604126, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.723095178604126, "logits_per_char": -0.861547589302063, "num_chars": 2}, {"sum_logits": -1.6385133266448975, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6385133266448975, "logits_per_char": -0.8192566633224487, "num_chars": 2}, {"sum_logits": -1.8956451416015625, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.8956451416015625, "logits_per_char": -0.9478225708007812, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1114, "native_id": "c77e1039d78cdff197a370fcda0f2b9f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4843922853469849, "incorrect_loss_raw": 1.6631795167922974, "correct_loss_per_char": 0.7421961426734924, "incorrect_loss_per_char": 0.8315897583961487, "correct_loss_per_token": 1.4843922853469849, "incorrect_loss_per_token": 1.6631795167922974, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4843922853469849, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4843922853469849, "logits_per_char": -0.7421961426734924, "num_chars": 2}, {"sum_logits": -1.6717205047607422, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6717205047607422, "logits_per_char": -0.8358602523803711, "num_chars": 2}, {"sum_logits": -1.5958526134490967, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5958526134490967, "logits_per_char": -0.7979263067245483, "num_chars": 2}, {"sum_logits": -1.4742212295532227, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4742212295532227, "logits_per_char": -0.7371106147766113, "num_chars": 2}, {"sum_logits": -1.910923719406128, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.910923719406128, "logits_per_char": -0.955461859703064, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1115, "native_id": "f537f6bb8527724e0b1e1c1051326cd5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6294947862625122, "incorrect_loss_raw": 1.636508196592331, "correct_loss_per_char": 0.8147473931312561, "incorrect_loss_per_char": 0.8182540982961655, "correct_loss_per_token": 1.6294947862625122, "incorrect_loss_per_token": 1.636508196592331, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4456654787063599, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4456654787063599, "logits_per_char": -0.7228327393531799, "num_chars": 2}, {"sum_logits": -1.509863257408142, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.509863257408142, "logits_per_char": -0.754931628704071, "num_chars": 2}, {"sum_logits": -1.547653079032898, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.547653079032898, "logits_per_char": -0.773826539516449, "num_chars": 2}, {"sum_logits": -1.6294947862625122, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6294947862625122, "logits_per_char": -0.8147473931312561, "num_chars": 2}, {"sum_logits": -2.042850971221924, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.042850971221924, "logits_per_char": -1.021425485610962, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1116, "native_id": "d3b145911a76fd6fbe9a23ab027be024", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.55741548538208, "incorrect_loss_raw": 1.6497081816196442, "correct_loss_per_char": 0.77870774269104, "incorrect_loss_per_char": 0.8248540908098221, "correct_loss_per_token": 1.55741548538208, "incorrect_loss_per_token": 1.6497081816196442, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.55741548538208, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.55741548538208, "logits_per_char": -0.77870774269104, "num_chars": 2}, {"sum_logits": -1.5875318050384521, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5875318050384521, "logits_per_char": -0.7937659025192261, "num_chars": 2}, {"sum_logits": -1.4723039865493774, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4723039865493774, "logits_per_char": -0.7361519932746887, "num_chars": 2}, {"sum_logits": -1.5309948921203613, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5309948921203613, "logits_per_char": -0.7654974460601807, "num_chars": 2}, {"sum_logits": -2.0080020427703857, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.0080020427703857, "logits_per_char": -1.0040010213851929, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1117, "native_id": "dc2fa76467ff342abdb4cf142f92dddd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4642088413238525, "incorrect_loss_raw": 1.7169148921966553, "correct_loss_per_char": 0.7321044206619263, "incorrect_loss_per_char": 0.8584574460983276, "correct_loss_per_token": 1.4642088413238525, "incorrect_loss_per_token": 1.7169148921966553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2374255657196045, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.2374255657196045, "logits_per_char": -0.6187127828598022, "num_chars": 2}, {"sum_logits": -1.4642088413238525, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4642088413238525, "logits_per_char": -0.7321044206619263, "num_chars": 2}, {"sum_logits": -1.6422618627548218, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6422618627548218, "logits_per_char": -0.8211309313774109, "num_chars": 2}, {"sum_logits": -1.7460983991622925, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7460983991622925, "logits_per_char": -0.8730491995811462, "num_chars": 2}, {"sum_logits": -2.2418737411499023, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.2418737411499023, "logits_per_char": -1.1209368705749512, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1118, "native_id": "246249cd7976358051a9811ff9c30736", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7884174585342407, "incorrect_loss_raw": 1.714615285396576, "correct_loss_per_char": 0.8942087292671204, "incorrect_loss_per_char": 0.857307642698288, "correct_loss_per_token": 1.7884174585342407, "incorrect_loss_per_token": 1.714615285396576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1257076263427734, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.1257076263427734, "logits_per_char": -0.5628538131713867, "num_chars": 2}, {"sum_logits": -1.4622458219528198, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4622458219528198, "logits_per_char": -0.7311229109764099, "num_chars": 2}, {"sum_logits": -1.5911768674850464, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5911768674850464, "logits_per_char": -0.7955884337425232, "num_chars": 2}, {"sum_logits": -1.7884174585342407, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7884174585342407, "logits_per_char": -0.8942087292671204, "num_chars": 2}, {"sum_logits": -2.679330825805664, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.679330825805664, "logits_per_char": -1.339665412902832, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1119, "native_id": "32be8cbc1b5a967310bcab8b80563481", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4770077466964722, "incorrect_loss_raw": 1.7531636655330658, "correct_loss_per_char": 0.7385038733482361, "incorrect_loss_per_char": 0.8765818327665329, "correct_loss_per_token": 1.4770077466964722, "incorrect_loss_per_token": 1.7531636655330658, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1142171621322632, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.1142171621322632, "logits_per_char": -0.5571085810661316, "num_chars": 2}, {"sum_logits": -1.4770077466964722, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4770077466964722, "logits_per_char": -0.7385038733482361, "num_chars": 2}, {"sum_logits": -1.7212573289871216, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7212573289871216, "logits_per_char": -0.8606286644935608, "num_chars": 2}, {"sum_logits": -1.797648310661316, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.797648310661316, "logits_per_char": -0.898824155330658, "num_chars": 2}, {"sum_logits": -2.3795318603515625, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.3795318603515625, "logits_per_char": -1.1897659301757812, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1120, "native_id": "ad769851a59375865607452d3bf2a45d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.501760721206665, "incorrect_loss_raw": 1.6937256157398224, "correct_loss_per_char": 0.7508803606033325, "incorrect_loss_per_char": 0.8468628078699112, "correct_loss_per_token": 1.501760721206665, "incorrect_loss_per_token": 1.6937256157398224, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2793229818344116, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.2793229818344116, "logits_per_char": -0.6396614909172058, "num_chars": 2}, {"sum_logits": -1.501760721206665, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.501760721206665, "logits_per_char": -0.7508803606033325, "num_chars": 2}, {"sum_logits": -1.575476884841919, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.575476884841919, "logits_per_char": -0.7877384424209595, "num_chars": 2}, {"sum_logits": -1.7774178981781006, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.7774178981781006, "logits_per_char": -0.8887089490890503, "num_chars": 2}, {"sum_logits": -2.1426846981048584, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.1426846981048584, "logits_per_char": -1.0713423490524292, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1121, "native_id": "5ea6b94d1a911365b06cf776919413e8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5024042129516602, "incorrect_loss_raw": 1.7120487689971924, "correct_loss_per_char": 0.7512021064758301, "incorrect_loss_per_char": 0.8560243844985962, "correct_loss_per_token": 1.5024042129516602, "incorrect_loss_per_token": 1.7120487689971924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2213454246520996, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.2213454246520996, "logits_per_char": -0.6106727123260498, "num_chars": 2}, {"sum_logits": -1.5024042129516602, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5024042129516602, "logits_per_char": -0.7512021064758301, "num_chars": 2}, {"sum_logits": -1.6225630044937134, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6225630044937134, "logits_per_char": -0.8112815022468567, "num_chars": 2}, {"sum_logits": -1.7518051862716675, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7518051862716675, "logits_per_char": -0.8759025931358337, "num_chars": 2}, {"sum_logits": -2.252481460571289, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.252481460571289, "logits_per_char": -1.1262407302856445, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1122, "native_id": "820df15b615d221e38a71fcc44461085", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5880789756774902, "incorrect_loss_raw": 1.6436012089252472, "correct_loss_per_char": 0.7940394878387451, "incorrect_loss_per_char": 0.8218006044626236, "correct_loss_per_token": 1.5880789756774902, "incorrect_loss_per_token": 1.6436012089252472, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3534737825393677, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3534737825393677, "logits_per_char": -0.6767368912696838, "num_chars": 2}, {"sum_logits": -1.609309434890747, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.609309434890747, "logits_per_char": -0.8046547174453735, "num_chars": 2}, {"sum_logits": -1.6578959226608276, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6578959226608276, "logits_per_char": -0.8289479613304138, "num_chars": 2}, {"sum_logits": -1.5880789756774902, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5880789756774902, "logits_per_char": -0.7940394878387451, "num_chars": 2}, {"sum_logits": -1.9537256956100464, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9537256956100464, "logits_per_char": -0.9768628478050232, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1123, "native_id": "0a4a00ba435397c4a0496dd2c2426be7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1548495292663574, "incorrect_loss_raw": 1.53458970785141, "correct_loss_per_char": 1.0774247646331787, "incorrect_loss_per_char": 0.767294853925705, "correct_loss_per_token": 2.1548495292663574, "incorrect_loss_per_token": 1.53458970785141, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2407870292663574, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.2407870292663574, "logits_per_char": -0.6203935146331787, "num_chars": 2}, {"sum_logits": -1.4703004360198975, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4703004360198975, "logits_per_char": -0.7351502180099487, "num_chars": 2}, {"sum_logits": -1.6844366788864136, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6844366788864136, "logits_per_char": -0.8422183394432068, "num_chars": 2}, {"sum_logits": -1.7428346872329712, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.7428346872329712, "logits_per_char": -0.8714173436164856, "num_chars": 2}, {"sum_logits": -2.1548495292663574, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.1548495292663574, "logits_per_char": -1.0774247646331787, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1124, "native_id": "a7f29f4aebe0e3bcb77038fea71bf28c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5593000650405884, "incorrect_loss_raw": 1.6511940360069275, "correct_loss_per_char": 0.7796500325202942, "incorrect_loss_per_char": 0.8255970180034637, "correct_loss_per_token": 1.5593000650405884, "incorrect_loss_per_token": 1.6511940360069275, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.483107566833496, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.483107566833496, "logits_per_char": -0.741553783416748, "num_chars": 2}, {"sum_logits": -1.5253099203109741, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5253099203109741, "logits_per_char": -0.7626549601554871, "num_chars": 2}, {"sum_logits": -1.5593000650405884, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5593000650405884, "logits_per_char": -0.7796500325202942, "num_chars": 2}, {"sum_logits": -1.5634218454360962, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5634218454360962, "logits_per_char": -0.7817109227180481, "num_chars": 2}, {"sum_logits": -2.0329368114471436, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.0329368114471436, "logits_per_char": -1.0164684057235718, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1125, "native_id": "ecd32cc0c17d4738a27bba3399f04591", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5232020616531372, "incorrect_loss_raw": 1.6572152972221375, "correct_loss_per_char": 0.7616010308265686, "incorrect_loss_per_char": 0.8286076486110687, "correct_loss_per_token": 1.5232020616531372, "incorrect_loss_per_token": 1.6572152972221375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4769604206085205, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4769604206085205, "logits_per_char": -0.7384802103042603, "num_chars": 2}, {"sum_logits": -1.5741840600967407, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5741840600967407, "logits_per_char": -0.7870920300483704, "num_chars": 2}, {"sum_logits": -1.5232020616531372, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5232020616531372, "logits_per_char": -0.7616010308265686, "num_chars": 2}, {"sum_logits": -1.5756105184555054, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5756105184555054, "logits_per_char": -0.7878052592277527, "num_chars": 2}, {"sum_logits": -2.002106189727783, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.002106189727783, "logits_per_char": -1.0010530948638916, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1126, "native_id": "8b2af2d865b7dc500427786c846eacaf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7257728576660156, "incorrect_loss_raw": 1.606808990240097, "correct_loss_per_char": 0.8628864288330078, "incorrect_loss_per_char": 0.8034044951200485, "correct_loss_per_token": 1.7257728576660156, "incorrect_loss_per_token": 1.606808990240097, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3716634511947632, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.3716634511947632, "logits_per_char": -0.6858317255973816, "num_chars": 2}, {"sum_logits": -1.5806853771209717, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5806853771209717, "logits_per_char": -0.7903426885604858, "num_chars": 2}, {"sum_logits": -1.555833101272583, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.555833101272583, "logits_per_char": -0.7779165506362915, "num_chars": 2}, {"sum_logits": -1.7257728576660156, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.7257728576660156, "logits_per_char": -0.8628864288330078, "num_chars": 2}, {"sum_logits": -1.9190540313720703, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.9190540313720703, "logits_per_char": -0.9595270156860352, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1127, "native_id": "383282aace64dd49138bac2392f8b38e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5396698713302612, "incorrect_loss_raw": 1.6359546184539795, "correct_loss_per_char": 0.7698349356651306, "incorrect_loss_per_char": 0.8179773092269897, "correct_loss_per_token": 1.5396698713302612, "incorrect_loss_per_token": 1.6359546184539795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6166332960128784, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6166332960128784, "logits_per_char": -0.8083166480064392, "num_chars": 2}, {"sum_logits": -1.5396698713302612, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.5396698713302612, "logits_per_char": -0.7698349356651306, "num_chars": 2}, {"sum_logits": -1.5669238567352295, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5669238567352295, "logits_per_char": -0.7834619283676147, "num_chars": 2}, {"sum_logits": -1.5960187911987305, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5960187911987305, "logits_per_char": -0.7980093955993652, "num_chars": 2}, {"sum_logits": -1.7642425298690796, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.7642425298690796, "logits_per_char": -0.8821212649345398, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1128, "native_id": "eaf6838d29bcd4ebf408da2f75aa65c3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.614914059638977, "incorrect_loss_raw": 1.6276614367961884, "correct_loss_per_char": 0.8074570298194885, "incorrect_loss_per_char": 0.8138307183980942, "correct_loss_per_token": 1.614914059638977, "incorrect_loss_per_token": 1.6276614367961884, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4179744720458984, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4179744720458984, "logits_per_char": -0.7089872360229492, "num_chars": 2}, {"sum_logits": -1.614914059638977, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.614914059638977, "logits_per_char": -0.8074570298194885, "num_chars": 2}, {"sum_logits": -1.63196861743927, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.63196861743927, "logits_per_char": -0.815984308719635, "num_chars": 2}, {"sum_logits": -1.5733729600906372, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5733729600906372, "logits_per_char": -0.7866864800453186, "num_chars": 2}, {"sum_logits": -1.8873296976089478, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.8873296976089478, "logits_per_char": -0.9436648488044739, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1129, "native_id": "7c8bc9c0e56389eef033bca40c88c151", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6528366804122925, "incorrect_loss_raw": 1.6195325255393982, "correct_loss_per_char": 0.8264183402061462, "incorrect_loss_per_char": 0.8097662627696991, "correct_loss_per_token": 1.6528366804122925, "incorrect_loss_per_token": 1.6195325255393982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.389243721961975, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.389243721961975, "logits_per_char": -0.6946218609809875, "num_chars": 2}, {"sum_logits": -1.5715056657791138, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5715056657791138, "logits_per_char": -0.7857528328895569, "num_chars": 2}, {"sum_logits": -1.64435613155365, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.64435613155365, "logits_per_char": -0.822178065776825, "num_chars": 2}, {"sum_logits": -1.6528366804122925, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6528366804122925, "logits_per_char": -0.8264183402061462, "num_chars": 2}, {"sum_logits": -1.873024582862854, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.873024582862854, "logits_per_char": -0.936512291431427, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1130, "native_id": "ca60a46c9007e4b6213f50bfb5342fdd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4775310754776, "incorrect_loss_raw": 1.667179673910141, "correct_loss_per_char": 0.7387655377388, "incorrect_loss_per_char": 0.8335898369550705, "correct_loss_per_token": 1.4775310754776, "incorrect_loss_per_token": 1.667179673910141, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4587451219558716, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4587451219558716, "logits_per_char": -0.7293725609779358, "num_chars": 2}, {"sum_logits": -1.4775310754776, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4775310754776, "logits_per_char": -0.7387655377388, "num_chars": 2}, {"sum_logits": -1.607088565826416, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.607088565826416, "logits_per_char": -0.803544282913208, "num_chars": 2}, {"sum_logits": -1.652484655380249, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.652484655380249, "logits_per_char": -0.8262423276901245, "num_chars": 2}, {"sum_logits": -1.9504003524780273, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9504003524780273, "logits_per_char": -0.9752001762390137, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1131, "native_id": "f50209f04d11690d7c8f30e29b35ff02", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5681504011154175, "incorrect_loss_raw": 1.6513035297393799, "correct_loss_per_char": 0.7840752005577087, "incorrect_loss_per_char": 0.8256517648696899, "correct_loss_per_token": 1.5681504011154175, "incorrect_loss_per_token": 1.6513035297393799, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3393751382827759, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": true, "logits_per_token": -1.3393751382827759, "logits_per_char": -0.6696875691413879, "num_chars": 2}, {"sum_logits": -1.5681504011154175, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.5681504011154175, "logits_per_char": -0.7840752005577087, "num_chars": 2}, {"sum_logits": -1.6827279329299927, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6827279329299927, "logits_per_char": -0.8413639664649963, "num_chars": 2}, {"sum_logits": -1.6331100463867188, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.6331100463867188, "logits_per_char": -0.8165550231933594, "num_chars": 2}, {"sum_logits": -1.9500010013580322, "num_tokens": 1, "num_tokens_all": 296, "is_greedy": false, "logits_per_token": -1.9500010013580322, "logits_per_char": -0.9750005006790161, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1132, "native_id": "d725f1c2e150a3221de31612123f3f46", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1338547468185425, "incorrect_loss_raw": 1.8254015147686005, "correct_loss_per_char": 0.5669273734092712, "incorrect_loss_per_char": 0.9127007573843002, "correct_loss_per_token": 1.1338547468185425, "incorrect_loss_per_token": 1.8254015147686005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1338547468185425, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.1338547468185425, "logits_per_char": -0.5669273734092712, "num_chars": 2}, {"sum_logits": -1.4396287202835083, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4396287202835083, "logits_per_char": -0.7198143601417542, "num_chars": 2}, {"sum_logits": -1.744011640548706, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.744011640548706, "logits_per_char": -0.872005820274353, "num_chars": 2}, {"sum_logits": -1.8370978832244873, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.8370978832244873, "logits_per_char": -0.9185489416122437, "num_chars": 2}, {"sum_logits": -2.2808678150177, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.2808678150177, "logits_per_char": -1.14043390750885, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1133, "native_id": "f7735d721dfdc94621154951d4eaa4cf", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6208107471466064, "incorrect_loss_raw": 1.6364374160766602, "correct_loss_per_char": 0.8104053735733032, "incorrect_loss_per_char": 0.8182187080383301, "correct_loss_per_token": 1.6208107471466064, "incorrect_loss_per_token": 1.6364374160766602, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.465617060661316, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.465617060661316, "logits_per_char": -0.732808530330658, "num_chars": 2}, {"sum_logits": -1.4098713397979736, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4098713397979736, "logits_per_char": -0.7049356698989868, "num_chars": 2}, {"sum_logits": -1.6208107471466064, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6208107471466064, "logits_per_char": -0.8104053735733032, "num_chars": 2}, {"sum_logits": -1.6809110641479492, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6809110641479492, "logits_per_char": -0.8404555320739746, "num_chars": 2}, {"sum_logits": -1.9893501996994019, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9893501996994019, "logits_per_char": -0.9946750998497009, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1134, "native_id": "eaf980db7e945b1cf6d648fa55ddcb5e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5110374689102173, "incorrect_loss_raw": 1.730410873889923, "correct_loss_per_char": 0.7555187344551086, "incorrect_loss_per_char": 0.8652054369449615, "correct_loss_per_token": 1.5110374689102173, "incorrect_loss_per_token": 1.730410873889923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1783605813980103, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.1783605813980103, "logits_per_char": -0.5891802906990051, "num_chars": 2}, {"sum_logits": -1.5110374689102173, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5110374689102173, "logits_per_char": -0.7555187344551086, "num_chars": 2}, {"sum_logits": -1.539178490638733, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.539178490638733, "logits_per_char": -0.7695892453193665, "num_chars": 2}, {"sum_logits": -1.8384780883789062, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.8384780883789062, "logits_per_char": -0.9192390441894531, "num_chars": 2}, {"sum_logits": -2.365626335144043, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -2.365626335144043, "logits_per_char": -1.1828131675720215, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1135, "native_id": "8bbfe8cd056d612e9d3190f278bef287", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7340006828308105, "incorrect_loss_raw": 1.6115827858448029, "correct_loss_per_char": 0.8670003414154053, "incorrect_loss_per_char": 0.8057913929224014, "correct_loss_per_token": 1.7340006828308105, "incorrect_loss_per_token": 1.6115827858448029, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3303474187850952, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.3303474187850952, "logits_per_char": -0.6651737093925476, "num_chars": 2}, {"sum_logits": -1.4716625213623047, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4716625213623047, "logits_per_char": -0.7358312606811523, "num_chars": 2}, {"sum_logits": -1.7340006828308105, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7340006828308105, "logits_per_char": -0.8670003414154053, "num_chars": 2}, {"sum_logits": -1.7936298847198486, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7936298847198486, "logits_per_char": -0.8968149423599243, "num_chars": 2}, {"sum_logits": -1.850691318511963, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.850691318511963, "logits_per_char": -0.9253456592559814, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1136, "native_id": "aa7c4c351cf8d59792aa68e3de339db4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5413075685501099, "incorrect_loss_raw": 1.6637632548809052, "correct_loss_per_char": 0.7706537842750549, "incorrect_loss_per_char": 0.8318816274404526, "correct_loss_per_token": 1.5413075685501099, "incorrect_loss_per_token": 1.6637632548809052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3431540727615356, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3431540727615356, "logits_per_char": -0.6715770363807678, "num_chars": 2}, {"sum_logits": -1.5413075685501099, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5413075685501099, "logits_per_char": -0.7706537842750549, "num_chars": 2}, {"sum_logits": -1.5737550258636475, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5737550258636475, "logits_per_char": -0.7868775129318237, "num_chars": 2}, {"sum_logits": -1.7219672203063965, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7219672203063965, "logits_per_char": -0.8609836101531982, "num_chars": 2}, {"sum_logits": -2.016176700592041, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.016176700592041, "logits_per_char": -1.0080883502960205, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1137, "native_id": "23df3bac9cfcb156f4cfd8a05f21c5e2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4068455696105957, "incorrect_loss_raw": 1.6952933073043823, "correct_loss_per_char": 0.7034227848052979, "incorrect_loss_per_char": 0.8476466536521912, "correct_loss_per_token": 1.4068455696105957, "incorrect_loss_per_token": 1.6952933073043823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4068455696105957, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.4068455696105957, "logits_per_char": -0.7034227848052979, "num_chars": 2}, {"sum_logits": -1.5194449424743652, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5194449424743652, "logits_per_char": -0.7597224712371826, "num_chars": 2}, {"sum_logits": -1.5989603996276855, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5989603996276855, "logits_per_char": -0.7994801998138428, "num_chars": 2}, {"sum_logits": -1.5816709995269775, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5816709995269775, "logits_per_char": -0.7908354997634888, "num_chars": 2}, {"sum_logits": -2.081096887588501, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.081096887588501, "logits_per_char": -1.0405484437942505, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1138, "native_id": "d21777d771dc6fd08e769d378651817e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.086329936981201, "incorrect_loss_raw": 1.5262189209461212, "correct_loss_per_char": 1.0431649684906006, "incorrect_loss_per_char": 0.7631094604730606, "correct_loss_per_token": 2.086329936981201, "incorrect_loss_per_token": 1.5262189209461212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4663796424865723, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.4663796424865723, "logits_per_char": -0.7331898212432861, "num_chars": 2}, {"sum_logits": -1.504013180732727, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.504013180732727, "logits_per_char": -0.7520065903663635, "num_chars": 2}, {"sum_logits": -1.5500222444534302, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5500222444534302, "logits_per_char": -0.7750111222267151, "num_chars": 2}, {"sum_logits": -1.5844606161117554, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.5844606161117554, "logits_per_char": -0.7922303080558777, "num_chars": 2}, {"sum_logits": -2.086329936981201, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -2.086329936981201, "logits_per_char": -1.0431649684906006, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1139, "native_id": "611a4cc0e288b8a11afa923f48cb2ab4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.326388359069824, "incorrect_loss_raw": 1.5015155673027039, "correct_loss_per_char": 1.163194179534912, "incorrect_loss_per_char": 0.7507577836513519, "correct_loss_per_token": 2.326388359069824, "incorrect_loss_per_token": 1.5015155673027039, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.353183627128601, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.353183627128601, "logits_per_char": -0.6765918135643005, "num_chars": 2}, {"sum_logits": -1.5206081867218018, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5206081867218018, "logits_per_char": -0.7603040933609009, "num_chars": 2}, {"sum_logits": -1.6329915523529053, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6329915523529053, "logits_per_char": -0.8164957761764526, "num_chars": 2}, {"sum_logits": -1.4992789030075073, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4992789030075073, "logits_per_char": -0.7496394515037537, "num_chars": 2}, {"sum_logits": -2.326388359069824, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.326388359069824, "logits_per_char": -1.163194179534912, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1140, "native_id": "8e7941ce31996ca83cc0a68f7313c96d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.388543963432312, "incorrect_loss_raw": 1.7303280532360077, "correct_loss_per_char": 0.694271981716156, "incorrect_loss_per_char": 0.8651640266180038, "correct_loss_per_token": 1.388543963432312, "incorrect_loss_per_token": 1.7303280532360077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2776063680648804, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.2776063680648804, "logits_per_char": -0.6388031840324402, "num_chars": 2}, {"sum_logits": -1.388543963432312, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.388543963432312, "logits_per_char": -0.694271981716156, "num_chars": 2}, {"sum_logits": -1.714939832687378, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.714939832687378, "logits_per_char": -0.857469916343689, "num_chars": 2}, {"sum_logits": -1.7409932613372803, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7409932613372803, "logits_per_char": -0.8704966306686401, "num_chars": 2}, {"sum_logits": -2.187772750854492, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.187772750854492, "logits_per_char": -1.093886375427246, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1141, "native_id": "ea02772e27f5bd40eced3b65e8c6427f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6779452562332153, "incorrect_loss_raw": 1.6331741213798523, "correct_loss_per_char": 0.8389726281166077, "incorrect_loss_per_char": 0.8165870606899261, "correct_loss_per_token": 1.6779452562332153, "incorrect_loss_per_token": 1.6331741213798523, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4330631494522095, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.4330631494522095, "logits_per_char": -0.7165315747261047, "num_chars": 2}, {"sum_logits": -1.6779452562332153, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.6779452562332153, "logits_per_char": -0.8389726281166077, "num_chars": 2}, {"sum_logits": -1.4488143920898438, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4488143920898438, "logits_per_char": -0.7244071960449219, "num_chars": 2}, {"sum_logits": -1.5475016832351685, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5475016832351685, "logits_per_char": -0.7737508416175842, "num_chars": 2}, {"sum_logits": -2.1033172607421875, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -2.1033172607421875, "logits_per_char": -1.0516586303710938, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1142, "native_id": "de54d03e69d9765872f95ff06ed21499", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5915395021438599, "incorrect_loss_raw": 1.6489911377429962, "correct_loss_per_char": 0.7957697510719299, "incorrect_loss_per_char": 0.8244955688714981, "correct_loss_per_token": 1.5915395021438599, "incorrect_loss_per_token": 1.6489911377429962, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.277087926864624, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.277087926864624, "logits_per_char": -0.638543963432312, "num_chars": 2}, {"sum_logits": -1.5915395021438599, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5915395021438599, "logits_per_char": -0.7957697510719299, "num_chars": 2}, {"sum_logits": -1.648403525352478, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.648403525352478, "logits_per_char": -0.824201762676239, "num_chars": 2}, {"sum_logits": -1.7638286352157593, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7638286352157593, "logits_per_char": -0.8819143176078796, "num_chars": 2}, {"sum_logits": -1.9066444635391235, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9066444635391235, "logits_per_char": -0.9533222317695618, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1143, "native_id": "b231a732a3fdf0621391e7e385f8d651", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7705044746398926, "incorrect_loss_raw": 1.6031966507434845, "correct_loss_per_char": 0.8852522373199463, "incorrect_loss_per_char": 0.8015983253717422, "correct_loss_per_token": 1.7705044746398926, "incorrect_loss_per_token": 1.6031966507434845, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3080841302871704, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.3080841302871704, "logits_per_char": -0.6540420651435852, "num_chars": 2}, {"sum_logits": -1.5176327228546143, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5176327228546143, "logits_per_char": -0.7588163614273071, "num_chars": 2}, {"sum_logits": -1.6846263408660889, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6846263408660889, "logits_per_char": -0.8423131704330444, "num_chars": 2}, {"sum_logits": -1.7705044746398926, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.7705044746398926, "logits_per_char": -0.8852522373199463, "num_chars": 2}, {"sum_logits": -1.9024434089660645, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.9024434089660645, "logits_per_char": -0.9512217044830322, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1144, "native_id": "b9121c3228f961c5ad68958c702cd94b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5916959047317505, "incorrect_loss_raw": 1.638371080160141, "correct_loss_per_char": 0.7958479523658752, "incorrect_loss_per_char": 0.8191855400800705, "correct_loss_per_token": 1.5916959047317505, "incorrect_loss_per_token": 1.638371080160141, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4316291809082031, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4316291809082031, "logits_per_char": -0.7158145904541016, "num_chars": 2}, {"sum_logits": -1.5463006496429443, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5463006496429443, "logits_per_char": -0.7731503248214722, "num_chars": 2}, {"sum_logits": -1.5916959047317505, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5916959047317505, "logits_per_char": -0.7958479523658752, "num_chars": 2}, {"sum_logits": -1.5977587699890137, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5977587699890137, "logits_per_char": -0.7988793849945068, "num_chars": 2}, {"sum_logits": -1.9777957201004028, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.9777957201004028, "logits_per_char": -0.9888978600502014, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1145, "native_id": "4015ab002ff8c233d1c7ef26f5156b88", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.011687755584717, "incorrect_loss_raw": 1.5428906679153442, "correct_loss_per_char": 1.0058438777923584, "incorrect_loss_per_char": 0.7714453339576721, "correct_loss_per_token": 2.011687755584717, "incorrect_loss_per_token": 1.5428906679153442, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3659275770187378, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3659275770187378, "logits_per_char": -0.6829637885093689, "num_chars": 2}, {"sum_logits": -1.5352020263671875, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5352020263671875, "logits_per_char": -0.7676010131835938, "num_chars": 2}, {"sum_logits": -1.5604472160339355, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5604472160339355, "logits_per_char": -0.7802236080169678, "num_chars": 2}, {"sum_logits": -1.7099858522415161, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7099858522415161, "logits_per_char": -0.8549929261207581, "num_chars": 2}, {"sum_logits": -2.011687755584717, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.011687755584717, "logits_per_char": -1.0058438777923584, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1146, "native_id": "0197ade3bb26d163ab2e284c960c626f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4813759326934814, "incorrect_loss_raw": 1.7003042995929718, "correct_loss_per_char": 0.7406879663467407, "incorrect_loss_per_char": 0.8501521497964859, "correct_loss_per_token": 1.4813759326934814, "incorrect_loss_per_token": 1.7003042995929718, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4813759326934814, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4813759326934814, "logits_per_char": -0.7406879663467407, "num_chars": 2}, {"sum_logits": -1.589285969734192, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.589285969734192, "logits_per_char": -0.794642984867096, "num_chars": 2}, {"sum_logits": -1.4505211114883423, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.4505211114883423, "logits_per_char": -0.7252605557441711, "num_chars": 2}, {"sum_logits": -1.4830049276351929, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4830049276351929, "logits_per_char": -0.7415024638175964, "num_chars": 2}, {"sum_logits": -2.27840518951416, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.27840518951416, "logits_per_char": -1.13920259475708, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1147, "native_id": "a90f9197a13c64089c9ba95bcba275ad", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6451348066329956, "incorrect_loss_raw": 1.6213939785957336, "correct_loss_per_char": 0.8225674033164978, "incorrect_loss_per_char": 0.8106969892978668, "correct_loss_per_token": 1.6451348066329956, "incorrect_loss_per_token": 1.6213939785957336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.436374545097351, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.436374545097351, "logits_per_char": -0.7181872725486755, "num_chars": 2}, {"sum_logits": -1.5591672658920288, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5591672658920288, "logits_per_char": -0.7795836329460144, "num_chars": 2}, {"sum_logits": -1.564682126045227, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.564682126045227, "logits_per_char": -0.7823410630226135, "num_chars": 2}, {"sum_logits": -1.6451348066329956, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6451348066329956, "logits_per_char": -0.8225674033164978, "num_chars": 2}, {"sum_logits": -1.9253519773483276, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.9253519773483276, "logits_per_char": -0.9626759886741638, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1148, "native_id": "684204df916cc58d47293960f9c6ed9f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.938137173652649, "incorrect_loss_raw": 1.5549173653125763, "correct_loss_per_char": 0.9690685868263245, "incorrect_loss_per_char": 0.7774586826562881, "correct_loss_per_token": 1.938137173652649, "incorrect_loss_per_token": 1.5549173653125763, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.356710433959961, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.356710433959961, "logits_per_char": -0.6783552169799805, "num_chars": 2}, {"sum_logits": -1.5457583665847778, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5457583665847778, "logits_per_char": -0.7728791832923889, "num_chars": 2}, {"sum_logits": -1.6600044965744019, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6600044965744019, "logits_per_char": -0.8300022482872009, "num_chars": 2}, {"sum_logits": -1.6571961641311646, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6571961641311646, "logits_per_char": -0.8285980820655823, "num_chars": 2}, {"sum_logits": -1.938137173652649, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.938137173652649, "logits_per_char": -0.9690685868263245, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1149, "native_id": "a2aa95861ef74bf1ecfc55db505e3982", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.60261869430542, "incorrect_loss_raw": 1.6445484459400177, "correct_loss_per_char": 0.80130934715271, "incorrect_loss_per_char": 0.8222742229700089, "correct_loss_per_token": 1.60261869430542, "incorrect_loss_per_token": 1.6445484459400177, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3976606130599976, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3976606130599976, "logits_per_char": -0.6988303065299988, "num_chars": 2}, {"sum_logits": -1.4741206169128418, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4741206169128418, "logits_per_char": -0.7370603084564209, "num_chars": 2}, {"sum_logits": -1.6843957901000977, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6843957901000977, "logits_per_char": -0.8421978950500488, "num_chars": 2}, {"sum_logits": -1.60261869430542, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.60261869430542, "logits_per_char": -0.80130934715271, "num_chars": 2}, {"sum_logits": -2.022016763687134, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.022016763687134, "logits_per_char": -1.011008381843567, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1150, "native_id": "8555dd9667d010018961a2f7d1c22704", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.624550461769104, "incorrect_loss_raw": 1.626405507326126, "correct_loss_per_char": 0.812275230884552, "incorrect_loss_per_char": 0.813202753663063, "correct_loss_per_token": 1.624550461769104, "incorrect_loss_per_token": 1.626405507326126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4240539073944092, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.4240539073944092, "logits_per_char": -0.7120269536972046, "num_chars": 2}, {"sum_logits": -1.518844485282898, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.518844485282898, "logits_per_char": -0.759422242641449, "num_chars": 2}, {"sum_logits": -1.685604453086853, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.685604453086853, "logits_per_char": -0.8428022265434265, "num_chars": 2}, {"sum_logits": -1.624550461769104, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.624550461769104, "logits_per_char": -0.812275230884552, "num_chars": 2}, {"sum_logits": -1.8771191835403442, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.8771191835403442, "logits_per_char": -0.9385595917701721, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1151, "native_id": "84a761f516efce04ab27d7ca8dd25255", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5511860847473145, "incorrect_loss_raw": 1.6503327786922455, "correct_loss_per_char": 0.7755930423736572, "incorrect_loss_per_char": 0.8251663893461227, "correct_loss_per_token": 1.5511860847473145, "incorrect_loss_per_token": 1.6503327786922455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3821247816085815, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3821247816085815, "logits_per_char": -0.6910623908042908, "num_chars": 2}, {"sum_logits": -1.5511860847473145, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5511860847473145, "logits_per_char": -0.7755930423736572, "num_chars": 2}, {"sum_logits": -1.6275663375854492, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6275663375854492, "logits_per_char": -0.8137831687927246, "num_chars": 2}, {"sum_logits": -1.6250925064086914, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6250925064086914, "logits_per_char": -0.8125462532043457, "num_chars": 2}, {"sum_logits": -1.9665474891662598, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9665474891662598, "logits_per_char": -0.9832737445831299, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1152, "native_id": "45a6becd307342669d9d17474e50b97a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5519541501998901, "incorrect_loss_raw": 1.6675235629081726, "correct_loss_per_char": 0.7759770750999451, "incorrect_loss_per_char": 0.8337617814540863, "correct_loss_per_token": 1.5519541501998901, "incorrect_loss_per_token": 1.6675235629081726, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3363739252090454, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.3363739252090454, "logits_per_char": -0.6681869626045227, "num_chars": 2}, {"sum_logits": -1.5638256072998047, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5638256072998047, "logits_per_char": -0.7819128036499023, "num_chars": 2}, {"sum_logits": -1.5519541501998901, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5519541501998901, "logits_per_char": -0.7759770750999451, "num_chars": 2}, {"sum_logits": -1.6907902956008911, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.6907902956008911, "logits_per_char": -0.8453951478004456, "num_chars": 2}, {"sum_logits": -2.079104423522949, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -2.079104423522949, "logits_per_char": -1.0395522117614746, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1153, "native_id": "c509c499bace6de324b39c0d4d0c30fa", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7918176651000977, "incorrect_loss_raw": 1.5780056416988373, "correct_loss_per_char": 0.8959088325500488, "incorrect_loss_per_char": 0.7890028208494186, "correct_loss_per_token": 1.7918176651000977, "incorrect_loss_per_token": 1.5780056416988373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4518224000930786, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.4518224000930786, "logits_per_char": -0.7259112000465393, "num_chars": 2}, {"sum_logits": -1.5576813220977783, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5576813220977783, "logits_per_char": -0.7788406610488892, "num_chars": 2}, {"sum_logits": -1.7011079788208008, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.7011079788208008, "logits_per_char": -0.8505539894104004, "num_chars": 2}, {"sum_logits": -1.6014108657836914, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6014108657836914, "logits_per_char": -0.8007054328918457, "num_chars": 2}, {"sum_logits": -1.7918176651000977, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.7918176651000977, "logits_per_char": -0.8959088325500488, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1154, "native_id": "77ddc9134bb27f9962aa2ed5ec5a5ef9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5507935285568237, "incorrect_loss_raw": 1.654489517211914, "correct_loss_per_char": 0.7753967642784119, "incorrect_loss_per_char": 0.827244758605957, "correct_loss_per_token": 1.5507935285568237, "incorrect_loss_per_token": 1.654489517211914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3192774057388306, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3192774057388306, "logits_per_char": -0.6596387028694153, "num_chars": 2}, {"sum_logits": -1.5507935285568237, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5507935285568237, "logits_per_char": -0.7753967642784119, "num_chars": 2}, {"sum_logits": -1.639003872871399, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.639003872871399, "logits_per_char": -0.8195019364356995, "num_chars": 2}, {"sum_logits": -1.7712103128433228, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7712103128433228, "logits_per_char": -0.8856051564216614, "num_chars": 2}, {"sum_logits": -1.888466477394104, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.888466477394104, "logits_per_char": -0.944233238697052, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1155, "native_id": "715583129369c0c5c9f499c93a1c095e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.06887149810791, "incorrect_loss_raw": 1.5298264026641846, "correct_loss_per_char": 1.034435749053955, "incorrect_loss_per_char": 0.7649132013320923, "correct_loss_per_token": 2.06887149810791, "incorrect_loss_per_token": 1.5298264026641846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4817073345184326, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4817073345184326, "logits_per_char": -0.7408536672592163, "num_chars": 2}, {"sum_logits": -1.5368680953979492, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5368680953979492, "logits_per_char": -0.7684340476989746, "num_chars": 2}, {"sum_logits": -1.4625929594039917, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4625929594039917, "logits_per_char": -0.7312964797019958, "num_chars": 2}, {"sum_logits": -1.6381372213363647, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6381372213363647, "logits_per_char": -0.8190686106681824, "num_chars": 2}, {"sum_logits": -2.06887149810791, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.06887149810791, "logits_per_char": -1.034435749053955, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1156, "native_id": "a478e8b7c049781574f7fbb11ba1eec0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5877901315689087, "incorrect_loss_raw": 1.6292537152767181, "correct_loss_per_char": 0.7938950657844543, "incorrect_loss_per_char": 0.8146268576383591, "correct_loss_per_token": 1.5877901315689087, "incorrect_loss_per_token": 1.6292537152767181, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5959943532943726, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5959943532943726, "logits_per_char": -0.7979971766471863, "num_chars": 2}, {"sum_logits": -1.5877901315689087, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5877901315689087, "logits_per_char": -0.7938950657844543, "num_chars": 2}, {"sum_logits": -1.4981647729873657, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.4981647729873657, "logits_per_char": -0.7490823864936829, "num_chars": 2}, {"sum_logits": -1.578585147857666, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.578585147857666, "logits_per_char": -0.789292573928833, "num_chars": 2}, {"sum_logits": -1.8442705869674683, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.8442705869674683, "logits_per_char": -0.9221352934837341, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1157, "native_id": "f427f9de6bf580314531baf86de8acbc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6335275173187256, "incorrect_loss_raw": 1.6202957034111023, "correct_loss_per_char": 0.8167637586593628, "incorrect_loss_per_char": 0.8101478517055511, "correct_loss_per_token": 1.6335275173187256, "incorrect_loss_per_token": 1.6202957034111023, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6405514478683472, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6405514478683472, "logits_per_char": -0.8202757239341736, "num_chars": 2}, {"sum_logits": -1.6335275173187256, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6335275173187256, "logits_per_char": -0.8167637586593628, "num_chars": 2}, {"sum_logits": -1.4025192260742188, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.4025192260742188, "logits_per_char": -0.7012596130371094, "num_chars": 2}, {"sum_logits": -1.6188024282455444, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.6188024282455444, "logits_per_char": -0.8094012141227722, "num_chars": 2}, {"sum_logits": -1.8193097114562988, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.8193097114562988, "logits_per_char": -0.9096548557281494, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1158, "native_id": "0f7425ecbe369bf41a230aab92d84132", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5112221240997314, "incorrect_loss_raw": 1.6452616453170776, "correct_loss_per_char": 0.7556110620498657, "incorrect_loss_per_char": 0.8226308226585388, "correct_loss_per_token": 1.5112221240997314, "incorrect_loss_per_token": 1.6452616453170776, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5778648853302002, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.5778648853302002, "logits_per_char": -0.7889324426651001, "num_chars": 2}, {"sum_logits": -1.5112221240997314, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": true, "logits_per_token": -1.5112221240997314, "logits_per_char": -0.7556110620498657, "num_chars": 2}, {"sum_logits": -1.605607032775879, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.605607032775879, "logits_per_char": -0.8028035163879395, "num_chars": 2}, {"sum_logits": -1.6005210876464844, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6005210876464844, "logits_per_char": -0.8002605438232422, "num_chars": 2}, {"sum_logits": -1.797053575515747, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.797053575515747, "logits_per_char": -0.8985267877578735, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1159, "native_id": "c872c08a95dd28a16479b76f240a4ad5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6527084112167358, "incorrect_loss_raw": 1.6084564328193665, "correct_loss_per_char": 0.8263542056083679, "incorrect_loss_per_char": 0.8042282164096832, "correct_loss_per_token": 1.6527084112167358, "incorrect_loss_per_token": 1.6084564328193665, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5698713064193726, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5698713064193726, "logits_per_char": -0.7849356532096863, "num_chars": 2}, {"sum_logits": -1.5241364240646362, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.5241364240646362, "logits_per_char": -0.7620682120323181, "num_chars": 2}, {"sum_logits": -1.5793477296829224, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5793477296829224, "logits_per_char": -0.7896738648414612, "num_chars": 2}, {"sum_logits": -1.6527084112167358, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.6527084112167358, "logits_per_char": -0.8263542056083679, "num_chars": 2}, {"sum_logits": -1.7604702711105347, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.7604702711105347, "logits_per_char": -0.8802351355552673, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1160, "native_id": "08d908ed723f813574992195d61386a2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4559626579284668, "incorrect_loss_raw": 1.6786934733390808, "correct_loss_per_char": 0.7279813289642334, "incorrect_loss_per_char": 0.8393467366695404, "correct_loss_per_token": 1.4559626579284668, "incorrect_loss_per_token": 1.6786934733390808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4515070915222168, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4515070915222168, "logits_per_char": -0.7257535457611084, "num_chars": 2}, {"sum_logits": -1.4559626579284668, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4559626579284668, "logits_per_char": -0.7279813289642334, "num_chars": 2}, {"sum_logits": -1.539968490600586, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.539968490600586, "logits_per_char": -0.769984245300293, "num_chars": 2}, {"sum_logits": -1.7387332916259766, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.7387332916259766, "logits_per_char": -0.8693666458129883, "num_chars": 2}, {"sum_logits": -1.984565019607544, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.984565019607544, "logits_per_char": -0.992282509803772, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1161, "native_id": "5365fd00ef8cec62ee5685e246a939db", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9892410039901733, "incorrect_loss_raw": 1.5447402596473694, "correct_loss_per_char": 0.9946205019950867, "incorrect_loss_per_char": 0.7723701298236847, "correct_loss_per_token": 1.9892410039901733, "incorrect_loss_per_token": 1.5447402596473694, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4230916500091553, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.4230916500091553, "logits_per_char": -0.7115458250045776, "num_chars": 2}, {"sum_logits": -1.5223972797393799, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5223972797393799, "logits_per_char": -0.7611986398696899, "num_chars": 2}, {"sum_logits": -1.5635097026824951, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5635097026824951, "logits_per_char": -0.7817548513412476, "num_chars": 2}, {"sum_logits": -1.6699624061584473, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6699624061584473, "logits_per_char": -0.8349812030792236, "num_chars": 2}, {"sum_logits": -1.9892410039901733, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.9892410039901733, "logits_per_char": -0.9946205019950867, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1162, "native_id": "5649bd90dbb57e223fd843b7a4563a0f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6857858896255493, "incorrect_loss_raw": 1.6490690112113953, "correct_loss_per_char": 0.8428929448127747, "incorrect_loss_per_char": 0.8245345056056976, "correct_loss_per_token": 1.6857858896255493, "incorrect_loss_per_token": 1.6490690112113953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2752251625061035, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.2752251625061035, "logits_per_char": -0.6376125812530518, "num_chars": 2}, {"sum_logits": -1.5206655263900757, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5206655263900757, "logits_per_char": -0.7603327631950378, "num_chars": 2}, {"sum_logits": -1.6153424978256226, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.6153424978256226, "logits_per_char": -0.8076712489128113, "num_chars": 2}, {"sum_logits": -1.6857858896255493, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.6857858896255493, "logits_per_char": -0.8428929448127747, "num_chars": 2}, {"sum_logits": -2.1850428581237793, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -2.1850428581237793, "logits_per_char": -1.0925214290618896, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1163, "native_id": "0a2195ae8d4706abc5721578c9991466", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4985624551773071, "incorrect_loss_raw": 1.6707410514354706, "correct_loss_per_char": 0.7492812275886536, "incorrect_loss_per_char": 0.8353705257177353, "correct_loss_per_token": 1.4985624551773071, "incorrect_loss_per_token": 1.6707410514354706, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4146443605422974, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4146443605422974, "logits_per_char": -0.7073221802711487, "num_chars": 2}, {"sum_logits": -1.4985624551773071, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4985624551773071, "logits_per_char": -0.7492812275886536, "num_chars": 2}, {"sum_logits": -1.5742096900939941, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5742096900939941, "logits_per_char": -0.7871048450469971, "num_chars": 2}, {"sum_logits": -1.638704538345337, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.638704538345337, "logits_per_char": -0.8193522691726685, "num_chars": 2}, {"sum_logits": -2.055405616760254, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.055405616760254, "logits_per_char": -1.027702808380127, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1164, "native_id": "5d15989039d46156b417c149728591de", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4977978467941284, "incorrect_loss_raw": 1.6849685609340668, "correct_loss_per_char": 0.7488989233970642, "incorrect_loss_per_char": 0.8424842804670334, "correct_loss_per_token": 1.4977978467941284, "incorrect_loss_per_token": 1.6849685609340668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3091870546340942, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3091870546340942, "logits_per_char": -0.6545935273170471, "num_chars": 2}, {"sum_logits": -1.4977978467941284, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.4977978467941284, "logits_per_char": -0.7488989233970642, "num_chars": 2}, {"sum_logits": -1.5885735750198364, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5885735750198364, "logits_per_char": -0.7942867875099182, "num_chars": 2}, {"sum_logits": -1.7577720880508423, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7577720880508423, "logits_per_char": -0.8788860440254211, "num_chars": 2}, {"sum_logits": -2.084341526031494, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.084341526031494, "logits_per_char": -1.042170763015747, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1165, "native_id": "6eb57102b44ab74163d8f9821cbdabd0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4039065837860107, "incorrect_loss_raw": 1.6834149956703186, "correct_loss_per_char": 0.7019532918930054, "incorrect_loss_per_char": 0.8417074978351593, "correct_loss_per_token": 1.4039065837860107, "incorrect_loss_per_token": 1.6834149956703186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4039065837860107, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4039065837860107, "logits_per_char": -0.7019532918930054, "num_chars": 2}, {"sum_logits": -1.584100365638733, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.584100365638733, "logits_per_char": -0.7920501828193665, "num_chars": 2}, {"sum_logits": -1.6300448179244995, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6300448179244995, "logits_per_char": -0.8150224089622498, "num_chars": 2}, {"sum_logits": -1.6354035139083862, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6354035139083862, "logits_per_char": -0.8177017569541931, "num_chars": 2}, {"sum_logits": -1.8841112852096558, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8841112852096558, "logits_per_char": -0.9420556426048279, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1166, "native_id": "63861ac5e633db9090704ae315ef6f93", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8400269746780396, "incorrect_loss_raw": 1.5650196075439453, "correct_loss_per_char": 0.9200134873390198, "incorrect_loss_per_char": 0.7825098037719727, "correct_loss_per_token": 1.8400269746780396, "incorrect_loss_per_token": 1.5650196075439453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5424259901046753, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5424259901046753, "logits_per_char": -0.7712129950523376, "num_chars": 2}, {"sum_logits": -1.512020468711853, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.512020468711853, "logits_per_char": -0.7560102343559265, "num_chars": 2}, {"sum_logits": -1.6122552156448364, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6122552156448364, "logits_per_char": -0.8061276078224182, "num_chars": 2}, {"sum_logits": -1.5933767557144165, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5933767557144165, "logits_per_char": -0.7966883778572083, "num_chars": 2}, {"sum_logits": -1.8400269746780396, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.8400269746780396, "logits_per_char": -0.9200134873390198, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1167, "native_id": "8058c566a4f488033d00e6520b17caea", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5665825605392456, "incorrect_loss_raw": 1.6309201121330261, "correct_loss_per_char": 0.7832912802696228, "incorrect_loss_per_char": 0.8154600560665131, "correct_loss_per_token": 1.5665825605392456, "incorrect_loss_per_token": 1.6309201121330261, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5065616369247437, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.5065616369247437, "logits_per_char": -0.7532808184623718, "num_chars": 2}, {"sum_logits": -1.6012556552886963, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6012556552886963, "logits_per_char": -0.8006278276443481, "num_chars": 2}, {"sum_logits": -1.6437021493911743, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6437021493911743, "logits_per_char": -0.8218510746955872, "num_chars": 2}, {"sum_logits": -1.5665825605392456, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.5665825605392456, "logits_per_char": -0.7832912802696228, "num_chars": 2}, {"sum_logits": -1.7721610069274902, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.7721610069274902, "logits_per_char": -0.8860805034637451, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1168, "native_id": "57b83653d82b27d32bc39228130f3516", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0505242347717285, "incorrect_loss_raw": 1.5320043861865997, "correct_loss_per_char": 1.0252621173858643, "incorrect_loss_per_char": 0.7660021930932999, "correct_loss_per_token": 2.0505242347717285, "incorrect_loss_per_token": 1.5320043861865997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4464370012283325, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.4464370012283325, "logits_per_char": -0.7232185006141663, "num_chars": 2}, {"sum_logits": -1.4785878658294678, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4785878658294678, "logits_per_char": -0.7392939329147339, "num_chars": 2}, {"sum_logits": -1.6218870878219604, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6218870878219604, "logits_per_char": -0.8109435439109802, "num_chars": 2}, {"sum_logits": -1.5811055898666382, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5811055898666382, "logits_per_char": -0.7905527949333191, "num_chars": 2}, {"sum_logits": -2.0505242347717285, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.0505242347717285, "logits_per_char": -1.0252621173858643, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1169, "native_id": "410f907f817dd7aa8e73291a918d3d86", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5912772417068481, "incorrect_loss_raw": 1.64381542801857, "correct_loss_per_char": 0.7956386208534241, "incorrect_loss_per_char": 0.821907714009285, "correct_loss_per_token": 1.5912772417068481, "incorrect_loss_per_token": 1.64381542801857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4699599742889404, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.4699599742889404, "logits_per_char": -0.7349799871444702, "num_chars": 2}, {"sum_logits": -1.4843193292617798, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4843193292617798, "logits_per_char": -0.7421596646308899, "num_chars": 2}, {"sum_logits": -1.583336591720581, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.583336591720581, "logits_per_char": -0.7916682958602905, "num_chars": 2}, {"sum_logits": -1.5912772417068481, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5912772417068481, "logits_per_char": -0.7956386208534241, "num_chars": 2}, {"sum_logits": -2.0376458168029785, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -2.0376458168029785, "logits_per_char": -1.0188229084014893, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1170, "native_id": "506c2dbfe7b00a82bfdf0507a8de88fb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.892471432685852, "incorrect_loss_raw": 1.5599040985107422, "correct_loss_per_char": 0.946235716342926, "incorrect_loss_per_char": 0.7799520492553711, "correct_loss_per_token": 1.892471432685852, "incorrect_loss_per_token": 1.5599040985107422, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4794642925262451, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4794642925262451, "logits_per_char": -0.7397321462631226, "num_chars": 2}, {"sum_logits": -1.6319533586502075, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6319533586502075, "logits_per_char": -0.8159766793251038, "num_chars": 2}, {"sum_logits": -1.6263320446014404, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6263320446014404, "logits_per_char": -0.8131660223007202, "num_chars": 2}, {"sum_logits": -1.5018666982650757, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5018666982650757, "logits_per_char": -0.7509333491325378, "num_chars": 2}, {"sum_logits": -1.892471432685852, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.892471432685852, "logits_per_char": -0.946235716342926, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1171, "native_id": "42520bf3f93f8de23670044e019001a3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1211330890655518, "incorrect_loss_raw": 1.5233054757118225, "correct_loss_per_char": 1.0605665445327759, "incorrect_loss_per_char": 0.7616527378559113, "correct_loss_per_token": 2.1211330890655518, "incorrect_loss_per_token": 1.5233054757118225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4007844924926758, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.4007844924926758, "logits_per_char": -0.7003922462463379, "num_chars": 2}, {"sum_logits": -1.464543104171753, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.464543104171753, "logits_per_char": -0.7322715520858765, "num_chars": 2}, {"sum_logits": -1.5731685161590576, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.5731685161590576, "logits_per_char": -0.7865842580795288, "num_chars": 2}, {"sum_logits": -1.6547257900238037, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6547257900238037, "logits_per_char": -0.8273628950119019, "num_chars": 2}, {"sum_logits": -2.1211330890655518, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.1211330890655518, "logits_per_char": -1.0605665445327759, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1172, "native_id": "5e260e1d96187716888cbd968010bb65", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5836188793182373, "incorrect_loss_raw": 1.660223811864853, "correct_loss_per_char": 0.7918094396591187, "incorrect_loss_per_char": 0.8301119059324265, "correct_loss_per_token": 1.5836188793182373, "incorrect_loss_per_token": 1.660223811864853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4437508583068848, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4437508583068848, "logits_per_char": -0.7218754291534424, "num_chars": 2}, {"sum_logits": -1.5148433446884155, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5148433446884155, "logits_per_char": -0.7574216723442078, "num_chars": 2}, {"sum_logits": -1.5164918899536133, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5164918899536133, "logits_per_char": -0.7582459449768066, "num_chars": 2}, {"sum_logits": -1.5836188793182373, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5836188793182373, "logits_per_char": -0.7918094396591187, "num_chars": 2}, {"sum_logits": -2.165809154510498, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.165809154510498, "logits_per_char": -1.082904577255249, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1173, "native_id": "ed50555f8db2b8f66caf9868dcd7e13b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6800978183746338, "incorrect_loss_raw": 1.6003476083278656, "correct_loss_per_char": 0.8400489091873169, "incorrect_loss_per_char": 0.8001738041639328, "correct_loss_per_token": 1.6800978183746338, "incorrect_loss_per_token": 1.6003476083278656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6800978183746338, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6800978183746338, "logits_per_char": -0.8400489091873169, "num_chars": 2}, {"sum_logits": -1.7150744199752808, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.7150744199752808, "logits_per_char": -0.8575372099876404, "num_chars": 2}, {"sum_logits": -1.5831037759780884, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5831037759780884, "logits_per_char": -0.7915518879890442, "num_chars": 2}, {"sum_logits": -1.544369101524353, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.544369101524353, "logits_per_char": -0.7721845507621765, "num_chars": 2}, {"sum_logits": -1.5588431358337402, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5588431358337402, "logits_per_char": -0.7794215679168701, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1174, "native_id": "a8c284637dabc87745a7eb05d4f7fcbc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5629806518554688, "incorrect_loss_raw": 1.6503241062164307, "correct_loss_per_char": 0.7814903259277344, "incorrect_loss_per_char": 0.8251620531082153, "correct_loss_per_token": 1.5629806518554688, "incorrect_loss_per_token": 1.6503241062164307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4763822555541992, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4763822555541992, "logits_per_char": -0.7381911277770996, "num_chars": 2}, {"sum_logits": -1.590131402015686, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.590131402015686, "logits_per_char": -0.795065701007843, "num_chars": 2}, {"sum_logits": -1.502478003501892, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.502478003501892, "logits_per_char": -0.751239001750946, "num_chars": 2}, {"sum_logits": -1.5629806518554688, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5629806518554688, "logits_per_char": -0.7814903259277344, "num_chars": 2}, {"sum_logits": -2.0323047637939453, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.0323047637939453, "logits_per_char": -1.0161523818969727, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1175, "native_id": "5758a0fb686071e95d95b1cfad5299a0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6332656145095825, "incorrect_loss_raw": 1.6338986158370972, "correct_loss_per_char": 0.8166328072547913, "incorrect_loss_per_char": 0.8169493079185486, "correct_loss_per_token": 1.6332656145095825, "incorrect_loss_per_token": 1.6338986158370972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3373116254806519, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3373116254806519, "logits_per_char": -0.6686558127403259, "num_chars": 2}, {"sum_logits": -1.58350670337677, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.58350670337677, "logits_per_char": -0.791753351688385, "num_chars": 2}, {"sum_logits": -1.6332656145095825, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6332656145095825, "logits_per_char": -0.8166328072547913, "num_chars": 2}, {"sum_logits": -1.6758219003677368, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6758219003677368, "logits_per_char": -0.8379109501838684, "num_chars": 2}, {"sum_logits": -1.93895423412323, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.93895423412323, "logits_per_char": -0.969477117061615, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1176, "native_id": "d986f17acb3ed19c77e3ca3f98c026b9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7453497648239136, "incorrect_loss_raw": 1.6211986541748047, "correct_loss_per_char": 0.8726748824119568, "incorrect_loss_per_char": 0.8105993270874023, "correct_loss_per_token": 1.7453497648239136, "incorrect_loss_per_token": 1.6211986541748047, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.347099781036377, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.347099781036377, "logits_per_char": -0.6735498905181885, "num_chars": 2}, {"sum_logits": -1.4071494340896606, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.4071494340896606, "logits_per_char": -0.7035747170448303, "num_chars": 2}, {"sum_logits": -1.663680911064148, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.663680911064148, "logits_per_char": -0.831840455532074, "num_chars": 2}, {"sum_logits": -1.7453497648239136, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.7453497648239136, "logits_per_char": -0.8726748824119568, "num_chars": 2}, {"sum_logits": -2.066864490509033, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.066864490509033, "logits_per_char": -1.0334322452545166, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1177, "native_id": "4a4f6408fae400ce0beb5bea0f9913e9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.085324764251709, "incorrect_loss_raw": 1.533469557762146, "correct_loss_per_char": 1.0426623821258545, "incorrect_loss_per_char": 0.766734778881073, "correct_loss_per_token": 2.085324764251709, "incorrect_loss_per_token": 1.533469557762146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3985319137573242, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3985319137573242, "logits_per_char": -0.6992659568786621, "num_chars": 2}, {"sum_logits": -1.4939794540405273, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4939794540405273, "logits_per_char": -0.7469897270202637, "num_chars": 2}, {"sum_logits": -1.7226101160049438, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7226101160049438, "logits_per_char": -0.8613050580024719, "num_chars": 2}, {"sum_logits": -1.5187567472457886, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5187567472457886, "logits_per_char": -0.7593783736228943, "num_chars": 2}, {"sum_logits": -2.085324764251709, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.085324764251709, "logits_per_char": -1.0426623821258545, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1178, "native_id": "8c655f3a55bde41aad880f138d7a445d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5994672775268555, "incorrect_loss_raw": 1.6416893005371094, "correct_loss_per_char": 0.7997336387634277, "incorrect_loss_per_char": 0.8208446502685547, "correct_loss_per_token": 1.5994672775268555, "incorrect_loss_per_token": 1.6416893005371094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3604557514190674, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3604557514190674, "logits_per_char": -0.6802278757095337, "num_chars": 2}, {"sum_logits": -1.6017801761627197, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6017801761627197, "logits_per_char": -0.8008900880813599, "num_chars": 2}, {"sum_logits": -1.5994672775268555, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5994672775268555, "logits_per_char": -0.7997336387634277, "num_chars": 2}, {"sum_logits": -1.6326885223388672, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.6326885223388672, "logits_per_char": -0.8163442611694336, "num_chars": 2}, {"sum_logits": -1.9718327522277832, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.9718327522277832, "logits_per_char": -0.9859163761138916, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1179, "native_id": "56417ee33b44f0d916bedfb6fd99b0ec", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5554230213165283, "incorrect_loss_raw": 1.6526778042316437, "correct_loss_per_char": 0.7777115106582642, "incorrect_loss_per_char": 0.8263389021158218, "correct_loss_per_token": 1.5554230213165283, "incorrect_loss_per_token": 1.6526778042316437, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.38536536693573, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.38536536693573, "logits_per_char": -0.692682683467865, "num_chars": 2}, {"sum_logits": -1.6077808141708374, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6077808141708374, "logits_per_char": -0.8038904070854187, "num_chars": 2}, {"sum_logits": -1.6212962865829468, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6212962865829468, "logits_per_char": -0.8106481432914734, "num_chars": 2}, {"sum_logits": -1.5554230213165283, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5554230213165283, "logits_per_char": -0.7777115106582642, "num_chars": 2}, {"sum_logits": -1.9962687492370605, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.9962687492370605, "logits_per_char": -0.9981343746185303, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1180, "native_id": "43fb083962f825ae651d88648bbd2f74", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.668213129043579, "incorrect_loss_raw": 1.6483837962150574, "correct_loss_per_char": 0.8341065645217896, "incorrect_loss_per_char": 0.8241918981075287, "correct_loss_per_token": 1.668213129043579, "incorrect_loss_per_token": 1.6483837962150574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3856991529464722, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3856991529464722, "logits_per_char": -0.6928495764732361, "num_chars": 2}, {"sum_logits": -1.4787691831588745, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4787691831588745, "logits_per_char": -0.7393845915794373, "num_chars": 2}, {"sum_logits": -1.5304770469665527, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5304770469665527, "logits_per_char": -0.7652385234832764, "num_chars": 2}, {"sum_logits": -1.668213129043579, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.668213129043579, "logits_per_char": -0.8341065645217896, "num_chars": 2}, {"sum_logits": -2.19858980178833, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.19858980178833, "logits_per_char": -1.099294900894165, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1181, "native_id": "aed771629c8dbd0c2587891e98030607", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.178666353225708, "incorrect_loss_raw": 1.5176305174827576, "correct_loss_per_char": 1.089333176612854, "incorrect_loss_per_char": 0.7588152587413788, "correct_loss_per_token": 2.178666353225708, "incorrect_loss_per_token": 1.5176305174827576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3890994787216187, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.3890994787216187, "logits_per_char": -0.6945497393608093, "num_chars": 2}, {"sum_logits": -1.4157406091690063, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4157406091690063, "logits_per_char": -0.7078703045845032, "num_chars": 2}, {"sum_logits": -1.6491131782531738, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6491131782531738, "logits_per_char": -0.8245565891265869, "num_chars": 2}, {"sum_logits": -1.6165688037872314, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6165688037872314, "logits_per_char": -0.8082844018936157, "num_chars": 2}, {"sum_logits": -2.178666353225708, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.178666353225708, "logits_per_char": -1.089333176612854, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1182, "native_id": "d0a42c8180b4e080aa071dd70fce7e03", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5343135595321655, "incorrect_loss_raw": 1.6379421651363373, "correct_loss_per_char": 0.7671567797660828, "incorrect_loss_per_char": 0.8189710825681686, "correct_loss_per_token": 1.5343135595321655, "incorrect_loss_per_token": 1.6379421651363373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.574865460395813, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.574865460395813, "logits_per_char": -0.7874327301979065, "num_chars": 2}, {"sum_logits": -1.5343135595321655, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.5343135595321655, "logits_per_char": -0.7671567797660828, "num_chars": 2}, {"sum_logits": -1.6790244579315186, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6790244579315186, "logits_per_char": -0.8395122289657593, "num_chars": 2}, {"sum_logits": -1.6400260925292969, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6400260925292969, "logits_per_char": -0.8200130462646484, "num_chars": 2}, {"sum_logits": -1.6578526496887207, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.6578526496887207, "logits_per_char": -0.8289263248443604, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1183, "native_id": "533599262a5dae7c7137cfe69e0e24fb", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7251319885253906, "incorrect_loss_raw": 1.5939865708351135, "correct_loss_per_char": 0.8625659942626953, "incorrect_loss_per_char": 0.7969932854175568, "correct_loss_per_token": 1.7251319885253906, "incorrect_loss_per_token": 1.5939865708351135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5789438486099243, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.5789438486099243, "logits_per_char": -0.7894719243049622, "num_chars": 2}, {"sum_logits": -1.7251319885253906, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.7251319885253906, "logits_per_char": -0.8625659942626953, "num_chars": 2}, {"sum_logits": -1.594491958618164, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.594491958618164, "logits_per_char": -0.797245979309082, "num_chars": 2}, {"sum_logits": -1.6793630123138428, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.6793630123138428, "logits_per_char": -0.8396815061569214, "num_chars": 2}, {"sum_logits": -1.523147463798523, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.523147463798523, "logits_per_char": -0.7615737318992615, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1184, "native_id": "edd1634d911614590c6b8ca730df95fe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5417412519454956, "incorrect_loss_raw": 1.6458271443843842, "correct_loss_per_char": 0.7708706259727478, "incorrect_loss_per_char": 0.8229135721921921, "correct_loss_per_token": 1.5417412519454956, "incorrect_loss_per_token": 1.6458271443843842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4644967317581177, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.4644967317581177, "logits_per_char": -0.7322483658790588, "num_chars": 2}, {"sum_logits": -1.6629979610443115, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6629979610443115, "logits_per_char": -0.8314989805221558, "num_chars": 2}, {"sum_logits": -1.5729552507400513, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5729552507400513, "logits_per_char": -0.7864776253700256, "num_chars": 2}, {"sum_logits": -1.5417412519454956, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5417412519454956, "logits_per_char": -0.7708706259727478, "num_chars": 2}, {"sum_logits": -1.8828586339950562, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.8828586339950562, "logits_per_char": -0.9414293169975281, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1185, "native_id": "9a544e9f4847c41a15fdf47ae7b98d8a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.579732894897461, "incorrect_loss_raw": 1.6350242793560028, "correct_loss_per_char": 0.7898664474487305, "incorrect_loss_per_char": 0.8175121396780014, "correct_loss_per_token": 1.579732894897461, "incorrect_loss_per_token": 1.6350242793560028, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4536393880844116, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.4536393880844116, "logits_per_char": -0.7268196940422058, "num_chars": 2}, {"sum_logits": -1.579732894897461, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.579732894897461, "logits_per_char": -0.7898664474487305, "num_chars": 2}, {"sum_logits": -1.6863815784454346, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.6863815784454346, "logits_per_char": -0.8431907892227173, "num_chars": 2}, {"sum_logits": -1.5354831218719482, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.5354831218719482, "logits_per_char": -0.7677415609359741, "num_chars": 2}, {"sum_logits": -1.8645930290222168, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.8645930290222168, "logits_per_char": -0.9322965145111084, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1186, "native_id": "26bd85f05d29863ed777a4f1a4b8fa63", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8585785627365112, "incorrect_loss_raw": 1.629503846168518, "correct_loss_per_char": 0.9292892813682556, "incorrect_loss_per_char": 0.814751923084259, "correct_loss_per_token": 1.8585785627365112, "incorrect_loss_per_token": 1.629503846168518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1735469102859497, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.1735469102859497, "logits_per_char": -0.5867734551429749, "num_chars": 2}, {"sum_logits": -1.4960448741912842, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4960448741912842, "logits_per_char": -0.7480224370956421, "num_chars": 2}, {"sum_logits": -1.6417843103408813, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6417843103408813, "logits_per_char": -0.8208921551704407, "num_chars": 2}, {"sum_logits": -1.8585785627365112, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.8585785627365112, "logits_per_char": -0.9292892813682556, "num_chars": 2}, {"sum_logits": -2.206639289855957, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.206639289855957, "logits_per_char": -1.1033196449279785, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1187, "native_id": "3884d82524f2337ce53ce64776293cf7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3731721639633179, "incorrect_loss_raw": 1.6882920265197754, "correct_loss_per_char": 0.6865860819816589, "incorrect_loss_per_char": 0.8441460132598877, "correct_loss_per_token": 1.3731721639633179, "incorrect_loss_per_token": 1.6882920265197754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3731721639633179, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": true, "logits_per_token": -1.3731721639633179, "logits_per_char": -0.6865860819816589, "num_chars": 2}, {"sum_logits": -1.6297719478607178, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6297719478607178, "logits_per_char": -0.8148859739303589, "num_chars": 2}, {"sum_logits": -1.6544687747955322, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6544687747955322, "logits_per_char": -0.8272343873977661, "num_chars": 2}, {"sum_logits": -1.6608803272247314, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.6608803272247314, "logits_per_char": -0.8304401636123657, "num_chars": 2}, {"sum_logits": -1.8080470561981201, "num_tokens": 1, "num_tokens_all": 299, "is_greedy": false, "logits_per_token": -1.8080470561981201, "logits_per_char": -0.9040235280990601, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1188, "native_id": "acb3147d946db3b06a596d48e0be56cf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.508557915687561, "incorrect_loss_raw": 1.6532451510429382, "correct_loss_per_char": 0.7542789578437805, "incorrect_loss_per_char": 0.8266225755214691, "correct_loss_per_token": 1.508557915687561, "incorrect_loss_per_token": 1.6532451510429382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.508557915687561, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.508557915687561, "logits_per_char": -0.7542789578437805, "num_chars": 2}, {"sum_logits": -1.604146122932434, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.604146122932434, "logits_per_char": -0.802073061466217, "num_chars": 2}, {"sum_logits": -1.53095281124115, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.53095281124115, "logits_per_char": -0.765476405620575, "num_chars": 2}, {"sum_logits": -1.5732301473617554, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5732301473617554, "logits_per_char": -0.7866150736808777, "num_chars": 2}, {"sum_logits": -1.9046515226364136, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.9046515226364136, "logits_per_char": -0.9523257613182068, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1189, "native_id": "52ab95f9216f1994e37cc08f7f258f13", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7007286548614502, "incorrect_loss_raw": 1.6756758987903595, "correct_loss_per_char": 0.8503643274307251, "incorrect_loss_per_char": 0.8378379493951797, "correct_loss_per_token": 1.7007286548614502, "incorrect_loss_per_token": 1.6756758987903595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1601516008377075, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.1601516008377075, "logits_per_char": -0.5800758004188538, "num_chars": 2}, {"sum_logits": -1.5085129737854004, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.5085129737854004, "logits_per_char": -0.7542564868927002, "num_chars": 2}, {"sum_logits": -1.7007286548614502, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.7007286548614502, "logits_per_char": -0.8503643274307251, "num_chars": 2}, {"sum_logits": -1.739197015762329, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.739197015762329, "logits_per_char": -0.8695985078811646, "num_chars": 2}, {"sum_logits": -2.294842004776001, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -2.294842004776001, "logits_per_char": -1.1474210023880005, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1190, "native_id": "f60641f550d5ee44ac1bedcaf6ad6357", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4512542486190796, "incorrect_loss_raw": 1.69233438372612, "correct_loss_per_char": 0.7256271243095398, "incorrect_loss_per_char": 0.84616719186306, "correct_loss_per_token": 1.4512542486190796, "incorrect_loss_per_token": 1.69233438372612, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3476225137710571, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3476225137710571, "logits_per_char": -0.6738112568855286, "num_chars": 2}, {"sum_logits": -1.4512542486190796, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4512542486190796, "logits_per_char": -0.7256271243095398, "num_chars": 2}, {"sum_logits": -1.6004558801651, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.6004558801651, "logits_per_char": -0.80022794008255, "num_chars": 2}, {"sum_logits": -1.7921048402786255, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7921048402786255, "logits_per_char": -0.8960524201393127, "num_chars": 2}, {"sum_logits": -2.0291543006896973, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.0291543006896973, "logits_per_char": -1.0145771503448486, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1191, "native_id": "d9835ede7a0ed79325de13ca95b85b78", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8360567092895508, "incorrect_loss_raw": 1.6291730105876923, "correct_loss_per_char": 0.9180283546447754, "incorrect_loss_per_char": 0.8145865052938461, "correct_loss_per_token": 1.8360567092895508, "incorrect_loss_per_token": 1.6291730105876923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2064863443374634, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.2064863443374634, "logits_per_char": -0.6032431721687317, "num_chars": 2}, {"sum_logits": -1.4681966304779053, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4681966304779053, "logits_per_char": -0.7340983152389526, "num_chars": 2}, {"sum_logits": -1.635279893875122, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.635279893875122, "logits_per_char": -0.817639946937561, "num_chars": 2}, {"sum_logits": -1.8360567092895508, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.8360567092895508, "logits_per_char": -0.9180283546447754, "num_chars": 2}, {"sum_logits": -2.2067291736602783, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.2067291736602783, "logits_per_char": -1.1033645868301392, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1192, "native_id": "2987db72e66f5fa0015ac64f9b3614ec", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2709599733352661, "incorrect_loss_raw": 1.7824685871601105, "correct_loss_per_char": 0.6354799866676331, "incorrect_loss_per_char": 0.8912342935800552, "correct_loss_per_token": 1.2709599733352661, "incorrect_loss_per_token": 1.7824685871601105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2709599733352661, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2709599733352661, "logits_per_char": -0.6354799866676331, "num_chars": 2}, {"sum_logits": -1.3685733079910278, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.3685733079910278, "logits_per_char": -0.6842866539955139, "num_chars": 2}, {"sum_logits": -1.625638723373413, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.625638723373413, "logits_per_char": -0.8128193616867065, "num_chars": 2}, {"sum_logits": -1.7877702713012695, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7877702713012695, "logits_per_char": -0.8938851356506348, "num_chars": 2}, {"sum_logits": -2.3478920459747314, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.3478920459747314, "logits_per_char": -1.1739460229873657, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1193, "native_id": "8b548832703a8c68a788e2f9c0e222ae", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4284331798553467, "incorrect_loss_raw": 1.6727150082588196, "correct_loss_per_char": 0.7142165899276733, "incorrect_loss_per_char": 0.8363575041294098, "correct_loss_per_token": 1.4284331798553467, "incorrect_loss_per_token": 1.6727150082588196, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4284331798553467, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4284331798553467, "logits_per_char": -0.7142165899276733, "num_chars": 2}, {"sum_logits": -1.706092119216919, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.706092119216919, "logits_per_char": -0.8530460596084595, "num_chars": 2}, {"sum_logits": -1.610485315322876, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.610485315322876, "logits_per_char": -0.805242657661438, "num_chars": 2}, {"sum_logits": -1.5439372062683105, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5439372062683105, "logits_per_char": -0.7719686031341553, "num_chars": 2}, {"sum_logits": -1.8303453922271729, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.8303453922271729, "logits_per_char": -0.9151726961135864, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1194, "native_id": "1ddd239a2a6438a891cb411b82e7f450", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.971964955329895, "incorrect_loss_raw": 1.5456597208976746, "correct_loss_per_char": 0.9859824776649475, "incorrect_loss_per_char": 0.7728298604488373, "correct_loss_per_token": 1.971964955329895, "incorrect_loss_per_token": 1.5456597208976746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4201396703720093, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.4201396703720093, "logits_per_char": -0.7100698351860046, "num_chars": 2}, {"sum_logits": -1.5326467752456665, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5326467752456665, "logits_per_char": -0.7663233876228333, "num_chars": 2}, {"sum_logits": -1.5687073469161987, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5687073469161987, "logits_per_char": -0.7843536734580994, "num_chars": 2}, {"sum_logits": -1.6611450910568237, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.6611450910568237, "logits_per_char": -0.8305725455284119, "num_chars": 2}, {"sum_logits": -1.971964955329895, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.971964955329895, "logits_per_char": -0.9859824776649475, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1195, "native_id": "6544a50bf9563d52dbd2034e81df0bf3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5989298820495605, "incorrect_loss_raw": 1.6559533774852753, "correct_loss_per_char": 0.7994649410247803, "incorrect_loss_per_char": 0.8279766887426376, "correct_loss_per_token": 1.5989298820495605, "incorrect_loss_per_token": 1.6559533774852753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3614410161972046, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.3614410161972046, "logits_per_char": -0.6807205080986023, "num_chars": 2}, {"sum_logits": -1.5122740268707275, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5122740268707275, "logits_per_char": -0.7561370134353638, "num_chars": 2}, {"sum_logits": -1.6219353675842285, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6219353675842285, "logits_per_char": -0.8109676837921143, "num_chars": 2}, {"sum_logits": -1.5989298820495605, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5989298820495605, "logits_per_char": -0.7994649410247803, "num_chars": 2}, {"sum_logits": -2.1281630992889404, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -2.1281630992889404, "logits_per_char": -1.0640815496444702, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1196, "native_id": "5ff6ce8ad88459272ffe23d33db4970a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5716723203659058, "incorrect_loss_raw": 1.6350741684436798, "correct_loss_per_char": 0.7858361601829529, "incorrect_loss_per_char": 0.8175370842218399, "correct_loss_per_token": 1.5716723203659058, "incorrect_loss_per_token": 1.6350741684436798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4814180135726929, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": true, "logits_per_token": -1.4814180135726929, "logits_per_char": -0.7407090067863464, "num_chars": 2}, {"sum_logits": -1.5384373664855957, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.5384373664855957, "logits_per_char": -0.7692186832427979, "num_chars": 2}, {"sum_logits": -1.6723062992095947, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.6723062992095947, "logits_per_char": -0.8361531496047974, "num_chars": 2}, {"sum_logits": -1.5716723203659058, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.5716723203659058, "logits_per_char": -0.7858361601829529, "num_chars": 2}, {"sum_logits": -1.848134994506836, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.848134994506836, "logits_per_char": -0.924067497253418, "num_chars": 2}], "label": 3, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1197, "native_id": "2ca05683157a3cd89d82016f13e560ec", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.618178129196167, "incorrect_loss_raw": 1.6399831771850586, "correct_loss_per_char": 0.8090890645980835, "incorrect_loss_per_char": 0.8199915885925293, "correct_loss_per_token": 1.618178129196167, "incorrect_loss_per_token": 1.6399831771850586, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2856974601745605, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.2856974601745605, "logits_per_char": -0.6428487300872803, "num_chars": 2}, {"sum_logits": -1.618178129196167, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.618178129196167, "logits_per_char": -0.8090890645980835, "num_chars": 2}, {"sum_logits": -1.656501054763794, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.656501054763794, "logits_per_char": -0.828250527381897, "num_chars": 2}, {"sum_logits": -1.7038087844848633, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.7038087844848633, "logits_per_char": -0.8519043922424316, "num_chars": 2}, {"sum_logits": -1.9139254093170166, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.9139254093170166, "logits_per_char": -0.9569627046585083, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1198, "native_id": "1a8fbab20bbdf0bbf3961894662d5f7c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9167389869689941, "incorrect_loss_raw": 1.5599529147148132, "correct_loss_per_char": 0.9583694934844971, "incorrect_loss_per_char": 0.7799764573574066, "correct_loss_per_token": 1.9167389869689941, "incorrect_loss_per_token": 1.5599529147148132, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.371605634689331, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.371605634689331, "logits_per_char": -0.6858028173446655, "num_chars": 2}, {"sum_logits": -1.4932003021240234, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.4932003021240234, "logits_per_char": -0.7466001510620117, "num_chars": 2}, {"sum_logits": -1.6493194103240967, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6493194103240967, "logits_per_char": -0.8246597051620483, "num_chars": 2}, {"sum_logits": -1.7256863117218018, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7256863117218018, "logits_per_char": -0.8628431558609009, "num_chars": 2}, {"sum_logits": -1.9167389869689941, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.9167389869689941, "logits_per_char": -0.9583694934844971, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1199, "native_id": "5b5d2a8b83282f61c68a870116042f64", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.312911033630371, "incorrect_loss_raw": 1.5240024626255035, "correct_loss_per_char": 1.1564555168151855, "incorrect_loss_per_char": 0.7620012313127518, "correct_loss_per_token": 2.312911033630371, "incorrect_loss_per_token": 1.5240024626255035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1455471515655518, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.1455471515655518, "logits_per_char": -0.5727735757827759, "num_chars": 2}, {"sum_logits": -1.5695360898971558, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.5695360898971558, "logits_per_char": -0.7847680449485779, "num_chars": 2}, {"sum_logits": -1.6075235605239868, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.6075235605239868, "logits_per_char": -0.8037617802619934, "num_chars": 2}, {"sum_logits": -1.7734030485153198, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.7734030485153198, "logits_per_char": -0.8867015242576599, "num_chars": 2}, {"sum_logits": -2.312911033630371, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -2.312911033630371, "logits_per_char": -1.1564555168151855, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1200, "native_id": "cfa081b5ba90dae4d7ddb5b7ad9d369a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.000662326812744, "incorrect_loss_raw": 1.5533823370933533, "correct_loss_per_char": 1.000331163406372, "incorrect_loss_per_char": 0.7766911685466766, "correct_loss_per_token": 2.000662326812744, "incorrect_loss_per_token": 1.5533823370933533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.300218105316162, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.300218105316162, "logits_per_char": -0.650109052658081, "num_chars": 2}, {"sum_logits": -1.5546581745147705, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5546581745147705, "logits_per_char": -0.7773290872573853, "num_chars": 2}, {"sum_logits": -1.5690503120422363, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5690503120422363, "logits_per_char": -0.7845251560211182, "num_chars": 2}, {"sum_logits": -1.7896027565002441, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.7896027565002441, "logits_per_char": -0.8948013782501221, "num_chars": 2}, {"sum_logits": -2.000662326812744, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -2.000662326812744, "logits_per_char": -1.000331163406372, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1201, "native_id": "009a7aabffe0583fc2df46656b29c326", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5850909948349, "incorrect_loss_raw": 1.653887152671814, "correct_loss_per_char": 0.79254549741745, "incorrect_loss_per_char": 0.826943576335907, "correct_loss_per_token": 1.5850909948349, "incorrect_loss_per_token": 1.653887152671814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3558380603790283, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": true, "logits_per_token": -1.3558380603790283, "logits_per_char": -0.6779190301895142, "num_chars": 2}, {"sum_logits": -1.5674117803573608, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5674117803573608, "logits_per_char": -0.7837058901786804, "num_chars": 2}, {"sum_logits": -1.5850909948349, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.5850909948349, "logits_per_char": -0.79254549741745, "num_chars": 2}, {"sum_logits": -1.6301640272140503, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -1.6301640272140503, "logits_per_char": -0.8150820136070251, "num_chars": 2}, {"sum_logits": -2.0621347427368164, "num_tokens": 1, "num_tokens_all": 292, "is_greedy": false, "logits_per_token": -2.0621347427368164, "logits_per_char": -1.0310673713684082, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1202, "native_id": "2521b3fe6bfd6aeb91f9107dc7c4fbee", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5426788330078125, "incorrect_loss_raw": 1.6662398278713226, "correct_loss_per_char": 0.7713394165039062, "incorrect_loss_per_char": 0.8331199139356613, "correct_loss_per_token": 1.5426788330078125, "incorrect_loss_per_token": 1.6662398278713226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3900195360183716, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3900195360183716, "logits_per_char": -0.6950097680091858, "num_chars": 2}, {"sum_logits": -1.4659647941589355, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4659647941589355, "logits_per_char": -0.7329823970794678, "num_chars": 2}, {"sum_logits": -1.5426788330078125, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5426788330078125, "logits_per_char": -0.7713394165039062, "num_chars": 2}, {"sum_logits": -1.7447786331176758, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.7447786331176758, "logits_per_char": -0.8723893165588379, "num_chars": 2}, {"sum_logits": -2.0641963481903076, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.0641963481903076, "logits_per_char": -1.0320981740951538, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1203, "native_id": "3fe45ab3bd4a844ea290050fc0ece8c1_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1978507041931152, "incorrect_loss_raw": 1.512844294309616, "correct_loss_per_char": 1.0989253520965576, "incorrect_loss_per_char": 0.756422147154808, "correct_loss_per_token": 2.1978507041931152, "incorrect_loss_per_token": 1.512844294309616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3861082792282104, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.3861082792282104, "logits_per_char": -0.6930541396141052, "num_chars": 2}, {"sum_logits": -1.5322685241699219, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5322685241699219, "logits_per_char": -0.7661342620849609, "num_chars": 2}, {"sum_logits": -1.5352131128311157, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5352131128311157, "logits_per_char": -0.7676065564155579, "num_chars": 2}, {"sum_logits": -1.5977872610092163, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5977872610092163, "logits_per_char": -0.7988936305046082, "num_chars": 2}, {"sum_logits": -2.1978507041931152, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.1978507041931152, "logits_per_char": -1.0989253520965576, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1204, "native_id": "a2e0f6b5651e5271fcff8d6f5c9adfee", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2746429443359375, "incorrect_loss_raw": 1.7835743427276611, "correct_loss_per_char": 0.6373214721679688, "incorrect_loss_per_char": 0.8917871713638306, "correct_loss_per_token": 1.2746429443359375, "incorrect_loss_per_token": 1.7835743427276611, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2746429443359375, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.2746429443359375, "logits_per_char": -0.6373214721679688, "num_chars": 2}, {"sum_logits": -1.3743417263031006, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.3743417263031006, "logits_per_char": -0.6871708631515503, "num_chars": 2}, {"sum_logits": -1.6349444389343262, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.6349444389343262, "logits_per_char": -0.8174722194671631, "num_chars": 2}, {"sum_logits": -1.7337861061096191, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.7337861061096191, "logits_per_char": -0.8668930530548096, "num_chars": 2}, {"sum_logits": -2.3912250995635986, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -2.3912250995635986, "logits_per_char": -1.1956125497817993, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1205, "native_id": "d6900a01a9dd6627b4bb22b0f6d191a5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1811604499816895, "incorrect_loss_raw": 1.5260646045207977, "correct_loss_per_char": 1.0905802249908447, "incorrect_loss_per_char": 0.7630323022603989, "correct_loss_per_token": 2.1811604499816895, "incorrect_loss_per_token": 1.5260646045207977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2657018899917603, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": true, "logits_per_token": -1.2657018899917603, "logits_per_char": -0.6328509449958801, "num_chars": 2}, {"sum_logits": -1.4938759803771973, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.4938759803771973, "logits_per_char": -0.7469379901885986, "num_chars": 2}, {"sum_logits": -1.6219104528427124, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.6219104528427124, "logits_per_char": -0.8109552264213562, "num_chars": 2}, {"sum_logits": -1.722770094871521, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -1.722770094871521, "logits_per_char": -0.8613850474357605, "num_chars": 2}, {"sum_logits": -2.1811604499816895, "num_tokens": 1, "num_tokens_all": 287, "is_greedy": false, "logits_per_token": -2.1811604499816895, "logits_per_char": -1.0905802249908447, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1206, "native_id": "8f2976690c83be6b8fa3a1196dfd9722", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0358150005340576, "incorrect_loss_raw": 1.550068438053131, "correct_loss_per_char": 1.0179075002670288, "incorrect_loss_per_char": 0.7750342190265656, "correct_loss_per_token": 2.0358150005340576, "incorrect_loss_per_token": 1.550068438053131, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2729649543762207, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.2729649543762207, "logits_per_char": -0.6364824771881104, "num_chars": 2}, {"sum_logits": -1.5083873271942139, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.5083873271942139, "logits_per_char": -0.7541936635971069, "num_chars": 2}, {"sum_logits": -1.7439215183258057, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.7439215183258057, "logits_per_char": -0.8719607591629028, "num_chars": 2}, {"sum_logits": -1.6749999523162842, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.6749999523162842, "logits_per_char": -0.8374999761581421, "num_chars": 2}, {"sum_logits": -2.0358150005340576, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -2.0358150005340576, "logits_per_char": -1.0179075002670288, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1207, "native_id": "570be8c1edb8c638603dc5c8cae421cc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5227445363998413, "incorrect_loss_raw": 1.6396999061107635, "correct_loss_per_char": 0.7613722681999207, "incorrect_loss_per_char": 0.8198499530553818, "correct_loss_per_token": 1.5227445363998413, "incorrect_loss_per_token": 1.6396999061107635, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6015410423278809, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.6015410423278809, "logits_per_char": -0.8007705211639404, "num_chars": 2}, {"sum_logits": -1.645356297492981, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.645356297492981, "logits_per_char": -0.8226781487464905, "num_chars": 2}, {"sum_logits": -1.5227445363998413, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.5227445363998413, "logits_per_char": -0.7613722681999207, "num_chars": 2}, {"sum_logits": -1.5983506441116333, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.5983506441116333, "logits_per_char": -0.7991753220558167, "num_chars": 2}, {"sum_logits": -1.713551640510559, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.713551640510559, "logits_per_char": -0.8567758202552795, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1208, "native_id": "08d3175de59a639be02f2ebc032d56bd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0293161869049072, "incorrect_loss_raw": 1.538998782634735, "correct_loss_per_char": 1.0146580934524536, "incorrect_loss_per_char": 0.7694993913173676, "correct_loss_per_token": 2.0293161869049072, "incorrect_loss_per_token": 1.538998782634735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.347078800201416, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.347078800201416, "logits_per_char": -0.673539400100708, "num_chars": 2}, {"sum_logits": -1.580005168914795, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.580005168914795, "logits_per_char": -0.7900025844573975, "num_chars": 2}, {"sum_logits": -1.5996580123901367, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5996580123901367, "logits_per_char": -0.7998290061950684, "num_chars": 2}, {"sum_logits": -1.6292531490325928, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6292531490325928, "logits_per_char": -0.8146265745162964, "num_chars": 2}, {"sum_logits": -2.0293161869049072, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.0293161869049072, "logits_per_char": -1.0146580934524536, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1209, "native_id": "549cf641318edfc0510fa7c7dbb359e1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5422784090042114, "incorrect_loss_raw": 1.6739560067653656, "correct_loss_per_char": 0.7711392045021057, "incorrect_loss_per_char": 0.8369780033826828, "correct_loss_per_token": 1.5422784090042114, "incorrect_loss_per_token": 1.6739560067653656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3707866668701172, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.3707866668701172, "logits_per_char": -0.6853933334350586, "num_chars": 2}, {"sum_logits": -1.4545199871063232, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.4545199871063232, "logits_per_char": -0.7272599935531616, "num_chars": 2}, {"sum_logits": -1.5422784090042114, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5422784090042114, "logits_per_char": -0.7711392045021057, "num_chars": 2}, {"sum_logits": -1.7496038675308228, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.7496038675308228, "logits_per_char": -0.8748019337654114, "num_chars": 2}, {"sum_logits": -2.120913505554199, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -2.120913505554199, "logits_per_char": -1.0604567527770996, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1210, "native_id": "dfa23d3422b7294843447b6950d2b476", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.274505853652954, "incorrect_loss_raw": 1.5180341303348541, "correct_loss_per_char": 1.137252926826477, "incorrect_loss_per_char": 0.7590170651674271, "correct_loss_per_token": 2.274505853652954, "incorrect_loss_per_token": 1.5180341303348541, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2870599031448364, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2870599031448364, "logits_per_char": -0.6435299515724182, "num_chars": 2}, {"sum_logits": -1.4291927814483643, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4291927814483643, "logits_per_char": -0.7145963907241821, "num_chars": 2}, {"sum_logits": -1.5970606803894043, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5970606803894043, "logits_per_char": -0.7985303401947021, "num_chars": 2}, {"sum_logits": -1.7588231563568115, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.7588231563568115, "logits_per_char": -0.8794115781784058, "num_chars": 2}, {"sum_logits": -2.274505853652954, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -2.274505853652954, "logits_per_char": -1.137252926826477, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1211, "native_id": "1fe90a4aee405e1aa2279442d28803ae", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.304147720336914, "incorrect_loss_raw": 1.5149677991867065, "correct_loss_per_char": 1.152073860168457, "incorrect_loss_per_char": 0.7574838995933533, "correct_loss_per_token": 2.304147720336914, "incorrect_loss_per_token": 1.5149677991867065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2153187990188599, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2153187990188599, "logits_per_char": -0.6076593995094299, "num_chars": 2}, {"sum_logits": -1.5556111335754395, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.5556111335754395, "logits_per_char": -0.7778055667877197, "num_chars": 2}, {"sum_logits": -1.552307367324829, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.552307367324829, "logits_per_char": -0.7761536836624146, "num_chars": 2}, {"sum_logits": -1.7366338968276978, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.7366338968276978, "logits_per_char": -0.8683169484138489, "num_chars": 2}, {"sum_logits": -2.304147720336914, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -2.304147720336914, "logits_per_char": -1.152073860168457, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1212, "native_id": "01794dde3ca2991615f1aa2f63fb22e3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8596346378326416, "incorrect_loss_raw": 1.5630083680152893, "correct_loss_per_char": 0.9298173189163208, "incorrect_loss_per_char": 0.7815041840076447, "correct_loss_per_token": 1.8596346378326416, "incorrect_loss_per_token": 1.5630083680152893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5972809791564941, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.5972809791564941, "logits_per_char": -0.7986404895782471, "num_chars": 2}, {"sum_logits": -1.4723215103149414, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.4723215103149414, "logits_per_char": -0.7361607551574707, "num_chars": 2}, {"sum_logits": -1.6384508609771729, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6384508609771729, "logits_per_char": -0.8192254304885864, "num_chars": 2}, {"sum_logits": -1.5439801216125488, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.5439801216125488, "logits_per_char": -0.7719900608062744, "num_chars": 2}, {"sum_logits": -1.8596346378326416, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.8596346378326416, "logits_per_char": -0.9298173189163208, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1213, "native_id": "f794e376672c98ac25d8f70506a26e68", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3561018705368042, "incorrect_loss_raw": 1.712031602859497, "correct_loss_per_char": 0.6780509352684021, "incorrect_loss_per_char": 0.8560158014297485, "correct_loss_per_token": 1.3561018705368042, "incorrect_loss_per_token": 1.712031602859497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3561018705368042, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3561018705368042, "logits_per_char": -0.6780509352684021, "num_chars": 2}, {"sum_logits": -1.5428338050842285, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5428338050842285, "logits_per_char": -0.7714169025421143, "num_chars": 2}, {"sum_logits": -1.5713272094726562, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5713272094726562, "logits_per_char": -0.7856636047363281, "num_chars": 2}, {"sum_logits": -1.6854703426361084, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.6854703426361084, "logits_per_char": -0.8427351713180542, "num_chars": 2}, {"sum_logits": -2.048495054244995, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -2.048495054244995, "logits_per_char": -1.0242475271224976, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1214, "native_id": "ace8fa2943ba8414aebdb74b48906fae", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8574237823486328, "incorrect_loss_raw": 1.5646561086177826, "correct_loss_per_char": 0.9287118911743164, "incorrect_loss_per_char": 0.7823280543088913, "correct_loss_per_token": 1.8574237823486328, "incorrect_loss_per_token": 1.5646561086177826, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6515697240829468, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.6515697240829468, "logits_per_char": -0.8257848620414734, "num_chars": 2}, {"sum_logits": -1.5046660900115967, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.5046660900115967, "logits_per_char": -0.7523330450057983, "num_chars": 2}, {"sum_logits": -1.579842448234558, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.579842448234558, "logits_per_char": -0.789921224117279, "num_chars": 2}, {"sum_logits": -1.5225461721420288, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5225461721420288, "logits_per_char": -0.7612730860710144, "num_chars": 2}, {"sum_logits": -1.8574237823486328, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.8574237823486328, "logits_per_char": -0.9287118911743164, "num_chars": 2}], "label": 4, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1215, "native_id": "21ce6f7c5c3d1ad8cf234988c1ad471f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5107898712158203, "incorrect_loss_raw": 1.6690286099910736, "correct_loss_per_char": 0.7553949356079102, "incorrect_loss_per_char": 0.8345143049955368, "correct_loss_per_token": 1.5107898712158203, "incorrect_loss_per_token": 1.6690286099910736, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4173341989517212, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.4173341989517212, "logits_per_char": -0.7086670994758606, "num_chars": 2}, {"sum_logits": -1.5107898712158203, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5107898712158203, "logits_per_char": -0.7553949356079102, "num_chars": 2}, {"sum_logits": -1.540299654006958, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.540299654006958, "logits_per_char": -0.770149827003479, "num_chars": 2}, {"sum_logits": -1.6459102630615234, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.6459102630615234, "logits_per_char": -0.8229551315307617, "num_chars": 2}, {"sum_logits": -2.072570323944092, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -2.072570323944092, "logits_per_char": -1.036285161972046, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1216, "native_id": "6c84e79d0595efd99596faa07c4961d0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2604882717132568, "incorrect_loss_raw": 1.7942901253700256, "correct_loss_per_char": 0.6302441358566284, "incorrect_loss_per_char": 0.8971450626850128, "correct_loss_per_token": 1.2604882717132568, "incorrect_loss_per_token": 1.7942901253700256, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2604882717132568, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2604882717132568, "logits_per_char": -0.6302441358566284, "num_chars": 2}, {"sum_logits": -1.4690542221069336, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4690542221069336, "logits_per_char": -0.7345271110534668, "num_chars": 2}, {"sum_logits": -1.5622056722640991, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5622056722640991, "logits_per_char": -0.7811028361320496, "num_chars": 2}, {"sum_logits": -1.6790560483932495, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6790560483932495, "logits_per_char": -0.8395280241966248, "num_chars": 2}, {"sum_logits": -2.4668445587158203, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -2.4668445587158203, "logits_per_char": -1.2334222793579102, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1217, "native_id": "88f1fe6cfbcb1a25f25454341c789463", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5709586143493652, "incorrect_loss_raw": 1.6397863030433655, "correct_loss_per_char": 0.7854793071746826, "incorrect_loss_per_char": 0.8198931515216827, "correct_loss_per_token": 1.5709586143493652, "incorrect_loss_per_token": 1.6397863030433655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4916627407073975, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4916627407073975, "logits_per_char": -0.7458313703536987, "num_chars": 2}, {"sum_logits": -1.5709586143493652, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5709586143493652, "logits_per_char": -0.7854793071746826, "num_chars": 2}, {"sum_logits": -1.4869165420532227, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.4869165420532227, "logits_per_char": -0.7434582710266113, "num_chars": 2}, {"sum_logits": -1.6590874195098877, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6590874195098877, "logits_per_char": -0.8295437097549438, "num_chars": 2}, {"sum_logits": -1.921478509902954, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.921478509902954, "logits_per_char": -0.960739254951477, "num_chars": 2}], "label": 1, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1218, "native_id": "5074bcaf0f700c9f3c8c563067af156a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5976159572601318, "incorrect_loss_raw": 1.6494799256324768, "correct_loss_per_char": 0.7988079786300659, "incorrect_loss_per_char": 0.8247399628162384, "correct_loss_per_token": 1.5976159572601318, "incorrect_loss_per_token": 1.6494799256324768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4143221378326416, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.4143221378326416, "logits_per_char": -0.7071610689163208, "num_chars": 2}, {"sum_logits": -1.445061206817627, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.445061206817627, "logits_per_char": -0.7225306034088135, "num_chars": 2}, {"sum_logits": -1.5976159572601318, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5976159572601318, "logits_per_char": -0.7988079786300659, "num_chars": 2}, {"sum_logits": -1.6656978130340576, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.6656978130340576, "logits_per_char": -0.8328489065170288, "num_chars": 2}, {"sum_logits": -2.072838544845581, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.072838544845581, "logits_per_char": -1.0364192724227905, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1219, "native_id": "6a253e076cd2af00e17d9950d70daf47", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.4495062828063965, "incorrect_loss_raw": 1.6728792190551758, "correct_loss_per_char": 0.7247531414031982, "incorrect_loss_per_char": 0.8364396095275879, "correct_loss_per_token": 1.4495062828063965, "incorrect_loss_per_token": 1.6728792190551758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4495062828063965, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": true, "logits_per_token": -1.4495062828063965, "logits_per_char": -0.7247531414031982, "num_chars": 2}, {"sum_logits": -1.5888445377349854, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5888445377349854, "logits_per_char": -0.7944222688674927, "num_chars": 2}, {"sum_logits": -1.5278379917144775, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.5278379917144775, "logits_per_char": -0.7639189958572388, "num_chars": 2}, {"sum_logits": -1.6130235195159912, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.6130235195159912, "logits_per_char": -0.8065117597579956, "num_chars": 2}, {"sum_logits": -1.961810827255249, "num_tokens": 1, "num_tokens_all": 285, "is_greedy": false, "logits_per_token": -1.961810827255249, "logits_per_char": -0.9809054136276245, "num_chars": 2}], "label": 0, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1220, "native_id": "5af7c7860e3be61d4cfd814cc109f9d9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5247924327850342, "incorrect_loss_raw": 1.673317164182663, "correct_loss_per_char": 0.7623962163925171, "incorrect_loss_per_char": 0.8366585820913315, "correct_loss_per_token": 1.5247924327850342, "incorrect_loss_per_token": 1.673317164182663, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3261994123458862, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.3261994123458862, "logits_per_char": -0.6630997061729431, "num_chars": 2}, {"sum_logits": -1.5448468923568726, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5448468923568726, "logits_per_char": -0.7724234461784363, "num_chars": 2}, {"sum_logits": -1.5247924327850342, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.5247924327850342, "logits_per_char": -0.7623962163925171, "num_chars": 2}, {"sum_logits": -1.7774287462234497, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.7774287462234497, "logits_per_char": -0.8887143731117249, "num_chars": 2}, {"sum_logits": -2.0447936058044434, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -2.0447936058044434, "logits_per_char": -1.0223968029022217, "num_chars": 2}], "label": 2, "task_hash": "7dd00b56a8058d62c908535d927b9cda", "model_hash": "03418cf8091a9882619950ffb07429a5"}