{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1613545417785645, "incorrect_loss_raw": 1.4834110339482625, "correct_loss_per_char": 0.5806772708892822, "incorrect_loss_per_char": 0.7417055169741312, "correct_loss_per_token": 1.1613545417785645, "incorrect_loss_per_token": 1.4834110339482625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1613545417785645, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": true, "logits_per_token": -1.1613545417785645, "logits_per_char": -0.5806772708892822, "num_chars": 2}, {"sum_logits": -1.4553941488265991, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.4553941488265991, "logits_per_char": -0.7276970744132996, "num_chars": 2}, {"sum_logits": -1.4849011898040771, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.4849011898040771, "logits_per_char": -0.7424505949020386, "num_chars": 2}, {"sum_logits": -1.5099377632141113, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.5099377632141113, "logits_per_char": -0.7549688816070557, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6598050594329834, "incorrect_loss_raw": 1.3559473752975464, "correct_loss_per_char": 0.8299025297164917, "incorrect_loss_per_char": 0.6779736876487732, "correct_loss_per_token": 1.6598050594329834, "incorrect_loss_per_token": 1.3559473752975464, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9810574054718018, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -0.9810574054718018, "logits_per_char": -0.4905287027359009, "num_chars": 2}, {"sum_logits": -1.4718139171600342, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.4718139171600342, "logits_per_char": -0.7359069585800171, "num_chars": 2}, {"sum_logits": -1.6149708032608032, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.6149708032608032, "logits_per_char": -0.8074854016304016, "num_chars": 2}, {"sum_logits": -1.6598050594329834, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.6598050594329834, "logits_per_char": -0.8299025297164917, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5157344341278076, "incorrect_loss_raw": 1.3669710556666057, "correct_loss_per_char": 0.7578672170639038, "incorrect_loss_per_char": 0.6834855278333029, "correct_loss_per_token": 1.5157344341278076, "incorrect_loss_per_token": 1.3669710556666057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1348968744277954, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": true, "logits_per_token": -1.1348968744277954, "logits_per_char": -0.5674484372138977, "num_chars": 2}, {"sum_logits": -1.4283640384674072, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.4283640384674072, "logits_per_char": -0.7141820192337036, "num_chars": 2}, {"sum_logits": -1.5157344341278076, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.5157344341278076, "logits_per_char": -0.7578672170639038, "num_chars": 2}, {"sum_logits": -1.5376522541046143, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.5376522541046143, "logits_per_char": -0.7688261270523071, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6131409406661987, "incorrect_loss_raw": 1.330522060394287, "correct_loss_per_char": 0.8065704703330994, "incorrect_loss_per_char": 0.6652610301971436, "correct_loss_per_token": 1.6131409406661987, "incorrect_loss_per_token": 1.330522060394287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2447543144226074, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": true, "logits_per_token": -1.2447543144226074, "logits_per_char": -0.6223771572113037, "num_chars": 2}, {"sum_logits": -1.3684262037277222, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.3684262037277222, "logits_per_char": -0.6842131018638611, "num_chars": 2}, {"sum_logits": -1.6131409406661987, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.6131409406661987, "logits_per_char": -0.8065704703330994, "num_chars": 2}, {"sum_logits": -1.3783856630325317, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.3783856630325317, "logits_per_char": -0.6891928315162659, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.82600998878479, "incorrect_loss_raw": 1.4229974548021953, "correct_loss_per_char": 0.913004994392395, "incorrect_loss_per_char": 0.7114987274010977, "correct_loss_per_token": 1.82600998878479, "incorrect_loss_per_token": 1.4229974548021953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7231029868125916, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": true, "logits_per_token": -0.7231029868125916, "logits_per_char": -0.3615514934062958, "num_chars": 2}, {"sum_logits": -1.5818445682525635, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.5818445682525635, "logits_per_char": -0.7909222841262817, "num_chars": 2}, {"sum_logits": -1.9640448093414307, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.9640448093414307, "logits_per_char": -0.9820224046707153, "num_chars": 2}, {"sum_logits": -1.82600998878479, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.82600998878479, "logits_per_char": -0.913004994392395, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5535805225372314, "incorrect_loss_raw": 1.3476033608118694, "correct_loss_per_char": 0.7767902612686157, "incorrect_loss_per_char": 0.6738016804059347, "correct_loss_per_token": 1.5535805225372314, "incorrect_loss_per_token": 1.3476033608118694, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2818500995635986, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": true, "logits_per_token": -1.2818500995635986, "logits_per_char": -0.6409250497817993, "num_chars": 2}, {"sum_logits": -1.4190274477005005, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.4190274477005005, "logits_per_char": -0.7095137238502502, "num_chars": 2}, {"sum_logits": -1.5535805225372314, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.5535805225372314, "logits_per_char": -0.7767902612686157, "num_chars": 2}, {"sum_logits": -1.3419325351715088, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.3419325351715088, "logits_per_char": -0.6709662675857544, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3779600858688354, "incorrect_loss_raw": 1.4064379930496216, "correct_loss_per_char": 0.6889800429344177, "incorrect_loss_per_char": 0.7032189965248108, "correct_loss_per_token": 1.3779600858688354, "incorrect_loss_per_token": 1.4064379930496216, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2495907545089722, "num_tokens": 1, "num_tokens_all": 652, "is_greedy": true, "logits_per_token": -1.2495907545089722, "logits_per_char": -0.6247953772544861, "num_chars": 2}, {"sum_logits": -1.3779600858688354, "num_tokens": 1, "num_tokens_all": 652, "is_greedy": false, "logits_per_token": -1.3779600858688354, "logits_per_char": -0.6889800429344177, "num_chars": 2}, {"sum_logits": -1.6083300113677979, "num_tokens": 1, "num_tokens_all": 652, "is_greedy": false, "logits_per_token": -1.6083300113677979, "logits_per_char": -0.8041650056838989, "num_chars": 2}, {"sum_logits": -1.3613932132720947, "num_tokens": 1, "num_tokens_all": 652, "is_greedy": false, "logits_per_token": -1.3613932132720947, "logits_per_char": -0.6806966066360474, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3880714178085327, "incorrect_loss_raw": 1.3973507483800252, "correct_loss_per_char": 0.6940357089042664, "incorrect_loss_per_char": 0.6986753741900126, "correct_loss_per_token": 1.3880714178085327, "incorrect_loss_per_token": 1.3973507483800252, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2640300989151, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": true, "logits_per_token": -1.2640300989151, "logits_per_char": -0.63201504945755, "num_chars": 2}, {"sum_logits": -1.3880714178085327, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.3880714178085327, "logits_per_char": -0.6940357089042664, "num_chars": 2}, {"sum_logits": -1.510805606842041, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.510805606842041, "logits_per_char": -0.7554028034210205, "num_chars": 2}, {"sum_logits": -1.4172165393829346, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.4172165393829346, "logits_per_char": -0.7086082696914673, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5236048698425293, "incorrect_loss_raw": 1.3693668842315674, "correct_loss_per_char": 0.7618024349212646, "incorrect_loss_per_char": 0.6846834421157837, "correct_loss_per_token": 1.5236048698425293, "incorrect_loss_per_token": 1.3693668842315674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1711643934249878, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": true, "logits_per_token": -1.1711643934249878, "logits_per_char": -0.5855821967124939, "num_chars": 2}, {"sum_logits": -1.3389643430709839, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.3389643430709839, "logits_per_char": -0.6694821715354919, "num_chars": 2}, {"sum_logits": -1.5979719161987305, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.5979719161987305, "logits_per_char": -0.7989859580993652, "num_chars": 2}, {"sum_logits": -1.5236048698425293, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.5236048698425293, "logits_per_char": -0.7618024349212646, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6592226028442383, "incorrect_loss_raw": 1.325545072555542, "correct_loss_per_char": 0.8296113014221191, "incorrect_loss_per_char": 0.662772536277771, "correct_loss_per_token": 1.6592226028442383, "incorrect_loss_per_token": 1.325545072555542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3411884307861328, "num_tokens": 1, "num_tokens_all": 708, "is_greedy": false, "logits_per_token": -1.3411884307861328, "logits_per_char": -0.6705942153930664, "num_chars": 2}, {"sum_logits": -1.1423819065093994, "num_tokens": 1, "num_tokens_all": 708, "is_greedy": true, "logits_per_token": -1.1423819065093994, "logits_per_char": -0.5711909532546997, "num_chars": 2}, {"sum_logits": -1.4930648803710938, "num_tokens": 1, "num_tokens_all": 708, "is_greedy": false, "logits_per_token": -1.4930648803710938, "logits_per_char": -0.7465324401855469, "num_chars": 2}, {"sum_logits": -1.6592226028442383, "num_tokens": 1, "num_tokens_all": 708, "is_greedy": false, "logits_per_token": -1.6592226028442383, "logits_per_char": -0.8296113014221191, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5523338317871094, "incorrect_loss_raw": 1.3469990094502766, "correct_loss_per_char": 0.7761669158935547, "incorrect_loss_per_char": 0.6734995047251383, "correct_loss_per_token": 1.5523338317871094, "incorrect_loss_per_token": 1.3469990094502766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.240119218826294, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": true, "logits_per_token": -1.240119218826294, "logits_per_char": -0.620059609413147, "num_chars": 2}, {"sum_logits": -1.322777271270752, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.322777271270752, "logits_per_char": -0.661388635635376, "num_chars": 2}, {"sum_logits": -1.4781005382537842, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.4781005382537842, "logits_per_char": -0.7390502691268921, "num_chars": 2}, {"sum_logits": -1.5523338317871094, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.5523338317871094, "logits_per_char": -0.7761669158935547, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5172017812728882, "incorrect_loss_raw": 1.35405437151591, "correct_loss_per_char": 0.7586008906364441, "incorrect_loss_per_char": 0.677027185757955, "correct_loss_per_token": 1.5172017812728882, "incorrect_loss_per_token": 1.35405437151591, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2755858898162842, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": true, "logits_per_token": -1.2755858898162842, "logits_per_char": -0.6377929449081421, "num_chars": 2}, {"sum_logits": -1.3608039617538452, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.3608039617538452, "logits_per_char": -0.6804019808769226, "num_chars": 2}, {"sum_logits": -1.5172017812728882, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.5172017812728882, "logits_per_char": -0.7586008906364441, "num_chars": 2}, {"sum_logits": -1.4257732629776, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.4257732629776, "logits_per_char": -0.7128866314888, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.120964765548706, "incorrect_loss_raw": 1.5089302062988281, "correct_loss_per_char": 0.560482382774353, "incorrect_loss_per_char": 0.7544651031494141, "correct_loss_per_token": 1.120964765548706, "incorrect_loss_per_token": 1.5089302062988281, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.120964765548706, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": true, "logits_per_token": -1.120964765548706, "logits_per_char": -0.560482382774353, "num_chars": 2}, {"sum_logits": -1.4857276678085327, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.4857276678085327, "logits_per_char": -0.7428638339042664, "num_chars": 2}, {"sum_logits": -1.639350175857544, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.639350175857544, "logits_per_char": -0.819675087928772, "num_chars": 2}, {"sum_logits": -1.4017127752304077, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.4017127752304077, "logits_per_char": -0.7008563876152039, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5951941013336182, "incorrect_loss_raw": 1.3423208395640056, "correct_loss_per_char": 0.7975970506668091, "incorrect_loss_per_char": 0.6711604197820028, "correct_loss_per_token": 1.5951941013336182, "incorrect_loss_per_token": 1.3423208395640056, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2029792070388794, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": true, "logits_per_token": -1.2029792070388794, "logits_per_char": -0.6014896035194397, "num_chars": 2}, {"sum_logits": -1.283704161643982, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.283704161643982, "logits_per_char": -0.641852080821991, "num_chars": 2}, {"sum_logits": -1.5402791500091553, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.5402791500091553, "logits_per_char": -0.7701395750045776, "num_chars": 2}, {"sum_logits": -1.5951941013336182, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.5951941013336182, "logits_per_char": -0.7975970506668091, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5606584548950195, "incorrect_loss_raw": 1.3636378049850464, "correct_loss_per_char": 0.7803292274475098, "incorrect_loss_per_char": 0.6818189024925232, "correct_loss_per_token": 1.5606584548950195, "incorrect_loss_per_token": 1.3636378049850464, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1145148277282715, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -1.1145148277282715, "logits_per_char": -0.5572574138641357, "num_chars": 2}, {"sum_logits": -1.5606584548950195, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5606584548950195, "logits_per_char": -0.7803292274475098, "num_chars": 2}, {"sum_logits": -1.5627470016479492, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5627470016479492, "logits_per_char": -0.7813735008239746, "num_chars": 2}, {"sum_logits": -1.4136515855789185, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.4136515855789185, "logits_per_char": -0.7068257927894592, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5221904516220093, "incorrect_loss_raw": 1.3865197896957397, "correct_loss_per_char": 0.7610952258110046, "incorrect_loss_per_char": 0.6932598948478699, "correct_loss_per_token": 1.5221904516220093, "incorrect_loss_per_token": 1.3865197896957397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0831705331802368, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": true, "logits_per_token": -1.0831705331802368, "logits_per_char": -0.5415852665901184, "num_chars": 2}, {"sum_logits": -1.3766967058181763, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.3766967058181763, "logits_per_char": -0.6883483529090881, "num_chars": 2}, {"sum_logits": -1.5221904516220093, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.5221904516220093, "logits_per_char": -0.7610952258110046, "num_chars": 2}, {"sum_logits": -1.6996921300888062, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.6996921300888062, "logits_per_char": -0.8498460650444031, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.538060188293457, "incorrect_loss_raw": 1.350937048594157, "correct_loss_per_char": 0.7690300941467285, "incorrect_loss_per_char": 0.6754685242970785, "correct_loss_per_token": 1.538060188293457, "incorrect_loss_per_token": 1.350937048594157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.211814045906067, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.211814045906067, "logits_per_char": -0.6059070229530334, "num_chars": 2}, {"sum_logits": -1.399722933769226, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.399722933769226, "logits_per_char": -0.699861466884613, "num_chars": 2}, {"sum_logits": -1.538060188293457, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.538060188293457, "logits_per_char": -0.7690300941467285, "num_chars": 2}, {"sum_logits": -1.4412741661071777, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.4412741661071777, "logits_per_char": -0.7206370830535889, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.21126389503479, "incorrect_loss_raw": 1.4736513296763103, "correct_loss_per_char": 0.605631947517395, "incorrect_loss_per_char": 0.7368256648381551, "correct_loss_per_token": 1.21126389503479, "incorrect_loss_per_token": 1.4736513296763103, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.21126389503479, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": true, "logits_per_token": -1.21126389503479, "logits_per_char": -0.605631947517395, "num_chars": 2}, {"sum_logits": -1.3113073110580444, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.3113073110580444, "logits_per_char": -0.6556536555290222, "num_chars": 2}, {"sum_logits": -1.5987298488616943, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.5987298488616943, "logits_per_char": -0.7993649244308472, "num_chars": 2}, {"sum_logits": -1.510916829109192, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.510916829109192, "logits_per_char": -0.755458414554596, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7154889106750488, "incorrect_loss_raw": 1.4467516740163167, "correct_loss_per_char": 0.8577444553375244, "incorrect_loss_per_char": 0.7233758370081583, "correct_loss_per_token": 1.7154889106750488, "incorrect_loss_per_token": 1.4467516740163167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7346005439758301, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -0.7346005439758301, "logits_per_char": -0.36730027198791504, "num_chars": 2}, {"sum_logits": -1.7798855304718018, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.7798855304718018, "logits_per_char": -0.8899427652359009, "num_chars": 2}, {"sum_logits": -1.8257689476013184, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.8257689476013184, "logits_per_char": -0.9128844738006592, "num_chars": 2}, {"sum_logits": -1.7154889106750488, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.7154889106750488, "logits_per_char": -0.8577444553375244, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5436949729919434, "incorrect_loss_raw": 1.3474860588709514, "correct_loss_per_char": 0.7718474864959717, "incorrect_loss_per_char": 0.6737430294354757, "correct_loss_per_token": 1.5436949729919434, "incorrect_loss_per_token": 1.3474860588709514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.230893850326538, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": true, "logits_per_token": -1.230893850326538, "logits_per_char": -0.615446925163269, "num_chars": 2}, {"sum_logits": -1.4302583932876587, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.4302583932876587, "logits_per_char": -0.7151291966438293, "num_chars": 2}, {"sum_logits": -1.5436949729919434, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.5436949729919434, "logits_per_char": -0.7718474864959717, "num_chars": 2}, {"sum_logits": -1.3813059329986572, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.3813059329986572, "logits_per_char": -0.6906529664993286, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4969419240951538, "incorrect_loss_raw": 1.3800681829452515, "correct_loss_per_char": 0.7484709620475769, "incorrect_loss_per_char": 0.6900340914726257, "correct_loss_per_token": 1.4969419240951538, "incorrect_loss_per_token": 1.3800681829452515, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1411175727844238, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": true, "logits_per_token": -1.1411175727844238, "logits_per_char": -0.5705587863922119, "num_chars": 2}, {"sum_logits": -1.3461673259735107, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.3461673259735107, "logits_per_char": -0.6730836629867554, "num_chars": 2}, {"sum_logits": -1.6529196500778198, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.6529196500778198, "logits_per_char": -0.8264598250389099, "num_chars": 2}, {"sum_logits": -1.4969419240951538, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.4969419240951538, "logits_per_char": -0.7484709620475769, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5046066045761108, "incorrect_loss_raw": 1.3714491923650105, "correct_loss_per_char": 0.7523033022880554, "incorrect_loss_per_char": 0.6857245961825053, "correct_loss_per_token": 1.5046066045761108, "incorrect_loss_per_token": 1.3714491923650105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.18536376953125, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": true, "logits_per_token": -1.18536376953125, "logits_per_char": -0.592681884765625, "num_chars": 2}, {"sum_logits": -1.3490657806396484, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.3490657806396484, "logits_per_char": -0.6745328903198242, "num_chars": 2}, {"sum_logits": -1.5799180269241333, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.5799180269241333, "logits_per_char": -0.7899590134620667, "num_chars": 2}, {"sum_logits": -1.5046066045761108, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.5046066045761108, "logits_per_char": -0.7523033022880554, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.494605302810669, "incorrect_loss_raw": 1.3673003514607747, "correct_loss_per_char": 0.7473026514053345, "incorrect_loss_per_char": 0.6836501757303873, "correct_loss_per_token": 1.494605302810669, "incorrect_loss_per_token": 1.3673003514607747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2383053302764893, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": true, "logits_per_token": -1.2383053302764893, "logits_per_char": -0.6191526651382446, "num_chars": 2}, {"sum_logits": -1.3719418048858643, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.3719418048858643, "logits_per_char": -0.6859709024429321, "num_chars": 2}, {"sum_logits": -1.4916539192199707, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.4916539192199707, "logits_per_char": -0.7458269596099854, "num_chars": 2}, {"sum_logits": -1.494605302810669, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.494605302810669, "logits_per_char": -0.7473026514053345, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.358317494392395, "incorrect_loss_raw": 1.4277533292770386, "correct_loss_per_char": 0.6791587471961975, "incorrect_loss_per_char": 0.7138766646385193, "correct_loss_per_token": 1.358317494392395, "incorrect_loss_per_token": 1.4277533292770386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1243599653244019, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": true, "logits_per_token": -1.1243599653244019, "logits_per_char": -0.5621799826622009, "num_chars": 2}, {"sum_logits": -1.358317494392395, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.358317494392395, "logits_per_char": -0.6791587471961975, "num_chars": 2}, {"sum_logits": -1.6555421352386475, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.6555421352386475, "logits_per_char": -0.8277710676193237, "num_chars": 2}, {"sum_logits": -1.5033578872680664, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.5033578872680664, "logits_per_char": -0.7516789436340332, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4860882759094238, "incorrect_loss_raw": 1.3719042936960857, "correct_loss_per_char": 0.7430441379547119, "incorrect_loss_per_char": 0.6859521468480428, "correct_loss_per_token": 1.4860882759094238, "incorrect_loss_per_token": 1.3719042936960857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.282420039176941, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.282420039176941, "logits_per_char": -0.6412100195884705, "num_chars": 2}, {"sum_logits": -1.2710906267166138, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": true, "logits_per_token": -1.2710906267166138, "logits_per_char": -0.6355453133583069, "num_chars": 2}, {"sum_logits": -1.4860882759094238, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.4860882759094238, "logits_per_char": -0.7430441379547119, "num_chars": 2}, {"sum_logits": -1.5622022151947021, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.5622022151947021, "logits_per_char": -0.7811011075973511, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4453634023666382, "incorrect_loss_raw": 1.3931636412938435, "correct_loss_per_char": 0.7226817011833191, "incorrect_loss_per_char": 0.6965818206469218, "correct_loss_per_token": 1.4453634023666382, "incorrect_loss_per_token": 1.3931636412938435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2426310777664185, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -1.2426310777664185, "logits_per_char": -0.6213155388832092, "num_chars": 2}, {"sum_logits": -1.2865710258483887, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.2865710258483887, "logits_per_char": -0.6432855129241943, "num_chars": 2}, {"sum_logits": -1.6502888202667236, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.6502888202667236, "logits_per_char": -0.8251444101333618, "num_chars": 2}, {"sum_logits": -1.4453634023666382, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.4453634023666382, "logits_per_char": -0.7226817011833191, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6040284633636475, "incorrect_loss_raw": 1.3544132312138875, "correct_loss_per_char": 0.8020142316818237, "incorrect_loss_per_char": 0.6772066156069437, "correct_loss_per_token": 1.6040284633636475, "incorrect_loss_per_token": 1.3544132312138875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0590020418167114, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": true, "logits_per_token": -1.0590020418167114, "logits_per_char": -0.5295010209083557, "num_chars": 2}, {"sum_logits": -1.4570209980010986, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.4570209980010986, "logits_per_char": -0.7285104990005493, "num_chars": 2}, {"sum_logits": -1.6040284633636475, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.6040284633636475, "logits_per_char": -0.8020142316818237, "num_chars": 2}, {"sum_logits": -1.5472166538238525, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.5472166538238525, "logits_per_char": -0.7736083269119263, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5241109132766724, "incorrect_loss_raw": 1.3869904279708862, "correct_loss_per_char": 0.7620554566383362, "incorrect_loss_per_char": 0.6934952139854431, "correct_loss_per_token": 1.5241109132766724, "incorrect_loss_per_token": 1.3869904279708862, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.066517949104309, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": true, "logits_per_token": -1.066517949104309, "logits_per_char": -0.5332589745521545, "num_chars": 2}, {"sum_logits": -1.4818003177642822, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.4818003177642822, "logits_per_char": -0.7409001588821411, "num_chars": 2}, {"sum_logits": -1.6126530170440674, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.6126530170440674, "logits_per_char": -0.8063265085220337, "num_chars": 2}, {"sum_logits": -1.5241109132766724, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.5241109132766724, "logits_per_char": -0.7620554566383362, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3416953086853027, "incorrect_loss_raw": 1.415379007657369, "correct_loss_per_char": 0.6708476543426514, "incorrect_loss_per_char": 0.7076895038286845, "correct_loss_per_token": 1.3416953086853027, "incorrect_loss_per_token": 1.415379007657369, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.261059284210205, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": true, "logits_per_token": -1.261059284210205, "logits_per_char": -0.6305296421051025, "num_chars": 2}, {"sum_logits": -1.3416953086853027, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.3416953086853027, "logits_per_char": -0.6708476543426514, "num_chars": 2}, {"sum_logits": -1.5595033168792725, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.5595033168792725, "logits_per_char": -0.7797516584396362, "num_chars": 2}, {"sum_logits": -1.4255744218826294, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.4255744218826294, "logits_per_char": -0.7127872109413147, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.581655502319336, "incorrect_loss_raw": 1.352758487065633, "correct_loss_per_char": 0.790827751159668, "incorrect_loss_per_char": 0.6763792435328165, "correct_loss_per_token": 1.581655502319336, "incorrect_loss_per_token": 1.352758487065633, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1290616989135742, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": true, "logits_per_token": -1.1290616989135742, "logits_per_char": -0.5645308494567871, "num_chars": 2}, {"sum_logits": -1.322455644607544, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.322455644607544, "logits_per_char": -0.661227822303772, "num_chars": 2}, {"sum_logits": -1.581655502319336, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.581655502319336, "logits_per_char": -0.790827751159668, "num_chars": 2}, {"sum_logits": -1.6067581176757812, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.6067581176757812, "logits_per_char": -0.8033790588378906, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1706290245056152, "incorrect_loss_raw": 1.4942437012990315, "correct_loss_per_char": 0.5853145122528076, "incorrect_loss_per_char": 0.7471218506495158, "correct_loss_per_token": 1.1706290245056152, "incorrect_loss_per_token": 1.4942437012990315, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2534973621368408, "num_tokens": 1, "num_tokens_all": 678, "is_greedy": false, "logits_per_token": -1.2534973621368408, "logits_per_char": -0.6267486810684204, "num_chars": 2}, {"sum_logits": -1.1706290245056152, "num_tokens": 1, "num_tokens_all": 678, "is_greedy": true, "logits_per_token": -1.1706290245056152, "logits_per_char": -0.5853145122528076, "num_chars": 2}, {"sum_logits": -1.5533865690231323, "num_tokens": 1, "num_tokens_all": 678, "is_greedy": false, "logits_per_token": -1.5533865690231323, "logits_per_char": -0.7766932845115662, "num_chars": 2}, {"sum_logits": -1.6758471727371216, "num_tokens": 1, "num_tokens_all": 678, "is_greedy": false, "logits_per_token": -1.6758471727371216, "logits_per_char": -0.8379235863685608, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5102362632751465, "incorrect_loss_raw": 1.3545908530553181, "correct_loss_per_char": 0.7551181316375732, "incorrect_loss_per_char": 0.6772954265276591, "correct_loss_per_token": 1.5102362632751465, "incorrect_loss_per_token": 1.3545908530553181, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3074557781219482, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": true, "logits_per_token": -1.3074557781219482, "logits_per_char": -0.6537278890609741, "num_chars": 2}, {"sum_logits": -1.3329534530639648, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.3329534530639648, "logits_per_char": -0.6664767265319824, "num_chars": 2}, {"sum_logits": -1.5102362632751465, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.5102362632751465, "logits_per_char": -0.7551181316375732, "num_chars": 2}, {"sum_logits": -1.4233633279800415, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.4233633279800415, "logits_per_char": -0.7116816639900208, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6035722494125366, "incorrect_loss_raw": 1.386130690574646, "correct_loss_per_char": 0.8017861247062683, "incorrect_loss_per_char": 0.693065345287323, "correct_loss_per_token": 1.6035722494125366, "incorrect_loss_per_token": 1.386130690574646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9840278625488281, "num_tokens": 1, "num_tokens_all": 564, "is_greedy": true, "logits_per_token": -0.9840278625488281, "logits_per_char": -0.49201393127441406, "num_chars": 2}, {"sum_logits": -1.4007320404052734, "num_tokens": 1, "num_tokens_all": 564, "is_greedy": false, "logits_per_token": -1.4007320404052734, "logits_per_char": -0.7003660202026367, "num_chars": 2}, {"sum_logits": -1.7736321687698364, "num_tokens": 1, "num_tokens_all": 564, "is_greedy": false, "logits_per_token": -1.7736321687698364, "logits_per_char": -0.8868160843849182, "num_chars": 2}, {"sum_logits": -1.6035722494125366, "num_tokens": 1, "num_tokens_all": 564, "is_greedy": false, "logits_per_token": -1.6035722494125366, "logits_per_char": -0.8017861247062683, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.547389030456543, "incorrect_loss_raw": 1.3811195691426594, "correct_loss_per_char": 0.7736945152282715, "incorrect_loss_per_char": 0.6905597845713297, "correct_loss_per_token": 1.547389030456543, "incorrect_loss_per_token": 1.3811195691426594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0322411060333252, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": true, "logits_per_token": -1.0322411060333252, "logits_per_char": -0.5161205530166626, "num_chars": 2}, {"sum_logits": -1.425800085067749, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.425800085067749, "logits_per_char": -0.7129000425338745, "num_chars": 2}, {"sum_logits": -1.6853175163269043, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.6853175163269043, "logits_per_char": -0.8426587581634521, "num_chars": 2}, {"sum_logits": -1.547389030456543, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.547389030456543, "logits_per_char": -0.7736945152282715, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.511147379875183, "incorrect_loss_raw": 1.365453879038493, "correct_loss_per_char": 0.7555736899375916, "incorrect_loss_per_char": 0.6827269395192465, "correct_loss_per_token": 1.511147379875183, "incorrect_loss_per_token": 1.365453879038493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1646406650543213, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": true, "logits_per_token": -1.1646406650543213, "logits_per_char": -0.5823203325271606, "num_chars": 2}, {"sum_logits": -1.3997281789779663, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.3997281789779663, "logits_per_char": -0.6998640894889832, "num_chars": 2}, {"sum_logits": -1.511147379875183, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.511147379875183, "logits_per_char": -0.7555736899375916, "num_chars": 2}, {"sum_logits": -1.531992793083191, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.531992793083191, "logits_per_char": -0.7659963965415955, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3698620796203613, "incorrect_loss_raw": 1.4157682657241821, "correct_loss_per_char": 0.6849310398101807, "incorrect_loss_per_char": 0.7078841328620911, "correct_loss_per_token": 1.3698620796203613, "incorrect_loss_per_token": 1.4157682657241821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1776539087295532, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.1776539087295532, "logits_per_char": -0.5888269543647766, "num_chars": 2}, {"sum_logits": -1.4782688617706299, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.4782688617706299, "logits_per_char": -0.7391344308853149, "num_chars": 2}, {"sum_logits": -1.5913820266723633, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.5913820266723633, "logits_per_char": -0.7956910133361816, "num_chars": 2}, {"sum_logits": -1.3698620796203613, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.3698620796203613, "logits_per_char": -0.6849310398101807, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.27482008934021, "incorrect_loss_raw": 1.4344802300135295, "correct_loss_per_char": 0.637410044670105, "incorrect_loss_per_char": 0.7172401150067648, "correct_loss_per_token": 1.27482008934021, "incorrect_loss_per_token": 1.4344802300135295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.27482008934021, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": true, "logits_per_token": -1.27482008934021, "logits_per_char": -0.637410044670105, "num_chars": 2}, {"sum_logits": -1.4030876159667969, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.4030876159667969, "logits_per_char": -0.7015438079833984, "num_chars": 2}, {"sum_logits": -1.5193049907684326, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.5193049907684326, "logits_per_char": -0.7596524953842163, "num_chars": 2}, {"sum_logits": -1.3810480833053589, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.3810480833053589, "logits_per_char": -0.6905240416526794, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5914431810379028, "incorrect_loss_raw": 1.3441739877065022, "correct_loss_per_char": 0.7957215905189514, "incorrect_loss_per_char": 0.6720869938532511, "correct_loss_per_token": 1.5914431810379028, "incorrect_loss_per_token": 1.3441739877065022, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.132844090461731, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": true, "logits_per_token": -1.132844090461731, "logits_per_char": -0.5664220452308655, "num_chars": 2}, {"sum_logits": -1.4357479810714722, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.4357479810714722, "logits_per_char": -0.7178739905357361, "num_chars": 2}, {"sum_logits": -1.5914431810379028, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.5914431810379028, "logits_per_char": -0.7957215905189514, "num_chars": 2}, {"sum_logits": -1.4639298915863037, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.4639298915863037, "logits_per_char": -0.7319649457931519, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3055614233016968, "incorrect_loss_raw": 1.4246279398600261, "correct_loss_per_char": 0.6527807116508484, "incorrect_loss_per_char": 0.7123139699300131, "correct_loss_per_token": 1.3055614233016968, "incorrect_loss_per_token": 1.4246279398600261, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3231983184814453, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.3231983184814453, "logits_per_char": -0.6615991592407227, "num_chars": 2}, {"sum_logits": -1.3055614233016968, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": true, "logits_per_token": -1.3055614233016968, "logits_per_char": -0.6527807116508484, "num_chars": 2}, {"sum_logits": -1.4109501838684082, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.4109501838684082, "logits_per_char": -0.7054750919342041, "num_chars": 2}, {"sum_logits": -1.5397353172302246, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.5397353172302246, "logits_per_char": -0.7698676586151123, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2997757196426392, "incorrect_loss_raw": 1.432065725326538, "correct_loss_per_char": 0.6498878598213196, "incorrect_loss_per_char": 0.716032862663269, "correct_loss_per_token": 1.2997757196426392, "incorrect_loss_per_token": 1.432065725326538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2620030641555786, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": true, "logits_per_token": -1.2620030641555786, "logits_per_char": -0.6310015320777893, "num_chars": 2}, {"sum_logits": -1.2997757196426392, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.2997757196426392, "logits_per_char": -0.6498878598213196, "num_chars": 2}, {"sum_logits": -1.582425594329834, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.582425594329834, "logits_per_char": -0.791212797164917, "num_chars": 2}, {"sum_logits": -1.4517685174942017, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.4517685174942017, "logits_per_char": -0.7258842587471008, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5313831567764282, "incorrect_loss_raw": 1.3559023936589558, "correct_loss_per_char": 0.7656915783882141, "incorrect_loss_per_char": 0.6779511968294779, "correct_loss_per_token": 1.5313831567764282, "incorrect_loss_per_token": 1.3559023936589558, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2719449996948242, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": true, "logits_per_token": -1.2719449996948242, "logits_per_char": -0.6359724998474121, "num_chars": 2}, {"sum_logits": -1.313534140586853, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.313534140586853, "logits_per_char": -0.6567670702934265, "num_chars": 2}, {"sum_logits": -1.5313831567764282, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.5313831567764282, "logits_per_char": -0.7656915783882141, "num_chars": 2}, {"sum_logits": -1.4822280406951904, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.4822280406951904, "logits_per_char": -0.7411140203475952, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4374064207077026, "incorrect_loss_raw": 1.3939170440038045, "correct_loss_per_char": 0.7187032103538513, "incorrect_loss_per_char": 0.6969585220019022, "correct_loss_per_token": 1.4374064207077026, "incorrect_loss_per_token": 1.3939170440038045, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2008148431777954, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": true, "logits_per_token": -1.2008148431777954, "logits_per_char": -0.6004074215888977, "num_chars": 2}, {"sum_logits": -1.4004555940628052, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.4004555940628052, "logits_per_char": -0.7002277970314026, "num_chars": 2}, {"sum_logits": -1.580480694770813, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.580480694770813, "logits_per_char": -0.7902403473854065, "num_chars": 2}, {"sum_logits": -1.4374064207077026, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.4374064207077026, "logits_per_char": -0.7187032103538513, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1756048202514648, "incorrect_loss_raw": 1.4784985383351643, "correct_loss_per_char": 0.5878024101257324, "incorrect_loss_per_char": 0.7392492691675822, "correct_loss_per_token": 1.1756048202514648, "incorrect_loss_per_token": 1.4784985383351643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1756048202514648, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": true, "logits_per_token": -1.1756048202514648, "logits_per_char": -0.5878024101257324, "num_chars": 2}, {"sum_logits": -1.3518997430801392, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.3518997430801392, "logits_per_char": -0.6759498715400696, "num_chars": 2}, {"sum_logits": -1.5322662591934204, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.5322662591934204, "logits_per_char": -0.7661331295967102, "num_chars": 2}, {"sum_logits": -1.5513296127319336, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.5513296127319336, "logits_per_char": -0.7756648063659668, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2439225912094116, "incorrect_loss_raw": 1.4477628469467163, "correct_loss_per_char": 0.6219612956047058, "incorrect_loss_per_char": 0.7238814234733582, "correct_loss_per_token": 1.2439225912094116, "incorrect_loss_per_token": 1.4477628469467163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2439225912094116, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": true, "logits_per_token": -1.2439225912094116, "logits_per_char": -0.6219612956047058, "num_chars": 2}, {"sum_logits": -1.4569976329803467, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.4569976329803467, "logits_per_char": -0.7284988164901733, "num_chars": 2}, {"sum_logits": -1.5035041570663452, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.5035041570663452, "logits_per_char": -0.7517520785331726, "num_chars": 2}, {"sum_logits": -1.382786750793457, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.382786750793457, "logits_per_char": -0.6913933753967285, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5563411712646484, "incorrect_loss_raw": 1.3529913822809856, "correct_loss_per_char": 0.7781705856323242, "incorrect_loss_per_char": 0.6764956911404928, "correct_loss_per_token": 1.5563411712646484, "incorrect_loss_per_token": 1.3529913822809856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1726691722869873, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": true, "logits_per_token": -1.1726691722869873, "logits_per_char": -0.5863345861434937, "num_chars": 2}, {"sum_logits": -1.4290835857391357, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.4290835857391357, "logits_per_char": -0.7145417928695679, "num_chars": 2}, {"sum_logits": -1.5563411712646484, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.5563411712646484, "logits_per_char": -0.7781705856323242, "num_chars": 2}, {"sum_logits": -1.4572213888168335, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.4572213888168335, "logits_per_char": -0.7286106944084167, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.546808123588562, "incorrect_loss_raw": 1.3588117758433025, "correct_loss_per_char": 0.773404061794281, "incorrect_loss_per_char": 0.6794058879216512, "correct_loss_per_token": 1.546808123588562, "incorrect_loss_per_token": 1.3588117758433025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1881403923034668, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": true, "logits_per_token": -1.1881403923034668, "logits_per_char": -0.5940701961517334, "num_chars": 2}, {"sum_logits": -1.287375807762146, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.287375807762146, "logits_per_char": -0.643687903881073, "num_chars": 2}, {"sum_logits": -1.6009191274642944, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.6009191274642944, "logits_per_char": -0.8004595637321472, "num_chars": 2}, {"sum_logits": -1.546808123588562, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.546808123588562, "logits_per_char": -0.773404061794281, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4627478122711182, "incorrect_loss_raw": 1.3770805994669597, "correct_loss_per_char": 0.7313739061355591, "incorrect_loss_per_char": 0.6885402997334799, "correct_loss_per_token": 1.4627478122711182, "incorrect_loss_per_token": 1.3770805994669597, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1939940452575684, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": true, "logits_per_token": -1.1939940452575684, "logits_per_char": -0.5969970226287842, "num_chars": 2}, {"sum_logits": -1.4686310291290283, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4686310291290283, "logits_per_char": -0.7343155145645142, "num_chars": 2}, {"sum_logits": -1.4686167240142822, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4686167240142822, "logits_per_char": -0.7343083620071411, "num_chars": 2}, {"sum_logits": -1.4627478122711182, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4627478122711182, "logits_per_char": -0.7313739061355591, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0450847148895264, "incorrect_loss_raw": 1.5457937320073445, "correct_loss_per_char": 0.5225423574447632, "incorrect_loss_per_char": 0.7728968660036722, "correct_loss_per_token": 1.0450847148895264, "incorrect_loss_per_token": 1.5457937320073445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0450847148895264, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": true, "logits_per_token": -1.0450847148895264, "logits_per_char": -0.5225423574447632, "num_chars": 2}, {"sum_logits": -1.4321026802062988, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.4321026802062988, "logits_per_char": -0.7160513401031494, "num_chars": 2}, {"sum_logits": -1.6172001361846924, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.6172001361846924, "logits_per_char": -0.8086000680923462, "num_chars": 2}, {"sum_logits": -1.5880783796310425, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.5880783796310425, "logits_per_char": -0.7940391898155212, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4328268766403198, "incorrect_loss_raw": 1.387548804283142, "correct_loss_per_char": 0.7164134383201599, "incorrect_loss_per_char": 0.693774402141571, "correct_loss_per_token": 1.4328268766403198, "incorrect_loss_per_token": 1.387548804283142, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1896147727966309, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": true, "logits_per_token": -1.1896147727966309, "logits_per_char": -0.5948073863983154, "num_chars": 2}, {"sum_logits": -1.4561043977737427, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.4561043977737427, "logits_per_char": -0.7280521988868713, "num_chars": 2}, {"sum_logits": -1.4328268766403198, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.4328268766403198, "logits_per_char": -0.7164134383201599, "num_chars": 2}, {"sum_logits": -1.5169272422790527, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.5169272422790527, "logits_per_char": -0.7584636211395264, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.317779541015625, "incorrect_loss_raw": 1.427040696144104, "correct_loss_per_char": 0.6588897705078125, "incorrect_loss_per_char": 0.713520348072052, "correct_loss_per_token": 1.317779541015625, "incorrect_loss_per_token": 1.427040696144104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2422879934310913, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": true, "logits_per_token": -1.2422879934310913, "logits_per_char": -0.6211439967155457, "num_chars": 2}, {"sum_logits": -1.317779541015625, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.317779541015625, "logits_per_char": -0.6588897705078125, "num_chars": 2}, {"sum_logits": -1.449819803237915, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.449819803237915, "logits_per_char": -0.7249099016189575, "num_chars": 2}, {"sum_logits": -1.5890142917633057, "num_tokens": 1, "num_tokens_all": 642, "is_greedy": false, "logits_per_token": -1.5890142917633057, "logits_per_char": -0.7945071458816528, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1726638078689575, "incorrect_loss_raw": 1.476945201555888, "correct_loss_per_char": 0.5863319039344788, "incorrect_loss_per_char": 0.738472600777944, "correct_loss_per_token": 1.1726638078689575, "incorrect_loss_per_token": 1.476945201555888, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1726638078689575, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": true, "logits_per_token": -1.1726638078689575, "logits_per_char": -0.5863319039344788, "num_chars": 2}, {"sum_logits": -1.4014588594436646, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.4014588594436646, "logits_per_char": -0.7007294297218323, "num_chars": 2}, {"sum_logits": -1.5052460432052612, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.5052460432052612, "logits_per_char": -0.7526230216026306, "num_chars": 2}, {"sum_logits": -1.5241307020187378, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.5241307020187378, "logits_per_char": -0.7620653510093689, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3703694343566895, "incorrect_loss_raw": 1.4080922603607178, "correct_loss_per_char": 0.6851847171783447, "incorrect_loss_per_char": 0.7040461301803589, "correct_loss_per_token": 1.3703694343566895, "incorrect_loss_per_token": 1.4080922603607178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3167784214019775, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": true, "logits_per_token": -1.3167784214019775, "logits_per_char": -0.6583892107009888, "num_chars": 2}, {"sum_logits": -1.3684935569763184, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.3684935569763184, "logits_per_char": -0.6842467784881592, "num_chars": 2}, {"sum_logits": -1.5390048027038574, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.5390048027038574, "logits_per_char": -0.7695024013519287, "num_chars": 2}, {"sum_logits": -1.3703694343566895, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.3703694343566895, "logits_per_char": -0.6851847171783447, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5921061038970947, "incorrect_loss_raw": 1.3501110474268596, "correct_loss_per_char": 0.7960530519485474, "incorrect_loss_per_char": 0.6750555237134298, "correct_loss_per_token": 1.5921061038970947, "incorrect_loss_per_token": 1.3501110474268596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1404123306274414, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": true, "logits_per_token": -1.1404123306274414, "logits_per_char": -0.5702061653137207, "num_chars": 2}, {"sum_logits": -1.29997980594635, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.29997980594635, "logits_per_char": -0.649989902973175, "num_chars": 2}, {"sum_logits": -1.609941005706787, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.609941005706787, "logits_per_char": -0.8049705028533936, "num_chars": 2}, {"sum_logits": -1.5921061038970947, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.5921061038970947, "logits_per_char": -0.7960530519485474, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9409692883491516, "incorrect_loss_raw": 1.609895388285319, "correct_loss_per_char": 0.4704846441745758, "incorrect_loss_per_char": 0.8049476941426595, "correct_loss_per_token": 0.9409692883491516, "incorrect_loss_per_token": 1.609895388285319, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9409692883491516, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": true, "logits_per_token": -0.9409692883491516, "logits_per_char": -0.4704846441745758, "num_chars": 2}, {"sum_logits": -1.5815250873565674, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.5815250873565674, "logits_per_char": -0.7907625436782837, "num_chars": 2}, {"sum_logits": -1.7299683094024658, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.7299683094024658, "logits_per_char": -0.8649841547012329, "num_chars": 2}, {"sum_logits": -1.5181927680969238, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.5181927680969238, "logits_per_char": -0.7590963840484619, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6015279293060303, "incorrect_loss_raw": 1.3613548278808594, "correct_loss_per_char": 0.8007639646530151, "incorrect_loss_per_char": 0.6806774139404297, "correct_loss_per_token": 1.6015279293060303, "incorrect_loss_per_token": 1.3613548278808594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0616097450256348, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": true, "logits_per_token": -1.0616097450256348, "logits_per_char": -0.5308048725128174, "num_chars": 2}, {"sum_logits": -1.4251513481140137, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.4251513481140137, "logits_per_char": -0.7125756740570068, "num_chars": 2}, {"sum_logits": -1.5973033905029297, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.5973033905029297, "logits_per_char": -0.7986516952514648, "num_chars": 2}, {"sum_logits": -1.6015279293060303, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.6015279293060303, "logits_per_char": -0.8007639646530151, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5070401430130005, "incorrect_loss_raw": 1.3606891234715779, "correct_loss_per_char": 0.7535200715065002, "incorrect_loss_per_char": 0.6803445617357889, "correct_loss_per_token": 1.5070401430130005, "incorrect_loss_per_token": 1.3606891234715779, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3097690343856812, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": false, "logits_per_token": -1.3097690343856812, "logits_per_char": -0.6548845171928406, "num_chars": 2}, {"sum_logits": -1.275309681892395, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": true, "logits_per_token": -1.275309681892395, "logits_per_char": -0.6376548409461975, "num_chars": 2}, {"sum_logits": -1.5070401430130005, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": false, "logits_per_token": -1.5070401430130005, "logits_per_char": -0.7535200715065002, "num_chars": 2}, {"sum_logits": -1.4969886541366577, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": false, "logits_per_token": -1.4969886541366577, "logits_per_char": -0.7484943270683289, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4244354963302612, "incorrect_loss_raw": 1.387311339378357, "correct_loss_per_char": 0.7122177481651306, "incorrect_loss_per_char": 0.6936556696891785, "correct_loss_per_token": 1.4244354963302612, "incorrect_loss_per_token": 1.387311339378357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.367051362991333, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.367051362991333, "logits_per_char": -0.6835256814956665, "num_chars": 2}, {"sum_logits": -1.3534526824951172, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": true, "logits_per_token": -1.3534526824951172, "logits_per_char": -0.6767263412475586, "num_chars": 2}, {"sum_logits": -1.4414299726486206, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.4414299726486206, "logits_per_char": -0.7207149863243103, "num_chars": 2}, {"sum_logits": -1.4244354963302612, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.4244354963302612, "logits_per_char": -0.7122177481651306, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5165908336639404, "incorrect_loss_raw": 1.375706632932027, "correct_loss_per_char": 0.7582954168319702, "incorrect_loss_per_char": 0.6878533164660136, "correct_loss_per_token": 1.5165908336639404, "incorrect_loss_per_token": 1.375706632932027, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.140629768371582, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": true, "logits_per_token": -1.140629768371582, "logits_per_char": -0.570314884185791, "num_chars": 2}, {"sum_logits": -1.3541566133499146, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.3541566133499146, "logits_per_char": -0.6770783066749573, "num_chars": 2}, {"sum_logits": -1.632333517074585, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.632333517074585, "logits_per_char": -0.8161667585372925, "num_chars": 2}, {"sum_logits": -1.5165908336639404, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.5165908336639404, "logits_per_char": -0.7582954168319702, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6107079982757568, "incorrect_loss_raw": 1.3451894124348958, "correct_loss_per_char": 0.8053539991378784, "incorrect_loss_per_char": 0.6725947062174479, "correct_loss_per_token": 1.6107079982757568, "incorrect_loss_per_token": 1.3451894124348958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.138596773147583, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.138596773147583, "logits_per_char": -0.5692983865737915, "num_chars": 2}, {"sum_logits": -1.5551307201385498, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.5551307201385498, "logits_per_char": -0.7775653600692749, "num_chars": 2}, {"sum_logits": -1.6107079982757568, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.6107079982757568, "logits_per_char": -0.8053539991378784, "num_chars": 2}, {"sum_logits": -1.3418407440185547, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.3418407440185547, "logits_per_char": -0.6709203720092773, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.443168044090271, "incorrect_loss_raw": 1.3763006528218586, "correct_loss_per_char": 0.7215840220451355, "incorrect_loss_per_char": 0.6881503264109293, "correct_loss_per_token": 1.443168044090271, "incorrect_loss_per_token": 1.3763006528218586, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3343888521194458, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": true, "logits_per_token": -1.3343888521194458, "logits_per_char": -0.6671944260597229, "num_chars": 2}, {"sum_logits": -1.3465956449508667, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.3465956449508667, "logits_per_char": -0.6732978224754333, "num_chars": 2}, {"sum_logits": -1.4479174613952637, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.4479174613952637, "logits_per_char": -0.7239587306976318, "num_chars": 2}, {"sum_logits": -1.443168044090271, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.443168044090271, "logits_per_char": -0.7215840220451355, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5066893100738525, "incorrect_loss_raw": 1.364594539006551, "correct_loss_per_char": 0.7533446550369263, "incorrect_loss_per_char": 0.6822972695032755, "correct_loss_per_token": 1.5066893100738525, "incorrect_loss_per_token": 1.364594539006551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1884794235229492, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": true, "logits_per_token": -1.1884794235229492, "logits_per_char": -0.5942397117614746, "num_chars": 2}, {"sum_logits": -1.4143767356872559, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.4143767356872559, "logits_per_char": -0.7071883678436279, "num_chars": 2}, {"sum_logits": -1.4909274578094482, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.4909274578094482, "logits_per_char": -0.7454637289047241, "num_chars": 2}, {"sum_logits": -1.5066893100738525, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.5066893100738525, "logits_per_char": -0.7533446550369263, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5629892349243164, "incorrect_loss_raw": 1.3566092252731323, "correct_loss_per_char": 0.7814946174621582, "incorrect_loss_per_char": 0.6783046126365662, "correct_loss_per_token": 1.5629892349243164, "incorrect_loss_per_token": 1.3566092252731323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1555854082107544, "num_tokens": 1, "num_tokens_all": 611, "is_greedy": true, "logits_per_token": -1.1555854082107544, "logits_per_char": -0.5777927041053772, "num_chars": 2}, {"sum_logits": -1.315298318862915, "num_tokens": 1, "num_tokens_all": 611, "is_greedy": false, "logits_per_token": -1.315298318862915, "logits_per_char": -0.6576491594314575, "num_chars": 2}, {"sum_logits": -1.5989439487457275, "num_tokens": 1, "num_tokens_all": 611, "is_greedy": false, "logits_per_token": -1.5989439487457275, "logits_per_char": -0.7994719743728638, "num_chars": 2}, {"sum_logits": -1.5629892349243164, "num_tokens": 1, "num_tokens_all": 611, "is_greedy": false, "logits_per_token": -1.5629892349243164, "logits_per_char": -0.7814946174621582, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3173604011535645, "incorrect_loss_raw": 1.4232064882914226, "correct_loss_per_char": 0.6586802005767822, "incorrect_loss_per_char": 0.7116032441457113, "correct_loss_per_token": 1.3173604011535645, "incorrect_loss_per_token": 1.4232064882914226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.262727975845337, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": true, "logits_per_token": -1.262727975845337, "logits_per_char": -0.6313639879226685, "num_chars": 2}, {"sum_logits": -1.3173604011535645, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.3173604011535645, "logits_per_char": -0.6586802005767822, "num_chars": 2}, {"sum_logits": -1.5451672077178955, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.5451672077178955, "logits_per_char": -0.7725836038589478, "num_chars": 2}, {"sum_logits": -1.4617242813110352, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.4617242813110352, "logits_per_char": -0.7308621406555176, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2151085138320923, "incorrect_loss_raw": 1.4586024681727092, "correct_loss_per_char": 0.6075542569160461, "incorrect_loss_per_char": 0.7293012340863546, "correct_loss_per_token": 1.2151085138320923, "incorrect_loss_per_token": 1.4586024681727092, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2151085138320923, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -1.2151085138320923, "logits_per_char": -0.6075542569160461, "num_chars": 2}, {"sum_logits": -1.4309875965118408, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.4309875965118408, "logits_per_char": -0.7154937982559204, "num_chars": 2}, {"sum_logits": -1.5487171411514282, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.5487171411514282, "logits_per_char": -0.7743585705757141, "num_chars": 2}, {"sum_logits": -1.3961026668548584, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.3961026668548584, "logits_per_char": -0.6980513334274292, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5597378015518188, "incorrect_loss_raw": 1.363482157389323, "correct_loss_per_char": 0.7798689007759094, "incorrect_loss_per_char": 0.6817410786946615, "correct_loss_per_token": 1.5597378015518188, "incorrect_loss_per_token": 1.363482157389323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1175849437713623, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": true, "logits_per_token": -1.1175849437713623, "logits_per_char": -0.5587924718856812, "num_chars": 2}, {"sum_logits": -1.38884437084198, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.38884437084198, "logits_per_char": -0.69442218542099, "num_chars": 2}, {"sum_logits": -1.5597378015518188, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.5597378015518188, "logits_per_char": -0.7798689007759094, "num_chars": 2}, {"sum_logits": -1.5840171575546265, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.5840171575546265, "logits_per_char": -0.7920085787773132, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5019011497497559, "incorrect_loss_raw": 1.3675605456034343, "correct_loss_per_char": 0.7509505748748779, "incorrect_loss_per_char": 0.6837802728017172, "correct_loss_per_token": 1.5019011497497559, "incorrect_loss_per_token": 1.3675605456034343, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2200044393539429, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": true, "logits_per_token": -1.2200044393539429, "logits_per_char": -0.6100022196769714, "num_chars": 2}, {"sum_logits": -1.4596744775772095, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.4596744775772095, "logits_per_char": -0.7298372387886047, "num_chars": 2}, {"sum_logits": -1.5019011497497559, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.5019011497497559, "logits_per_char": -0.7509505748748779, "num_chars": 2}, {"sum_logits": -1.4230027198791504, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.4230027198791504, "logits_per_char": -0.7115013599395752, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3888885974884033, "incorrect_loss_raw": 1.4275764624277751, "correct_loss_per_char": 0.6944442987442017, "incorrect_loss_per_char": 0.7137882312138876, "correct_loss_per_token": 1.3888885974884033, "incorrect_loss_per_token": 1.4275764624277751, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0624642372131348, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": true, "logits_per_token": -1.0624642372131348, "logits_per_char": -0.5312321186065674, "num_chars": 2}, {"sum_logits": -1.3888885974884033, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": false, "logits_per_token": -1.3888885974884033, "logits_per_char": -0.6944442987442017, "num_chars": 2}, {"sum_logits": -1.6409342288970947, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": false, "logits_per_token": -1.6409342288970947, "logits_per_char": -0.8204671144485474, "num_chars": 2}, {"sum_logits": -1.5793309211730957, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": false, "logits_per_token": -1.5793309211730957, "logits_per_char": -0.7896654605865479, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4796311855316162, "incorrect_loss_raw": 1.3805001576741536, "correct_loss_per_char": 0.7398155927658081, "incorrect_loss_per_char": 0.6902500788370768, "correct_loss_per_token": 1.4796311855316162, "incorrect_loss_per_token": 1.3805001576741536, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1653879880905151, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": true, "logits_per_token": -1.1653879880905151, "logits_per_char": -0.5826939940452576, "num_chars": 2}, {"sum_logits": -1.4796311855316162, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.4796311855316162, "logits_per_char": -0.7398155927658081, "num_chars": 2}, {"sum_logits": -1.5531046390533447, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.5531046390533447, "logits_per_char": -0.7765523195266724, "num_chars": 2}, {"sum_logits": -1.423007845878601, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.423007845878601, "logits_per_char": -0.7115039229393005, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4711613655090332, "incorrect_loss_raw": 1.3811426162719727, "correct_loss_per_char": 0.7355806827545166, "incorrect_loss_per_char": 0.6905713081359863, "correct_loss_per_token": 1.4711613655090332, "incorrect_loss_per_token": 1.3811426162719727, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4183634519577026, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.4183634519577026, "logits_per_char": -0.7091817259788513, "num_chars": 2}, {"sum_logits": -1.4711613655090332, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.4711613655090332, "logits_per_char": -0.7355806827545166, "num_chars": 2}, {"sum_logits": -1.5012431144714355, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.5012431144714355, "logits_per_char": -0.7506215572357178, "num_chars": 2}, {"sum_logits": -1.2238212823867798, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": true, "logits_per_token": -1.2238212823867798, "logits_per_char": -0.6119106411933899, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.498544454574585, "incorrect_loss_raw": 1.3695123195648193, "correct_loss_per_char": 0.7492722272872925, "incorrect_loss_per_char": 0.6847561597824097, "correct_loss_per_token": 1.498544454574585, "incorrect_loss_per_token": 1.3695123195648193, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1835366487503052, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": true, "logits_per_token": -1.1835366487503052, "logits_per_char": -0.5917683243751526, "num_chars": 2}, {"sum_logits": -1.3950378894805908, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.3950378894805908, "logits_per_char": -0.6975189447402954, "num_chars": 2}, {"sum_logits": -1.498544454574585, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.498544454574585, "logits_per_char": -0.7492722272872925, "num_chars": 2}, {"sum_logits": -1.529962420463562, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.529962420463562, "logits_per_char": -0.764981210231781, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.463661789894104, "incorrect_loss_raw": 1.3857109546661377, "correct_loss_per_char": 0.731830894947052, "incorrect_loss_per_char": 0.6928554773330688, "correct_loss_per_token": 1.463661789894104, "incorrect_loss_per_token": 1.3857109546661377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1689879894256592, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": true, "logits_per_token": -1.1689879894256592, "logits_per_char": -0.5844939947128296, "num_chars": 2}, {"sum_logits": -1.463661789894104, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.463661789894104, "logits_per_char": -0.731830894947052, "num_chars": 2}, {"sum_logits": -1.6021451950073242, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.6021451950073242, "logits_per_char": -0.8010725975036621, "num_chars": 2}, {"sum_logits": -1.3859996795654297, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.3859996795654297, "logits_per_char": -0.6929998397827148, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4695285558700562, "incorrect_loss_raw": 1.37014905611674, "correct_loss_per_char": 0.7347642779350281, "incorrect_loss_per_char": 0.68507452805837, "correct_loss_per_token": 1.4695285558700562, "incorrect_loss_per_token": 1.37014905611674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.261439561843872, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": true, "logits_per_token": -1.261439561843872, "logits_per_char": -0.630719780921936, "num_chars": 2}, {"sum_logits": -1.396624207496643, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.396624207496643, "logits_per_char": -0.6983121037483215, "num_chars": 2}, {"sum_logits": -1.4523833990097046, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.4523833990097046, "logits_per_char": -0.7261916995048523, "num_chars": 2}, {"sum_logits": -1.4695285558700562, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.4695285558700562, "logits_per_char": -0.7347642779350281, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4239481687545776, "incorrect_loss_raw": 1.38035249710083, "correct_loss_per_char": 0.7119740843772888, "incorrect_loss_per_char": 0.690176248550415, "correct_loss_per_token": 1.4239481687545776, "incorrect_loss_per_token": 1.38035249710083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.371515154838562, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.371515154838562, "logits_per_char": -0.685757577419281, "num_chars": 2}, {"sum_logits": -1.4172323942184448, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.4172323942184448, "logits_per_char": -0.7086161971092224, "num_chars": 2}, {"sum_logits": -1.4239481687545776, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.4239481687545776, "logits_per_char": -0.7119740843772888, "num_chars": 2}, {"sum_logits": -1.3523099422454834, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": true, "logits_per_token": -1.3523099422454834, "logits_per_char": -0.6761549711227417, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4746633768081665, "incorrect_loss_raw": 1.3730932076772053, "correct_loss_per_char": 0.7373316884040833, "incorrect_loss_per_char": 0.6865466038386027, "correct_loss_per_token": 1.4746633768081665, "incorrect_loss_per_token": 1.3730932076772053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.19991135597229, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": true, "logits_per_token": -1.19991135597229, "logits_per_char": -0.599955677986145, "num_chars": 2}, {"sum_logits": -1.42746102809906, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": false, "logits_per_token": -1.42746102809906, "logits_per_char": -0.71373051404953, "num_chars": 2}, {"sum_logits": -1.4919072389602661, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": false, "logits_per_token": -1.4919072389602661, "logits_per_char": -0.7459536194801331, "num_chars": 2}, {"sum_logits": -1.4746633768081665, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": false, "logits_per_token": -1.4746633768081665, "logits_per_char": -0.7373316884040833, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5638961791992188, "incorrect_loss_raw": 1.3610823551813762, "correct_loss_per_char": 0.7819480895996094, "incorrect_loss_per_char": 0.6805411775906881, "correct_loss_per_token": 1.5638961791992188, "incorrect_loss_per_token": 1.3610823551813762, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0970056056976318, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -1.0970056056976318, "logits_per_char": -0.5485028028488159, "num_chars": 2}, {"sum_logits": -1.527429223060608, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.527429223060608, "logits_per_char": -0.763714611530304, "num_chars": 2}, {"sum_logits": -1.5638961791992188, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.5638961791992188, "logits_per_char": -0.7819480895996094, "num_chars": 2}, {"sum_logits": -1.4588122367858887, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.4588122367858887, "logits_per_char": -0.7294061183929443, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6448725461959839, "incorrect_loss_raw": 1.3232076168060303, "correct_loss_per_char": 0.8224362730979919, "incorrect_loss_per_char": 0.6616038084030151, "correct_loss_per_token": 1.6448725461959839, "incorrect_loss_per_token": 1.3232076168060303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2295644283294678, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": true, "logits_per_token": -1.2295644283294678, "logits_per_char": -0.6147822141647339, "num_chars": 2}, {"sum_logits": -1.3524627685546875, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.3524627685546875, "logits_per_char": -0.6762313842773438, "num_chars": 2}, {"sum_logits": -1.6448725461959839, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.6448725461959839, "logits_per_char": -0.8224362730979919, "num_chars": 2}, {"sum_logits": -1.3875956535339355, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.3875956535339355, "logits_per_char": -0.6937978267669678, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2890926599502563, "incorrect_loss_raw": 1.4295893907546997, "correct_loss_per_char": 0.6445463299751282, "incorrect_loss_per_char": 0.7147946953773499, "correct_loss_per_token": 1.2890926599502563, "incorrect_loss_per_token": 1.4295893907546997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2890926599502563, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": true, "logits_per_token": -1.2890926599502563, "logits_per_char": -0.6445463299751282, "num_chars": 2}, {"sum_logits": -1.346936821937561, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.346936821937561, "logits_per_char": -0.6734684109687805, "num_chars": 2}, {"sum_logits": -1.5209094285964966, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.5209094285964966, "logits_per_char": -0.7604547142982483, "num_chars": 2}, {"sum_logits": -1.4209219217300415, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.4209219217300415, "logits_per_char": -0.7104609608650208, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.998703122138977, "incorrect_loss_raw": 1.5698707103729248, "correct_loss_per_char": 0.4993515610694885, "incorrect_loss_per_char": 0.7849353551864624, "correct_loss_per_token": 0.998703122138977, "incorrect_loss_per_token": 1.5698707103729248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.998703122138977, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": true, "logits_per_token": -0.998703122138977, "logits_per_char": -0.4993515610694885, "num_chars": 2}, {"sum_logits": -1.4509055614471436, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.4509055614471436, "logits_per_char": -0.7254527807235718, "num_chars": 2}, {"sum_logits": -1.6591768264770508, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.6591768264770508, "logits_per_char": -0.8295884132385254, "num_chars": 2}, {"sum_logits": -1.59952974319458, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.59952974319458, "logits_per_char": -0.79976487159729, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.368455171585083, "incorrect_loss_raw": 1.4210989872614543, "correct_loss_per_char": 0.6842275857925415, "incorrect_loss_per_char": 0.7105494936307272, "correct_loss_per_token": 1.368455171585083, "incorrect_loss_per_token": 1.4210989872614543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.151146650314331, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.151146650314331, "logits_per_char": -0.5755733251571655, "num_chars": 2}, {"sum_logits": -1.368455171585083, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.368455171585083, "logits_per_char": -0.6842275857925415, "num_chars": 2}, {"sum_logits": -1.6218267679214478, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.6218267679214478, "logits_per_char": -0.8109133839607239, "num_chars": 2}, {"sum_logits": -1.490323543548584, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.490323543548584, "logits_per_char": -0.745161771774292, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.631685733795166, "incorrect_loss_raw": 1.3479454517364502, "correct_loss_per_char": 0.815842866897583, "incorrect_loss_per_char": 0.6739727258682251, "correct_loss_per_token": 1.631685733795166, "incorrect_loss_per_token": 1.3479454517364502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1170470714569092, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": true, "logits_per_token": -1.1170470714569092, "logits_per_char": -0.5585235357284546, "num_chars": 2}, {"sum_logits": -1.3210666179656982, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.3210666179656982, "logits_per_char": -0.6605333089828491, "num_chars": 2}, {"sum_logits": -1.631685733795166, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.631685733795166, "logits_per_char": -0.815842866897583, "num_chars": 2}, {"sum_logits": -1.6057226657867432, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.6057226657867432, "logits_per_char": -0.8028613328933716, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4887205362319946, "incorrect_loss_raw": 1.3688977162043254, "correct_loss_per_char": 0.7443602681159973, "incorrect_loss_per_char": 0.6844488581021627, "correct_loss_per_token": 1.4887205362319946, "incorrect_loss_per_token": 1.3688977162043254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.353212594985962, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.353212594985962, "logits_per_char": -0.676606297492981, "num_chars": 2}, {"sum_logits": -1.2253289222717285, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": true, "logits_per_token": -1.2253289222717285, "logits_per_char": -0.6126644611358643, "num_chars": 2}, {"sum_logits": -1.4887205362319946, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.4887205362319946, "logits_per_char": -0.7443602681159973, "num_chars": 2}, {"sum_logits": -1.5281516313552856, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.5281516313552856, "logits_per_char": -0.7640758156776428, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5837106704711914, "incorrect_loss_raw": 1.3603891928990681, "correct_loss_per_char": 0.7918553352355957, "incorrect_loss_per_char": 0.6801945964495341, "correct_loss_per_token": 1.5837106704711914, "incorrect_loss_per_token": 1.3603891928990681, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0721687078475952, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": true, "logits_per_token": -1.0721687078475952, "logits_per_char": -0.5360843539237976, "num_chars": 2}, {"sum_logits": -1.449519157409668, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.449519157409668, "logits_per_char": -0.724759578704834, "num_chars": 2}, {"sum_logits": -1.5837106704711914, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.5837106704711914, "logits_per_char": -0.7918553352355957, "num_chars": 2}, {"sum_logits": -1.5594797134399414, "num_tokens": 1, "num_tokens_all": 602, "is_greedy": false, "logits_per_token": -1.5594797134399414, "logits_per_char": -0.7797398567199707, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3325215578079224, "incorrect_loss_raw": 1.4193352063496907, "correct_loss_per_char": 0.6662607789039612, "incorrect_loss_per_char": 0.7096676031748453, "correct_loss_per_token": 1.3325215578079224, "incorrect_loss_per_token": 1.4193352063496907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3325215578079224, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.3325215578079224, "logits_per_char": -0.6662607789039612, "num_chars": 2}, {"sum_logits": -1.3876291513442993, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.3876291513442993, "logits_per_char": -0.6938145756721497, "num_chars": 2}, {"sum_logits": -1.5804531574249268, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.5804531574249268, "logits_per_char": -0.7902265787124634, "num_chars": 2}, {"sum_logits": -1.2899233102798462, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": true, "logits_per_token": -1.2899233102798462, "logits_per_char": -0.6449616551399231, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3678696155548096, "incorrect_loss_raw": 1.4072306156158447, "correct_loss_per_char": 0.6839348077774048, "incorrect_loss_per_char": 0.7036153078079224, "correct_loss_per_token": 1.3678696155548096, "incorrect_loss_per_token": 1.4072306156158447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3678696155548096, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": false, "logits_per_token": -1.3678696155548096, "logits_per_char": -0.6839348077774048, "num_chars": 2}, {"sum_logits": -1.2432001829147339, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": true, "logits_per_token": -1.2432001829147339, "logits_per_char": -0.6216000914573669, "num_chars": 2}, {"sum_logits": -1.4138011932373047, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": false, "logits_per_token": -1.4138011932373047, "logits_per_char": -0.7069005966186523, "num_chars": 2}, {"sum_logits": -1.5646904706954956, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": false, "logits_per_token": -1.5646904706954956, "logits_per_char": -0.7823452353477478, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1590561866760254, "incorrect_loss_raw": 1.4849124352137248, "correct_loss_per_char": 0.5795280933380127, "incorrect_loss_per_char": 0.7424562176068624, "correct_loss_per_token": 1.1590561866760254, "incorrect_loss_per_token": 1.4849124352137248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1590561866760254, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.1590561866760254, "logits_per_char": -0.5795280933380127, "num_chars": 2}, {"sum_logits": -1.4713619947433472, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.4713619947433472, "logits_per_char": -0.7356809973716736, "num_chars": 2}, {"sum_logits": -1.5473564863204956, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.5473564863204956, "logits_per_char": -0.7736782431602478, "num_chars": 2}, {"sum_logits": -1.4360188245773315, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.4360188245773315, "logits_per_char": -0.7180094122886658, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1834752559661865, "incorrect_loss_raw": 1.475567619005839, "correct_loss_per_char": 0.5917376279830933, "incorrect_loss_per_char": 0.7377838095029196, "correct_loss_per_token": 1.1834752559661865, "incorrect_loss_per_token": 1.475567619005839, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1834752559661865, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": true, "logits_per_token": -1.1834752559661865, "logits_per_char": -0.5917376279830933, "num_chars": 2}, {"sum_logits": -1.3639215230941772, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.3639215230941772, "logits_per_char": -0.6819607615470886, "num_chars": 2}, {"sum_logits": -1.5145634412765503, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.5145634412765503, "logits_per_char": -0.7572817206382751, "num_chars": 2}, {"sum_logits": -1.5482178926467896, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.5482178926467896, "logits_per_char": -0.7741089463233948, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.53363835811615, "incorrect_loss_raw": 1.3701386451721191, "correct_loss_per_char": 0.766819179058075, "incorrect_loss_per_char": 0.6850693225860596, "correct_loss_per_token": 1.53363835811615, "incorrect_loss_per_token": 1.3701386451721191, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1095913648605347, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": true, "logits_per_token": -1.1095913648605347, "logits_per_char": -0.5547956824302673, "num_chars": 2}, {"sum_logits": -1.3824297189712524, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.3824297189712524, "logits_per_char": -0.6912148594856262, "num_chars": 2}, {"sum_logits": -1.53363835811615, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.53363835811615, "logits_per_char": -0.766819179058075, "num_chars": 2}, {"sum_logits": -1.6183948516845703, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.6183948516845703, "logits_per_char": -0.8091974258422852, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2091264724731445, "incorrect_loss_raw": 1.5218787988026936, "correct_loss_per_char": 0.6045632362365723, "incorrect_loss_per_char": 0.7609393994013468, "correct_loss_per_token": 1.2091264724731445, "incorrect_loss_per_token": 1.5218787988026936, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0752485990524292, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": true, "logits_per_token": -1.0752485990524292, "logits_per_char": -0.5376242995262146, "num_chars": 2}, {"sum_logits": -1.2091264724731445, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.2091264724731445, "logits_per_char": -0.6045632362365723, "num_chars": 2}, {"sum_logits": -1.8957093954086304, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.8957093954086304, "logits_per_char": -0.9478546977043152, "num_chars": 2}, {"sum_logits": -1.5946784019470215, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.5946784019470215, "logits_per_char": -0.7973392009735107, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5105806589126587, "incorrect_loss_raw": 1.3641564051310222, "correct_loss_per_char": 0.7552903294563293, "incorrect_loss_per_char": 0.6820782025655111, "correct_loss_per_token": 1.5105806589126587, "incorrect_loss_per_token": 1.3641564051310222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.246239185333252, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": true, "logits_per_token": -1.246239185333252, "logits_per_char": -0.623119592666626, "num_chars": 2}, {"sum_logits": -1.5071020126342773, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.5071020126342773, "logits_per_char": -0.7535510063171387, "num_chars": 2}, {"sum_logits": -1.5105806589126587, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.5105806589126587, "logits_per_char": -0.7552903294563293, "num_chars": 2}, {"sum_logits": -1.339128017425537, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.339128017425537, "logits_per_char": -0.6695640087127686, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4263434410095215, "incorrect_loss_raw": 1.4057835340499878, "correct_loss_per_char": 0.7131717205047607, "incorrect_loss_per_char": 0.7028917670249939, "correct_loss_per_token": 1.4263434410095215, "incorrect_loss_per_token": 1.4057835340499878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1236706972122192, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -1.1236706972122192, "logits_per_char": -0.5618353486061096, "num_chars": 2}, {"sum_logits": -1.4696660041809082, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.4696660041809082, "logits_per_char": -0.7348330020904541, "num_chars": 2}, {"sum_logits": -1.624013900756836, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.624013900756836, "logits_per_char": -0.812006950378418, "num_chars": 2}, {"sum_logits": -1.4263434410095215, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.4263434410095215, "logits_per_char": -0.7131717205047607, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4387524127960205, "incorrect_loss_raw": 1.3892066876093547, "correct_loss_per_char": 0.7193762063980103, "incorrect_loss_per_char": 0.6946033438046774, "correct_loss_per_token": 1.4387524127960205, "incorrect_loss_per_token": 1.3892066876093547, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2276941537857056, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": true, "logits_per_token": -1.2276941537857056, "logits_per_char": -0.6138470768928528, "num_chars": 2}, {"sum_logits": -1.3346047401428223, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.3346047401428223, "logits_per_char": -0.6673023700714111, "num_chars": 2}, {"sum_logits": -1.6053211688995361, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.6053211688995361, "logits_per_char": -0.8026605844497681, "num_chars": 2}, {"sum_logits": -1.4387524127960205, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.4387524127960205, "logits_per_char": -0.7193762063980103, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3735688924789429, "incorrect_loss_raw": 1.414740761121114, "correct_loss_per_char": 0.6867844462394714, "incorrect_loss_per_char": 0.707370380560557, "correct_loss_per_token": 1.3735688924789429, "incorrect_loss_per_token": 1.414740761121114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1556732654571533, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.1556732654571533, "logits_per_char": -0.5778366327285767, "num_chars": 2}, {"sum_logits": -1.3735688924789429, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.3735688924789429, "logits_per_char": -0.6867844462394714, "num_chars": 2}, {"sum_logits": -1.5567047595977783, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.5567047595977783, "logits_per_char": -0.7783523797988892, "num_chars": 2}, {"sum_logits": -1.5318442583084106, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.5318442583084106, "logits_per_char": -0.7659221291542053, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.173256278038025, "incorrect_loss_raw": 1.4792864322662354, "correct_loss_per_char": 0.5866281390190125, "incorrect_loss_per_char": 0.7396432161331177, "correct_loss_per_token": 1.173256278038025, "incorrect_loss_per_token": 1.4792864322662354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.173256278038025, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": true, "logits_per_token": -1.173256278038025, "logits_per_char": -0.5866281390190125, "num_chars": 2}, {"sum_logits": -1.3883732557296753, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.3883732557296753, "logits_per_char": -0.6941866278648376, "num_chars": 2}, {"sum_logits": -1.5325771570205688, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.5325771570205688, "logits_per_char": -0.7662885785102844, "num_chars": 2}, {"sum_logits": -1.516908884048462, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.516908884048462, "logits_per_char": -0.758454442024231, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.60488760471344, "incorrect_loss_raw": 1.3609994252522786, "correct_loss_per_char": 0.80244380235672, "incorrect_loss_per_char": 0.6804997126261393, "correct_loss_per_token": 1.60488760471344, "incorrect_loss_per_token": 1.3609994252522786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0528755187988281, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": true, "logits_per_token": -1.0528755187988281, "logits_per_char": -0.5264377593994141, "num_chars": 2}, {"sum_logits": -1.426706314086914, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.426706314086914, "logits_per_char": -0.713353157043457, "num_chars": 2}, {"sum_logits": -1.6034164428710938, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.6034164428710938, "logits_per_char": -0.8017082214355469, "num_chars": 2}, {"sum_logits": -1.60488760471344, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.60488760471344, "logits_per_char": -0.80244380235672, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.823656439781189, "incorrect_loss_raw": 1.703574816385905, "correct_loss_per_char": 0.4118282198905945, "incorrect_loss_per_char": 0.8517874081929525, "correct_loss_per_token": 0.823656439781189, "incorrect_loss_per_token": 1.703574816385905, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.823656439781189, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": true, "logits_per_token": -0.823656439781189, "logits_per_char": -0.4118282198905945, "num_chars": 2}, {"sum_logits": -1.5690584182739258, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.5690584182739258, "logits_per_char": -0.7845292091369629, "num_chars": 2}, {"sum_logits": -1.900006651878357, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.900006651878357, "logits_per_char": -0.9500033259391785, "num_chars": 2}, {"sum_logits": -1.6416593790054321, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.6416593790054321, "logits_per_char": -0.8208296895027161, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3117564916610718, "incorrect_loss_raw": 1.4374629656473796, "correct_loss_per_char": 0.6558782458305359, "incorrect_loss_per_char": 0.7187314828236898, "correct_loss_per_token": 1.3117564916610718, "incorrect_loss_per_token": 1.4374629656473796, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1951228380203247, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": true, "logits_per_token": -1.1951228380203247, "logits_per_char": -0.5975614190101624, "num_chars": 2}, {"sum_logits": -1.3117564916610718, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.3117564916610718, "logits_per_char": -0.6558782458305359, "num_chars": 2}, {"sum_logits": -1.6393924951553345, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.6393924951553345, "logits_per_char": -0.8196962475776672, "num_chars": 2}, {"sum_logits": -1.4778735637664795, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.4778735637664795, "logits_per_char": -0.7389367818832397, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3546488285064697, "incorrect_loss_raw": 1.4147781133651733, "correct_loss_per_char": 0.6773244142532349, "incorrect_loss_per_char": 0.7073890566825867, "correct_loss_per_token": 1.3546488285064697, "incorrect_loss_per_token": 1.4147781133651733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2575783729553223, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": true, "logits_per_token": -1.2575783729553223, "logits_per_char": -0.6287891864776611, "num_chars": 2}, {"sum_logits": -1.372733235359192, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.372733235359192, "logits_per_char": -0.686366617679596, "num_chars": 2}, {"sum_logits": -1.6140227317810059, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.6140227317810059, "logits_per_char": -0.8070113658905029, "num_chars": 2}, {"sum_logits": -1.3546488285064697, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.3546488285064697, "logits_per_char": -0.6773244142532349, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5719819068908691, "incorrect_loss_raw": 1.357727289199829, "correct_loss_per_char": 0.7859909534454346, "incorrect_loss_per_char": 0.6788636445999146, "correct_loss_per_token": 1.5719819068908691, "incorrect_loss_per_token": 1.357727289199829, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.112638235092163, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": true, "logits_per_token": -1.112638235092163, "logits_per_char": -0.5563191175460815, "num_chars": 2}, {"sum_logits": -1.5427662134170532, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.5427662134170532, "logits_per_char": -0.7713831067085266, "num_chars": 2}, {"sum_logits": -1.5719819068908691, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.5719819068908691, "logits_per_char": -0.7859909534454346, "num_chars": 2}, {"sum_logits": -1.417777419090271, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.417777419090271, "logits_per_char": -0.7088887095451355, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4960590600967407, "incorrect_loss_raw": 1.363818367322286, "correct_loss_per_char": 0.7480295300483704, "incorrect_loss_per_char": 0.681909183661143, "correct_loss_per_token": 1.4960590600967407, "incorrect_loss_per_token": 1.363818367322286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2196370363235474, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": true, "logits_per_token": -1.2196370363235474, "logits_per_char": -0.6098185181617737, "num_chars": 2}, {"sum_logits": -1.4365363121032715, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.4365363121032715, "logits_per_char": -0.7182681560516357, "num_chars": 2}, {"sum_logits": -1.4960590600967407, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.4960590600967407, "logits_per_char": -0.7480295300483704, "num_chars": 2}, {"sum_logits": -1.435281753540039, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.435281753540039, "logits_per_char": -0.7176408767700195, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.533949851989746, "incorrect_loss_raw": 1.3603532314300537, "correct_loss_per_char": 0.766974925994873, "incorrect_loss_per_char": 0.6801766157150269, "correct_loss_per_token": 1.533949851989746, "incorrect_loss_per_token": 1.3603532314300537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.184836506843567, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": true, "logits_per_token": -1.184836506843567, "logits_per_char": -0.5924182534217834, "num_chars": 2}, {"sum_logits": -1.4154019355773926, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.4154019355773926, "logits_per_char": -0.7077009677886963, "num_chars": 2}, {"sum_logits": -1.533949851989746, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.533949851989746, "logits_per_char": -0.766974925994873, "num_chars": 2}, {"sum_logits": -1.4808212518692017, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.4808212518692017, "logits_per_char": -0.7404106259346008, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 100, "native_id": 100, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0141959190368652, "incorrect_loss_raw": 1.5599599679311116, "correct_loss_per_char": 0.5070979595184326, "incorrect_loss_per_char": 0.7799799839655558, "correct_loss_per_token": 1.0141959190368652, "incorrect_loss_per_token": 1.5599599679311116, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0141959190368652, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.0141959190368652, "logits_per_char": -0.5070979595184326, "num_chars": 2}, {"sum_logits": -1.463379144668579, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.463379144668579, "logits_per_char": -0.7316895723342896, "num_chars": 2}, {"sum_logits": -1.6146643161773682, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.6146643161773682, "logits_per_char": -0.8073321580886841, "num_chars": 2}, {"sum_logits": -1.6018364429473877, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.6018364429473877, "logits_per_char": -0.8009182214736938, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 101, "native_id": 101, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.44923996925354, "incorrect_loss_raw": 1.4118740161259968, "correct_loss_per_char": 0.72461998462677, "incorrect_loss_per_char": 0.7059370080629984, "correct_loss_per_token": 1.44923996925354, "incorrect_loss_per_token": 1.4118740161259968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.037935495376587, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -1.037935495376587, "logits_per_char": -0.5189677476882935, "num_chars": 2}, {"sum_logits": -1.5479129552841187, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5479129552841187, "logits_per_char": -0.7739564776420593, "num_chars": 2}, {"sum_logits": -1.6497735977172852, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.6497735977172852, "logits_per_char": -0.8248867988586426, "num_chars": 2}, {"sum_logits": -1.44923996925354, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.44923996925354, "logits_per_char": -0.72461998462677, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 102, "native_id": 102, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6569288969039917, "incorrect_loss_raw": 1.3395941257476807, "correct_loss_per_char": 0.8284644484519958, "incorrect_loss_per_char": 0.6697970628738403, "correct_loss_per_token": 1.6569288969039917, "incorrect_loss_per_token": 1.3395941257476807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0918941497802734, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": true, "logits_per_token": -1.0918941497802734, "logits_per_char": -0.5459470748901367, "num_chars": 2}, {"sum_logits": -1.440752625465393, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.440752625465393, "logits_per_char": -0.7203763127326965, "num_chars": 2}, {"sum_logits": -1.6569288969039917, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.6569288969039917, "logits_per_char": -0.8284644484519958, "num_chars": 2}, {"sum_logits": -1.4861356019973755, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.4861356019973755, "logits_per_char": -0.7430678009986877, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 103, "native_id": 103, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7878499031066895, "incorrect_loss_raw": 1.4091841379801433, "correct_loss_per_char": 0.8939249515533447, "incorrect_loss_per_char": 0.7045920689900717, "correct_loss_per_token": 1.7878499031066895, "incorrect_loss_per_token": 1.4091841379801433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7803614139556885, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -0.7803614139556885, "logits_per_char": -0.39018070697784424, "num_chars": 2}, {"sum_logits": -1.5032289028167725, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5032289028167725, "logits_per_char": -0.7516144514083862, "num_chars": 2}, {"sum_logits": -1.9439620971679688, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.9439620971679688, "logits_per_char": -0.9719810485839844, "num_chars": 2}, {"sum_logits": -1.7878499031066895, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.7878499031066895, "logits_per_char": -0.8939249515533447, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 104, "native_id": 104, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.544108510017395, "incorrect_loss_raw": 1.3628164132436116, "correct_loss_per_char": 0.7720542550086975, "incorrect_loss_per_char": 0.6814082066218058, "correct_loss_per_token": 1.544108510017395, "incorrect_loss_per_token": 1.3628164132436116, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1520227193832397, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": true, "logits_per_token": -1.1520227193832397, "logits_per_char": -0.5760113596916199, "num_chars": 2}, {"sum_logits": -1.3311392068862915, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.3311392068862915, "logits_per_char": -0.6655696034431458, "num_chars": 2}, {"sum_logits": -1.544108510017395, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.544108510017395, "logits_per_char": -0.7720542550086975, "num_chars": 2}, {"sum_logits": -1.6052873134613037, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.6052873134613037, "logits_per_char": -0.8026436567306519, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 105, "native_id": 105, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.155099630355835, "incorrect_loss_raw": 1.490100582440694, "correct_loss_per_char": 0.5775498151779175, "incorrect_loss_per_char": 0.745050291220347, "correct_loss_per_token": 1.155099630355835, "incorrect_loss_per_token": 1.490100582440694, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.155099630355835, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": true, "logits_per_token": -1.155099630355835, "logits_per_char": -0.5775498151779175, "num_chars": 2}, {"sum_logits": -1.3852667808532715, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.3852667808532715, "logits_per_char": -0.6926333904266357, "num_chars": 2}, {"sum_logits": -1.5616737604141235, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.5616737604141235, "logits_per_char": -0.7808368802070618, "num_chars": 2}, {"sum_logits": -1.5233612060546875, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.5233612060546875, "logits_per_char": -0.7616806030273438, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 106, "native_id": 106, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.423995852470398, "incorrect_loss_raw": 1.3996062278747559, "correct_loss_per_char": 0.711997926235199, "incorrect_loss_per_char": 0.6998031139373779, "correct_loss_per_token": 1.423995852470398, "incorrect_loss_per_token": 1.3996062278747559, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1468391418457031, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": true, "logits_per_token": -1.1468391418457031, "logits_per_char": -0.5734195709228516, "num_chars": 2}, {"sum_logits": -1.423995852470398, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.423995852470398, "logits_per_char": -0.711997926235199, "num_chars": 2}, {"sum_logits": -1.5172289609909058, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.5172289609909058, "logits_per_char": -0.7586144804954529, "num_chars": 2}, {"sum_logits": -1.5347505807876587, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.5347505807876587, "logits_per_char": -0.7673752903938293, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 107, "native_id": 107, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4932185411453247, "incorrect_loss_raw": 1.379503846168518, "correct_loss_per_char": 0.7466092705726624, "incorrect_loss_per_char": 0.689751923084259, "correct_loss_per_token": 1.4932185411453247, "incorrect_loss_per_token": 1.379503846168518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1075990200042725, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": true, "logits_per_token": -1.1075990200042725, "logits_per_char": -0.5537995100021362, "num_chars": 2}, {"sum_logits": -1.4531766176223755, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.4531766176223755, "logits_per_char": -0.7265883088111877, "num_chars": 2}, {"sum_logits": -1.4932185411453247, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.4932185411453247, "logits_per_char": -0.7466092705726624, "num_chars": 2}, {"sum_logits": -1.5777359008789062, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.5777359008789062, "logits_per_char": -0.7888679504394531, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 108, "native_id": 108, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.49919593334198, "incorrect_loss_raw": 1.3698542912801106, "correct_loss_per_char": 0.74959796667099, "incorrect_loss_per_char": 0.6849271456400553, "correct_loss_per_token": 1.49919593334198, "incorrect_loss_per_token": 1.3698542912801106, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1533420085906982, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": true, "logits_per_token": -1.1533420085906982, "logits_per_char": -0.5766710042953491, "num_chars": 2}, {"sum_logits": -1.431146264076233, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.431146264076233, "logits_per_char": -0.7155731320381165, "num_chars": 2}, {"sum_logits": -1.5250746011734009, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.5250746011734009, "logits_per_char": -0.7625373005867004, "num_chars": 2}, {"sum_logits": -1.49919593334198, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.49919593334198, "logits_per_char": -0.74959796667099, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 109, "native_id": 109, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3120280504226685, "incorrect_loss_raw": 1.4264583190282185, "correct_loss_per_char": 0.6560140252113342, "incorrect_loss_per_char": 0.7132291595141093, "correct_loss_per_token": 1.3120280504226685, "incorrect_loss_per_token": 1.4264583190282185, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2488915920257568, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": true, "logits_per_token": -1.2488915920257568, "logits_per_char": -0.6244457960128784, "num_chars": 2}, {"sum_logits": -1.3120280504226685, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.3120280504226685, "logits_per_char": -0.6560140252113342, "num_chars": 2}, {"sum_logits": -1.5519129037857056, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.5519129037857056, "logits_per_char": -0.7759564518928528, "num_chars": 2}, {"sum_logits": -1.4785704612731934, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.4785704612731934, "logits_per_char": -0.7392852306365967, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 110, "native_id": 110, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4519782066345215, "incorrect_loss_raw": 1.3922841946283977, "correct_loss_per_char": 0.7259891033172607, "incorrect_loss_per_char": 0.6961420973141988, "correct_loss_per_token": 1.4519782066345215, "incorrect_loss_per_token": 1.3922841946283977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.118913173675537, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": true, "logits_per_token": -1.118913173675537, "logits_per_char": -0.5594565868377686, "num_chars": 2}, {"sum_logits": -1.4519782066345215, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.4519782066345215, "logits_per_char": -0.7259891033172607, "num_chars": 2}, {"sum_logits": -1.5912449359893799, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.5912449359893799, "logits_per_char": -0.7956224679946899, "num_chars": 2}, {"sum_logits": -1.4666944742202759, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.4666944742202759, "logits_per_char": -0.7333472371101379, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 111, "native_id": 111, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.314267635345459, "incorrect_loss_raw": 1.4526640176773071, "correct_loss_per_char": 0.6571338176727295, "incorrect_loss_per_char": 0.7263320088386536, "correct_loss_per_token": 1.314267635345459, "incorrect_loss_per_token": 1.4526640176773071, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1274219751358032, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": true, "logits_per_token": -1.1274219751358032, "logits_per_char": -0.5637109875679016, "num_chars": 2}, {"sum_logits": -1.7133519649505615, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": false, "logits_per_token": -1.7133519649505615, "logits_per_char": -0.8566759824752808, "num_chars": 2}, {"sum_logits": -1.5172181129455566, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": false, "logits_per_token": -1.5172181129455566, "logits_per_char": -0.7586090564727783, "num_chars": 2}, {"sum_logits": -1.314267635345459, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": false, "logits_per_token": -1.314267635345459, "logits_per_char": -0.6571338176727295, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 112, "native_id": 112, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.48479425907135, "incorrect_loss_raw": 1.3822307189305623, "correct_loss_per_char": 0.742397129535675, "incorrect_loss_per_char": 0.6911153594652811, "correct_loss_per_token": 1.48479425907135, "incorrect_loss_per_token": 1.3822307189305623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1151264905929565, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.1151264905929565, "logits_per_char": -0.5575632452964783, "num_chars": 2}, {"sum_logits": -1.4995859861373901, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.4995859861373901, "logits_per_char": -0.7497929930686951, "num_chars": 2}, {"sum_logits": -1.5319796800613403, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.5319796800613403, "logits_per_char": -0.7659898400306702, "num_chars": 2}, {"sum_logits": -1.48479425907135, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.48479425907135, "logits_per_char": -0.742397129535675, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 113, "native_id": 113, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5367374420166016, "incorrect_loss_raw": 1.3486560980478923, "correct_loss_per_char": 0.7683687210083008, "incorrect_loss_per_char": 0.6743280490239462, "correct_loss_per_token": 1.5367374420166016, "incorrect_loss_per_token": 1.3486560980478923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3705297708511353, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.3705297708511353, "logits_per_char": -0.6852648854255676, "num_chars": 2}, {"sum_logits": -1.368001103401184, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.368001103401184, "logits_per_char": -0.684000551700592, "num_chars": 2}, {"sum_logits": -1.5367374420166016, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.5367374420166016, "logits_per_char": -0.7683687210083008, "num_chars": 2}, {"sum_logits": -1.3074374198913574, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": true, "logits_per_token": -1.3074374198913574, "logits_per_char": -0.6537187099456787, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 114, "native_id": 114, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2781100273132324, "incorrect_loss_raw": 1.4372281630833943, "correct_loss_per_char": 0.6390550136566162, "incorrect_loss_per_char": 0.7186140815416971, "correct_loss_per_token": 1.2781100273132324, "incorrect_loss_per_token": 1.4372281630833943, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2781100273132324, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": true, "logits_per_token": -1.2781100273132324, "logits_per_char": -0.6390550136566162, "num_chars": 2}, {"sum_logits": -1.2922464609146118, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.2922464609146118, "logits_per_char": -0.6461232304573059, "num_chars": 2}, {"sum_logits": -1.5722771883010864, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.5722771883010864, "logits_per_char": -0.7861385941505432, "num_chars": 2}, {"sum_logits": -1.4471608400344849, "num_tokens": 1, "num_tokens_all": 624, "is_greedy": false, "logits_per_token": -1.4471608400344849, "logits_per_char": -0.7235804200172424, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 115, "native_id": 115, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4397797584533691, "incorrect_loss_raw": 1.3923373222351074, "correct_loss_per_char": 0.7198898792266846, "incorrect_loss_per_char": 0.6961686611175537, "correct_loss_per_token": 1.4397797584533691, "incorrect_loss_per_token": 1.3923373222351074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1737804412841797, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": true, "logits_per_token": -1.1737804412841797, "logits_per_char": -0.5868902206420898, "num_chars": 2}, {"sum_logits": -1.4473589658737183, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.4473589658737183, "logits_per_char": -0.7236794829368591, "num_chars": 2}, {"sum_logits": -1.5558725595474243, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.5558725595474243, "logits_per_char": -0.7779362797737122, "num_chars": 2}, {"sum_logits": -1.4397797584533691, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.4397797584533691, "logits_per_char": -0.7198898792266846, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 116, "native_id": 116, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5684099197387695, "incorrect_loss_raw": 1.348169167836507, "correct_loss_per_char": 0.7842049598693848, "incorrect_loss_per_char": 0.6740845839182535, "correct_loss_per_token": 1.5684099197387695, "incorrect_loss_per_token": 1.348169167836507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.36795175075531, "num_tokens": 1, "num_tokens_all": 709, "is_greedy": false, "logits_per_token": -1.36795175075531, "logits_per_char": -0.683975875377655, "num_chars": 2}, {"sum_logits": -1.1623482704162598, "num_tokens": 1, "num_tokens_all": 709, "is_greedy": true, "logits_per_token": -1.1623482704162598, "logits_per_char": -0.5811741352081299, "num_chars": 2}, {"sum_logits": -1.5142074823379517, "num_tokens": 1, "num_tokens_all": 709, "is_greedy": false, "logits_per_token": -1.5142074823379517, "logits_per_char": -0.7571037411689758, "num_chars": 2}, {"sum_logits": -1.5684099197387695, "num_tokens": 1, "num_tokens_all": 709, "is_greedy": false, "logits_per_token": -1.5684099197387695, "logits_per_char": -0.7842049598693848, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 117, "native_id": 117, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5006943941116333, "incorrect_loss_raw": 1.3690160115559895, "correct_loss_per_char": 0.7503471970558167, "incorrect_loss_per_char": 0.6845080057779948, "correct_loss_per_token": 1.5006943941116333, "incorrect_loss_per_token": 1.3690160115559895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2331875562667847, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": true, "logits_per_token": -1.2331875562667847, "logits_per_char": -0.6165937781333923, "num_chars": 2}, {"sum_logits": -1.2834972143173218, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": false, "logits_per_token": -1.2834972143173218, "logits_per_char": -0.6417486071586609, "num_chars": 2}, {"sum_logits": -1.5006943941116333, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": false, "logits_per_token": -1.5006943941116333, "logits_per_char": -0.7503471970558167, "num_chars": 2}, {"sum_logits": -1.5903632640838623, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": false, "logits_per_token": -1.5903632640838623, "logits_per_char": -0.7951816320419312, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 118, "native_id": 118, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2678881883621216, "incorrect_loss_raw": 1.4394190311431885, "correct_loss_per_char": 0.6339440941810608, "incorrect_loss_per_char": 0.7197095155715942, "correct_loss_per_token": 1.2678881883621216, "incorrect_loss_per_token": 1.4394190311431885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2678881883621216, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": true, "logits_per_token": -1.2678881883621216, "logits_per_char": -0.6339440941810608, "num_chars": 2}, {"sum_logits": -1.4065661430358887, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.4065661430358887, "logits_per_char": -0.7032830715179443, "num_chars": 2}, {"sum_logits": -1.5550652742385864, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.5550652742385864, "logits_per_char": -0.7775326371192932, "num_chars": 2}, {"sum_logits": -1.3566256761550903, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.3566256761550903, "logits_per_char": -0.6783128380775452, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 119, "native_id": 119, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5741230249404907, "incorrect_loss_raw": 1.3588191668192546, "correct_loss_per_char": 0.7870615124702454, "incorrect_loss_per_char": 0.6794095834096273, "correct_loss_per_token": 1.5741230249404907, "incorrect_loss_per_token": 1.3588191668192546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.091742753982544, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": true, "logits_per_token": -1.091742753982544, "logits_per_char": -0.545871376991272, "num_chars": 2}, {"sum_logits": -1.3842641115188599, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.3842641115188599, "logits_per_char": -0.6921320557594299, "num_chars": 2}, {"sum_logits": -1.6004506349563599, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.6004506349563599, "logits_per_char": -0.8002253174781799, "num_chars": 2}, {"sum_logits": -1.5741230249404907, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.5741230249404907, "logits_per_char": -0.7870615124702454, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 120, "native_id": 120, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.263824224472046, "incorrect_loss_raw": 1.4429430564244587, "correct_loss_per_char": 0.631912112236023, "incorrect_loss_per_char": 0.7214715282122294, "correct_loss_per_token": 1.263824224472046, "incorrect_loss_per_token": 1.4429430564244587, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.317123532295227, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.317123532295227, "logits_per_char": -0.6585617661476135, "num_chars": 2}, {"sum_logits": -1.263824224472046, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": true, "logits_per_token": -1.263824224472046, "logits_per_char": -0.631912112236023, "num_chars": 2}, {"sum_logits": -1.5644546747207642, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.5644546747207642, "logits_per_char": -0.7822273373603821, "num_chars": 2}, {"sum_logits": -1.4472509622573853, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.4472509622573853, "logits_per_char": -0.7236254811286926, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 121, "native_id": 121, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.46652352809906, "incorrect_loss_raw": 1.382420023282369, "correct_loss_per_char": 0.73326176404953, "incorrect_loss_per_char": 0.6912100116411845, "correct_loss_per_token": 1.46652352809906, "incorrect_loss_per_token": 1.382420023282369, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.165094256401062, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": true, "logits_per_token": -1.165094256401062, "logits_per_char": -0.582547128200531, "num_chars": 2}, {"sum_logits": -1.46652352809906, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.46652352809906, "logits_per_char": -0.73326176404953, "num_chars": 2}, {"sum_logits": -1.5257118940353394, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.5257118940353394, "logits_per_char": -0.7628559470176697, "num_chars": 2}, {"sum_logits": -1.4564539194107056, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.4564539194107056, "logits_per_char": -0.7282269597053528, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 122, "native_id": 122, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6362404823303223, "incorrect_loss_raw": 1.3650134404500325, "correct_loss_per_char": 0.8181202411651611, "incorrect_loss_per_char": 0.6825067202250162, "correct_loss_per_token": 1.6362404823303223, "incorrect_loss_per_token": 1.3650134404500325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9933887720108032, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": true, "logits_per_token": -0.9933887720108032, "logits_per_char": -0.4966943860054016, "num_chars": 2}, {"sum_logits": -1.4636168479919434, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.4636168479919434, "logits_per_char": -0.7318084239959717, "num_chars": 2}, {"sum_logits": -1.6362404823303223, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.6362404823303223, "logits_per_char": -0.8181202411651611, "num_chars": 2}, {"sum_logits": -1.638034701347351, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.638034701347351, "logits_per_char": -0.8190173506736755, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 123, "native_id": 123, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6344858407974243, "incorrect_loss_raw": 1.3518929878870647, "correct_loss_per_char": 0.8172429203987122, "incorrect_loss_per_char": 0.6759464939435323, "correct_loss_per_token": 1.6344858407974243, "incorrect_loss_per_token": 1.3518929878870647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0220983028411865, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": true, "logits_per_token": -1.0220983028411865, "logits_per_char": -0.5110491514205933, "num_chars": 2}, {"sum_logits": -1.4356112480163574, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.4356112480163574, "logits_per_char": -0.7178056240081787, "num_chars": 2}, {"sum_logits": -1.59796941280365, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.59796941280365, "logits_per_char": -0.798984706401825, "num_chars": 2}, {"sum_logits": -1.6344858407974243, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.6344858407974243, "logits_per_char": -0.8172429203987122, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 124, "native_id": 124, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4728528261184692, "incorrect_loss_raw": 1.386338750521342, "correct_loss_per_char": 0.7364264130592346, "incorrect_loss_per_char": 0.693169375260671, "correct_loss_per_token": 1.4728528261184692, "incorrect_loss_per_token": 1.386338750521342, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.149059534072876, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": true, "logits_per_token": -1.149059534072876, "logits_per_char": -0.574529767036438, "num_chars": 2}, {"sum_logits": -1.3699662685394287, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.3699662685394287, "logits_per_char": -0.6849831342697144, "num_chars": 2}, {"sum_logits": -1.6399904489517212, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.6399904489517212, "logits_per_char": -0.8199952244758606, "num_chars": 2}, {"sum_logits": -1.4728528261184692, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.4728528261184692, "logits_per_char": -0.7364264130592346, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 125, "native_id": 125, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5494877099990845, "incorrect_loss_raw": 1.3657655715942383, "correct_loss_per_char": 0.7747438549995422, "incorrect_loss_per_char": 0.6828827857971191, "correct_loss_per_token": 1.5494877099990845, "incorrect_loss_per_token": 1.3657655715942383, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0978788137435913, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.0978788137435913, "logits_per_char": -0.5489394068717957, "num_chars": 2}, {"sum_logits": -1.4348417520523071, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.4348417520523071, "logits_per_char": -0.7174208760261536, "num_chars": 2}, {"sum_logits": -1.5645761489868164, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.5645761489868164, "logits_per_char": -0.7822880744934082, "num_chars": 2}, {"sum_logits": -1.5494877099990845, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.5494877099990845, "logits_per_char": -0.7747438549995422, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 126, "native_id": 126, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5933693647384644, "incorrect_loss_raw": 1.3517351945241292, "correct_loss_per_char": 0.7966846823692322, "incorrect_loss_per_char": 0.6758675972620646, "correct_loss_per_token": 1.5933693647384644, "incorrect_loss_per_token": 1.3517351945241292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4179356098175049, "num_tokens": 1, "num_tokens_all": 641, "is_greedy": false, "logits_per_token": -1.4179356098175049, "logits_per_char": -0.7089678049087524, "num_chars": 2}, {"sum_logits": -1.1057343482971191, "num_tokens": 1, "num_tokens_all": 641, "is_greedy": true, "logits_per_token": -1.1057343482971191, "logits_per_char": -0.5528671741485596, "num_chars": 2}, {"sum_logits": -1.5315356254577637, "num_tokens": 1, "num_tokens_all": 641, "is_greedy": false, "logits_per_token": -1.5315356254577637, "logits_per_char": -0.7657678127288818, "num_chars": 2}, {"sum_logits": -1.5933693647384644, "num_tokens": 1, "num_tokens_all": 641, "is_greedy": false, "logits_per_token": -1.5933693647384644, "logits_per_char": -0.7966846823692322, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 127, "native_id": 127, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6697107553482056, "incorrect_loss_raw": 1.3394193251927693, "correct_loss_per_char": 0.8348553776741028, "incorrect_loss_per_char": 0.6697096625963846, "correct_loss_per_token": 1.6697107553482056, "incorrect_loss_per_token": 1.3394193251927693, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0454188585281372, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": true, "logits_per_token": -1.0454188585281372, "logits_per_char": -0.5227094292640686, "num_chars": 2}, {"sum_logits": -1.4296576976776123, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.4296576976776123, "logits_per_char": -0.7148288488388062, "num_chars": 2}, {"sum_logits": -1.6697107553482056, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.6697107553482056, "logits_per_char": -0.8348553776741028, "num_chars": 2}, {"sum_logits": -1.5431814193725586, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.5431814193725586, "logits_per_char": -0.7715907096862793, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 128, "native_id": 128, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.586727499961853, "incorrect_loss_raw": 1.3486080567042034, "correct_loss_per_char": 0.7933637499809265, "incorrect_loss_per_char": 0.6743040283521017, "correct_loss_per_token": 1.586727499961853, "incorrect_loss_per_token": 1.3486080567042034, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1263136863708496, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": true, "logits_per_token": -1.1263136863708496, "logits_per_char": -0.5631568431854248, "num_chars": 2}, {"sum_logits": -1.3886301517486572, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.3886301517486572, "logits_per_char": -0.6943150758743286, "num_chars": 2}, {"sum_logits": -1.530880331993103, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.530880331993103, "logits_per_char": -0.7654401659965515, "num_chars": 2}, {"sum_logits": -1.586727499961853, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.586727499961853, "logits_per_char": -0.7933637499809265, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 129, "native_id": 129, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5392907857894897, "incorrect_loss_raw": 1.350150187810262, "correct_loss_per_char": 0.7696453928947449, "incorrect_loss_per_char": 0.675075093905131, "correct_loss_per_token": 1.5392907857894897, "incorrect_loss_per_token": 1.350150187810262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3125089406967163, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": false, "logits_per_token": -1.3125089406967163, "logits_per_char": -0.6562544703483582, "num_chars": 2}, {"sum_logits": -1.3088804483413696, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": true, "logits_per_token": -1.3088804483413696, "logits_per_char": -0.6544402241706848, "num_chars": 2}, {"sum_logits": -1.5392907857894897, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": false, "logits_per_token": -1.5392907857894897, "logits_per_char": -0.7696453928947449, "num_chars": 2}, {"sum_logits": -1.4290611743927002, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": false, "logits_per_token": -1.4290611743927002, "logits_per_char": -0.7145305871963501, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 130, "native_id": 130, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2953556776046753, "incorrect_loss_raw": 1.436305085817973, "correct_loss_per_char": 0.6476778388023376, "incorrect_loss_per_char": 0.7181525429089864, "correct_loss_per_token": 1.2953556776046753, "incorrect_loss_per_token": 1.436305085817973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2953556776046753, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": false, "logits_per_token": -1.2953556776046753, "logits_per_char": -0.6476778388023376, "num_chars": 2}, {"sum_logits": -1.219606637954712, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": true, "logits_per_token": -1.219606637954712, "logits_per_char": -0.609803318977356, "num_chars": 2}, {"sum_logits": -1.5211044549942017, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": false, "logits_per_token": -1.5211044549942017, "logits_per_char": -0.7605522274971008, "num_chars": 2}, {"sum_logits": -1.5682041645050049, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": false, "logits_per_token": -1.5682041645050049, "logits_per_char": -0.7841020822525024, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 131, "native_id": 131, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3934751749038696, "incorrect_loss_raw": 1.5322425365447998, "correct_loss_per_char": 0.6967375874519348, "incorrect_loss_per_char": 0.7661212682723999, "correct_loss_per_token": 1.3934751749038696, "incorrect_loss_per_token": 1.5322425365447998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8248586654663086, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": true, "logits_per_token": -0.8248586654663086, "logits_per_char": -0.4124293327331543, "num_chars": 2}, {"sum_logits": -1.3934751749038696, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.3934751749038696, "logits_per_char": -0.6967375874519348, "num_chars": 2}, {"sum_logits": -1.9003124237060547, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.9003124237060547, "logits_per_char": -0.9501562118530273, "num_chars": 2}, {"sum_logits": -1.8715565204620361, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.8715565204620361, "logits_per_char": -0.9357782602310181, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 132, "native_id": 132, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5580403804779053, "incorrect_loss_raw": 1.3559795220692952, "correct_loss_per_char": 0.7790201902389526, "incorrect_loss_per_char": 0.6779897610346476, "correct_loss_per_token": 1.5580403804779053, "incorrect_loss_per_token": 1.3559795220692952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2255553007125854, "num_tokens": 1, "num_tokens_all": 636, "is_greedy": true, "logits_per_token": -1.2255553007125854, "logits_per_char": -0.6127776503562927, "num_chars": 2}, {"sum_logits": -1.2435871362686157, "num_tokens": 1, "num_tokens_all": 636, "is_greedy": false, "logits_per_token": -1.2435871362686157, "logits_per_char": -0.6217935681343079, "num_chars": 2}, {"sum_logits": -1.5580403804779053, "num_tokens": 1, "num_tokens_all": 636, "is_greedy": false, "logits_per_token": -1.5580403804779053, "logits_per_char": -0.7790201902389526, "num_chars": 2}, {"sum_logits": -1.5987961292266846, "num_tokens": 1, "num_tokens_all": 636, "is_greedy": false, "logits_per_token": -1.5987961292266846, "logits_per_char": -0.7993980646133423, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 133, "native_id": 133, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3915948867797852, "incorrect_loss_raw": 1.4050625562667847, "correct_loss_per_char": 0.6957974433898926, "incorrect_loss_per_char": 0.7025312781333923, "correct_loss_per_token": 1.3915948867797852, "incorrect_loss_per_token": 1.4050625562667847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.176035761833191, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -1.176035761833191, "logits_per_char": -0.5880178809165955, "num_chars": 2}, {"sum_logits": -1.3915948867797852, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.3915948867797852, "logits_per_char": -0.6957974433898926, "num_chars": 2}, {"sum_logits": -1.520483374595642, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.520483374595642, "logits_per_char": -0.760241687297821, "num_chars": 2}, {"sum_logits": -1.518668532371521, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.518668532371521, "logits_per_char": -0.7593342661857605, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 134, "native_id": 134, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2871218919754028, "incorrect_loss_raw": 1.4371551275253296, "correct_loss_per_char": 0.6435609459877014, "incorrect_loss_per_char": 0.7185775637626648, "correct_loss_per_token": 1.2871218919754028, "incorrect_loss_per_token": 1.4371551275253296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3262431621551514, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.3262431621551514, "logits_per_char": -0.6631215810775757, "num_chars": 2}, {"sum_logits": -1.4874958992004395, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.4874958992004395, "logits_per_char": -0.7437479496002197, "num_chars": 2}, {"sum_logits": -1.497726321220398, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.497726321220398, "logits_per_char": -0.748863160610199, "num_chars": 2}, {"sum_logits": -1.2871218919754028, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": true, "logits_per_token": -1.2871218919754028, "logits_per_char": -0.6435609459877014, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 135, "native_id": 135, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3584598302841187, "incorrect_loss_raw": 1.4058260917663574, "correct_loss_per_char": 0.6792299151420593, "incorrect_loss_per_char": 0.7029130458831787, "correct_loss_per_token": 1.3584598302841187, "incorrect_loss_per_token": 1.4058260917663574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3945316076278687, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.3945316076278687, "logits_per_char": -0.6972658038139343, "num_chars": 2}, {"sum_logits": -1.3656673431396484, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.3656673431396484, "logits_per_char": -0.6828336715698242, "num_chars": 2}, {"sum_logits": -1.4572793245315552, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.4572793245315552, "logits_per_char": -0.7286396622657776, "num_chars": 2}, {"sum_logits": -1.3584598302841187, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": true, "logits_per_token": -1.3584598302841187, "logits_per_char": -0.6792299151420593, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 136, "native_id": 136, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5729418992996216, "incorrect_loss_raw": 1.3626425663630168, "correct_loss_per_char": 0.7864709496498108, "incorrect_loss_per_char": 0.6813212831815084, "correct_loss_per_token": 1.5729418992996216, "incorrect_loss_per_token": 1.3626425663630168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.078869342803955, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": true, "logits_per_token": -1.078869342803955, "logits_per_char": -0.5394346714019775, "num_chars": 2}, {"sum_logits": -1.4462909698486328, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.4462909698486328, "logits_per_char": -0.7231454849243164, "num_chars": 2}, {"sum_logits": -1.5729418992996216, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.5729418992996216, "logits_per_char": -0.7864709496498108, "num_chars": 2}, {"sum_logits": -1.5627673864364624, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.5627673864364624, "logits_per_char": -0.7813836932182312, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 137, "native_id": 137, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5657939910888672, "incorrect_loss_raw": 1.3595941066741943, "correct_loss_per_char": 0.7828969955444336, "incorrect_loss_per_char": 0.6797970533370972, "correct_loss_per_token": 1.5657939910888672, "incorrect_loss_per_token": 1.3595941066741943, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1453375816345215, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.1453375816345215, "logits_per_char": -0.5726687908172607, "num_chars": 2}, {"sum_logits": -1.3117451667785645, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.3117451667785645, "logits_per_char": -0.6558725833892822, "num_chars": 2}, {"sum_logits": -1.621699571609497, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.621699571609497, "logits_per_char": -0.8108497858047485, "num_chars": 2}, {"sum_logits": -1.5657939910888672, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.5657939910888672, "logits_per_char": -0.7828969955444336, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 138, "native_id": 138, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5387102365493774, "incorrect_loss_raw": 1.3803014357884724, "correct_loss_per_char": 0.7693551182746887, "incorrect_loss_per_char": 0.6901507178942362, "correct_loss_per_token": 1.5387102365493774, "incorrect_loss_per_token": 1.3803014357884724, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0915813446044922, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": true, "logits_per_token": -1.0915813446044922, "logits_per_char": -0.5457906723022461, "num_chars": 2}, {"sum_logits": -1.4377706050872803, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.4377706050872803, "logits_per_char": -0.7188853025436401, "num_chars": 2}, {"sum_logits": -1.611552357673645, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.611552357673645, "logits_per_char": -0.8057761788368225, "num_chars": 2}, {"sum_logits": -1.5387102365493774, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.5387102365493774, "logits_per_char": -0.7693551182746887, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 139, "native_id": 139, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5314652919769287, "incorrect_loss_raw": 1.356926401456197, "correct_loss_per_char": 0.7657326459884644, "incorrect_loss_per_char": 0.6784632007280985, "correct_loss_per_token": 1.5314652919769287, "incorrect_loss_per_token": 1.356926401456197, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2884529829025269, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -1.2884529829025269, "logits_per_char": -0.6442264914512634, "num_chars": 2}, {"sum_logits": -1.4812440872192383, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.4812440872192383, "logits_per_char": -0.7406220436096191, "num_chars": 2}, {"sum_logits": -1.5314652919769287, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.5314652919769287, "logits_per_char": -0.7657326459884644, "num_chars": 2}, {"sum_logits": -1.3010821342468262, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.3010821342468262, "logits_per_char": -0.6505410671234131, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 140, "native_id": 140, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2661186456680298, "incorrect_loss_raw": 1.4410768349965413, "correct_loss_per_char": 0.6330593228340149, "incorrect_loss_per_char": 0.7205384174982706, "correct_loss_per_token": 1.2661186456680298, "incorrect_loss_per_token": 1.4410768349965413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2661186456680298, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": true, "logits_per_token": -1.2661186456680298, "logits_per_char": -0.6330593228340149, "num_chars": 2}, {"sum_logits": -1.3592677116394043, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.3592677116394043, "logits_per_char": -0.6796338558197021, "num_chars": 2}, {"sum_logits": -1.5242934226989746, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.5242934226989746, "logits_per_char": -0.7621467113494873, "num_chars": 2}, {"sum_logits": -1.4396693706512451, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.4396693706512451, "logits_per_char": -0.7198346853256226, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 141, "native_id": 141, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.568952202796936, "incorrect_loss_raw": 1.3517022530237834, "correct_loss_per_char": 0.784476101398468, "incorrect_loss_per_char": 0.6758511265118917, "correct_loss_per_token": 1.568952202796936, "incorrect_loss_per_token": 1.3517022530237834, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.16156804561615, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": true, "logits_per_token": -1.16156804561615, "logits_per_char": -0.580784022808075, "num_chars": 2}, {"sum_logits": -1.3265856504440308, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.3265856504440308, "logits_per_char": -0.6632928252220154, "num_chars": 2}, {"sum_logits": -1.568952202796936, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.568952202796936, "logits_per_char": -0.784476101398468, "num_chars": 2}, {"sum_logits": -1.5669530630111694, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.5669530630111694, "logits_per_char": -0.7834765315055847, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 142, "native_id": 142, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5799143314361572, "incorrect_loss_raw": 1.3468769391377766, "correct_loss_per_char": 0.7899571657180786, "incorrect_loss_per_char": 0.6734384695688883, "correct_loss_per_token": 1.5799143314361572, "incorrect_loss_per_token": 1.3468769391377766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1490123271942139, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": true, "logits_per_token": -1.1490123271942139, "logits_per_char": -0.5745061635971069, "num_chars": 2}, {"sum_logits": -1.4427951574325562, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.4427951574325562, "logits_per_char": -0.7213975787162781, "num_chars": 2}, {"sum_logits": -1.5799143314361572, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.5799143314361572, "logits_per_char": -0.7899571657180786, "num_chars": 2}, {"sum_logits": -1.44882333278656, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.44882333278656, "logits_per_char": -0.72441166639328, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 143, "native_id": 143, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.147667646408081, "incorrect_loss_raw": 1.4893041054407756, "correct_loss_per_char": 0.5738338232040405, "incorrect_loss_per_char": 0.7446520527203878, "correct_loss_per_token": 1.147667646408081, "incorrect_loss_per_token": 1.4893041054407756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.147667646408081, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": true, "logits_per_token": -1.147667646408081, "logits_per_char": -0.5738338232040405, "num_chars": 2}, {"sum_logits": -1.4291270971298218, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.4291270971298218, "logits_per_char": -0.7145635485649109, "num_chars": 2}, {"sum_logits": -1.5570913553237915, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.5570913553237915, "logits_per_char": -0.7785456776618958, "num_chars": 2}, {"sum_logits": -1.4816938638687134, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.4816938638687134, "logits_per_char": -0.7408469319343567, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 144, "native_id": 144, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3778047561645508, "incorrect_loss_raw": 1.41881263256073, "correct_loss_per_char": 0.6889023780822754, "incorrect_loss_per_char": 0.709406316280365, "correct_loss_per_token": 1.3778047561645508, "incorrect_loss_per_token": 1.41881263256073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1335697174072266, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": true, "logits_per_token": -1.1335697174072266, "logits_per_char": -0.5667848587036133, "num_chars": 2}, {"sum_logits": -1.3778047561645508, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.3778047561645508, "logits_per_char": -0.6889023780822754, "num_chars": 2}, {"sum_logits": -1.4759238958358765, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.4759238958358765, "logits_per_char": -0.7379619479179382, "num_chars": 2}, {"sum_logits": -1.646944284439087, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.646944284439087, "logits_per_char": -0.8234721422195435, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 145, "native_id": 145, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.559281349182129, "incorrect_loss_raw": 1.3578051726023357, "correct_loss_per_char": 0.7796406745910645, "incorrect_loss_per_char": 0.6789025863011678, "correct_loss_per_token": 1.559281349182129, "incorrect_loss_per_token": 1.3578051726023357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1114975214004517, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": true, "logits_per_token": -1.1114975214004517, "logits_per_char": -0.5557487607002258, "num_chars": 2}, {"sum_logits": -1.400137186050415, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.400137186050415, "logits_per_char": -0.7000685930252075, "num_chars": 2}, {"sum_logits": -1.5617808103561401, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.5617808103561401, "logits_per_char": -0.7808904051780701, "num_chars": 2}, {"sum_logits": -1.559281349182129, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.559281349182129, "logits_per_char": -0.7796406745910645, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 146, "native_id": 146, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3702588081359863, "incorrect_loss_raw": 1.4061704476674397, "correct_loss_per_char": 0.6851294040679932, "incorrect_loss_per_char": 0.7030852238337199, "correct_loss_per_token": 1.3702588081359863, "incorrect_loss_per_token": 1.4061704476674397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2727168798446655, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": true, "logits_per_token": -1.2727168798446655, "logits_per_char": -0.6363584399223328, "num_chars": 2}, {"sum_logits": -1.3702588081359863, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.3702588081359863, "logits_per_char": -0.6851294040679932, "num_chars": 2}, {"sum_logits": -1.4977829456329346, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.4977829456329346, "logits_per_char": -0.7488914728164673, "num_chars": 2}, {"sum_logits": -1.4480115175247192, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.4480115175247192, "logits_per_char": -0.7240057587623596, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 147, "native_id": 147, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2207136154174805, "incorrect_loss_raw": 1.4681148529052734, "correct_loss_per_char": 0.6103568077087402, "incorrect_loss_per_char": 0.7340574264526367, "correct_loss_per_token": 1.2207136154174805, "incorrect_loss_per_token": 1.4681148529052734, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2207136154174805, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": true, "logits_per_token": -1.2207136154174805, "logits_per_char": -0.6103568077087402, "num_chars": 2}, {"sum_logits": -1.2964555025100708, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.2964555025100708, "logits_per_char": -0.6482277512550354, "num_chars": 2}, {"sum_logits": -1.4939883947372437, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.4939883947372437, "logits_per_char": -0.7469941973686218, "num_chars": 2}, {"sum_logits": -1.6139006614685059, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.6139006614685059, "logits_per_char": -0.8069503307342529, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 148, "native_id": 148, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5258504152297974, "incorrect_loss_raw": 1.3683205048243205, "correct_loss_per_char": 0.7629252076148987, "incorrect_loss_per_char": 0.6841602524121603, "correct_loss_per_token": 1.5258504152297974, "incorrect_loss_per_token": 1.3683205048243205, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1955435276031494, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": true, "logits_per_token": -1.1955435276031494, "logits_per_char": -0.5977717638015747, "num_chars": 2}, {"sum_logits": -1.4030113220214844, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.4030113220214844, "logits_per_char": -0.7015056610107422, "num_chars": 2}, {"sum_logits": -1.5064066648483276, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.5064066648483276, "logits_per_char": -0.7532033324241638, "num_chars": 2}, {"sum_logits": -1.5258504152297974, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.5258504152297974, "logits_per_char": -0.7629252076148987, "num_chars": 2}], "label": 3, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 149, "native_id": 149, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2369191646575928, "incorrect_loss_raw": 1.44933021068573, "correct_loss_per_char": 0.6184595823287964, "incorrect_loss_per_char": 0.724665105342865, "correct_loss_per_token": 1.2369191646575928, "incorrect_loss_per_token": 1.44933021068573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2369191646575928, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": true, "logits_per_token": -1.2369191646575928, "logits_per_char": -0.6184595823287964, "num_chars": 2}, {"sum_logits": -1.4014897346496582, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.4014897346496582, "logits_per_char": -0.7007448673248291, "num_chars": 2}, {"sum_logits": -1.5156681537628174, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.5156681537628174, "logits_per_char": -0.7578340768814087, "num_chars": 2}, {"sum_logits": -1.4308327436447144, "num_tokens": 1, "num_tokens_all": 587, "is_greedy": false, "logits_per_token": -1.4308327436447144, "logits_per_char": -0.7154163718223572, "num_chars": 2}], "label": 0, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 150, "native_id": 150, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7184383869171143, "incorrect_loss_raw": 1.3256992896397908, "correct_loss_per_char": 0.8592191934585571, "incorrect_loss_per_char": 0.6628496448198954, "correct_loss_per_token": 1.7184383869171143, "incorrect_loss_per_token": 1.3256992896397908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0388989448547363, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": true, "logits_per_token": -1.0388989448547363, "logits_per_char": -0.5194494724273682, "num_chars": 2}, {"sum_logits": -1.3968697786331177, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.3968697786331177, "logits_per_char": -0.6984348893165588, "num_chars": 2}, {"sum_logits": -1.7184383869171143, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.7184383869171143, "logits_per_char": -0.8592191934585571, "num_chars": 2}, {"sum_logits": -1.5413291454315186, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.5413291454315186, "logits_per_char": -0.7706645727157593, "num_chars": 2}], "label": 2, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 151, "native_id": 151, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.437814712524414, "incorrect_loss_raw": 1.4069437980651855, "correct_loss_per_char": 0.718907356262207, "incorrect_loss_per_char": 0.7034718990325928, "correct_loss_per_token": 1.437814712524414, "incorrect_loss_per_token": 1.4069437980651855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.084993600845337, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": true, "logits_per_token": -1.084993600845337, "logits_per_char": -0.5424968004226685, "num_chars": 2}, {"sum_logits": -1.437814712524414, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.437814712524414, "logits_per_char": -0.718907356262207, "num_chars": 2}, {"sum_logits": -1.5331413745880127, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.5331413745880127, "logits_per_char": -0.7665706872940063, "num_chars": 2}, {"sum_logits": -1.602696418762207, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.602696418762207, "logits_per_char": -0.8013482093811035, "num_chars": 2}], "label": 1, "task_hash": "e7ca8a8921c02622e23c99b7d90379f7", "model_hash": "03418cf8091a9882619950ffb07429a5"}