{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.53069269657135, "incorrect_loss_raw": 1.3521550099054973, "correct_loss_per_char": 0.765346348285675, "incorrect_loss_per_char": 0.6760775049527487, "correct_loss_per_token": 1.53069269657135, "incorrect_loss_per_token": 1.3521550099054973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.376935601234436, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.376935601234436, "logits_per_char": -0.688467800617218, "num_chars": 2}, {"sum_logits": -1.2473725080490112, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": true, "logits_per_token": -1.2473725080490112, "logits_per_char": -0.6236862540245056, "num_chars": 2}, {"sum_logits": -1.53069269657135, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.53069269657135, "logits_per_char": -0.765346348285675, "num_chars": 2}, {"sum_logits": -1.4321569204330444, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.4321569204330444, "logits_per_char": -0.7160784602165222, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2708746194839478, "incorrect_loss_raw": 1.4377388556798298, "correct_loss_per_char": 0.6354373097419739, "incorrect_loss_per_char": 0.7188694278399149, "correct_loss_per_token": 1.2708746194839478, "incorrect_loss_per_token": 1.4377388556798298, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3431220054626465, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.3431220054626465, "logits_per_char": -0.6715610027313232, "num_chars": 2}, {"sum_logits": -1.2708746194839478, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": true, "logits_per_token": -1.2708746194839478, "logits_per_char": -0.6354373097419739, "num_chars": 2}, {"sum_logits": -1.4228907823562622, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.4228907823562622, "logits_per_char": -0.7114453911781311, "num_chars": 2}, {"sum_logits": -1.547203779220581, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.547203779220581, "logits_per_char": -0.7736018896102905, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5952893495559692, "incorrect_loss_raw": 1.3597029447555542, "correct_loss_per_char": 0.7976446747779846, "incorrect_loss_per_char": 0.6798514723777771, "correct_loss_per_token": 1.5952893495559692, "incorrect_loss_per_token": 1.3597029447555542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1492514610290527, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": true, "logits_per_token": -1.1492514610290527, "logits_per_char": -0.5746257305145264, "num_chars": 2}, {"sum_logits": -1.2191803455352783, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.2191803455352783, "logits_per_char": -0.6095901727676392, "num_chars": 2}, {"sum_logits": -1.7106770277023315, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.7106770277023315, "logits_per_char": -0.8553385138511658, "num_chars": 2}, {"sum_logits": -1.5952893495559692, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.5952893495559692, "logits_per_char": -0.7976446747779846, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4484612941741943, "incorrect_loss_raw": 1.4101836681365967, "correct_loss_per_char": 0.7242306470870972, "incorrect_loss_per_char": 0.7050918340682983, "correct_loss_per_token": 1.4484612941741943, "incorrect_loss_per_token": 1.4101836681365967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0656540393829346, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -1.0656540393829346, "logits_per_char": -0.5328270196914673, "num_chars": 2}, {"sum_logits": -1.4645311832427979, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.4645311832427979, "logits_per_char": -0.7322655916213989, "num_chars": 2}, {"sum_logits": -1.7003657817840576, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.7003657817840576, "logits_per_char": -0.8501828908920288, "num_chars": 2}, {"sum_logits": -1.4484612941741943, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.4484612941741943, "logits_per_char": -0.7242306470870972, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3812735080718994, "incorrect_loss_raw": 1.414179841677348, "correct_loss_per_char": 0.6906367540359497, "incorrect_loss_per_char": 0.707089920838674, "correct_loss_per_token": 1.3812735080718994, "incorrect_loss_per_token": 1.414179841677348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1718323230743408, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": true, "logits_per_token": -1.1718323230743408, "logits_per_char": -0.5859161615371704, "num_chars": 2}, {"sum_logits": -1.3812735080718994, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.3812735080718994, "logits_per_char": -0.6906367540359497, "num_chars": 2}, {"sum_logits": -1.5414232015609741, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.5414232015609741, "logits_per_char": -0.7707116007804871, "num_chars": 2}, {"sum_logits": -1.5292840003967285, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.5292840003967285, "logits_per_char": -0.7646420001983643, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2150238752365112, "incorrect_loss_raw": 1.4642491340637207, "correct_loss_per_char": 0.6075119376182556, "incorrect_loss_per_char": 0.7321245670318604, "correct_loss_per_token": 1.2150238752365112, "incorrect_loss_per_token": 1.4642491340637207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2928118705749512, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.2928118705749512, "logits_per_char": -0.6464059352874756, "num_chars": 2}, {"sum_logits": -1.2150238752365112, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": true, "logits_per_token": -1.2150238752365112, "logits_per_char": -0.6075119376182556, "num_chars": 2}, {"sum_logits": -1.5893820524215698, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.5893820524215698, "logits_per_char": -0.7946910262107849, "num_chars": 2}, {"sum_logits": -1.5105534791946411, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.5105534791946411, "logits_per_char": -0.7552767395973206, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5071544647216797, "incorrect_loss_raw": 1.3648839394251506, "correct_loss_per_char": 0.7535772323608398, "incorrect_loss_per_char": 0.6824419697125753, "correct_loss_per_token": 1.5071544647216797, "incorrect_loss_per_token": 1.3648839394251506, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2385917901992798, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": true, "logits_per_token": -1.2385917901992798, "logits_per_char": -0.6192958950996399, "num_chars": 2}, {"sum_logits": -1.5071544647216797, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.5071544647216797, "logits_per_char": -0.7535772323608398, "num_chars": 2}, {"sum_logits": -1.5104432106018066, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.5104432106018066, "logits_per_char": -0.7552216053009033, "num_chars": 2}, {"sum_logits": -1.3456168174743652, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.3456168174743652, "logits_per_char": -0.6728084087371826, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3318861722946167, "incorrect_loss_raw": 1.4134883483250935, "correct_loss_per_char": 0.6659430861473083, "incorrect_loss_per_char": 0.7067441741625468, "correct_loss_per_token": 1.3318861722946167, "incorrect_loss_per_token": 1.4134883483250935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3318861722946167, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": true, "logits_per_token": -1.3318861722946167, "logits_per_char": -0.6659430861473083, "num_chars": 2}, {"sum_logits": -1.3562349081039429, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.3562349081039429, "logits_per_char": -0.6781174540519714, "num_chars": 2}, {"sum_logits": -1.4821807146072388, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.4821807146072388, "logits_per_char": -0.7410903573036194, "num_chars": 2}, {"sum_logits": -1.4020494222640991, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.4020494222640991, "logits_per_char": -0.7010247111320496, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2958881855010986, "incorrect_loss_raw": 1.4330192009607952, "correct_loss_per_char": 0.6479440927505493, "incorrect_loss_per_char": 0.7165096004803976, "correct_loss_per_token": 1.2958881855010986, "incorrect_loss_per_token": 1.4330192009607952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2393516302108765, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.2393516302108765, "logits_per_char": -0.6196758151054382, "num_chars": 2}, {"sum_logits": -1.2958881855010986, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.2958881855010986, "logits_per_char": -0.6479440927505493, "num_chars": 2}, {"sum_logits": -1.5100245475769043, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.5100245475769043, "logits_per_char": -0.7550122737884521, "num_chars": 2}, {"sum_logits": -1.5496814250946045, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.5496814250946045, "logits_per_char": -0.7748407125473022, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4366697072982788, "incorrect_loss_raw": 1.3782137632369995, "correct_loss_per_char": 0.7183348536491394, "incorrect_loss_per_char": 0.6891068816184998, "correct_loss_per_token": 1.4366697072982788, "incorrect_loss_per_token": 1.3782137632369995, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2931864261627197, "num_tokens": 1, "num_tokens_all": 551, "is_greedy": true, "logits_per_token": -1.2931864261627197, "logits_per_char": -0.6465932130813599, "num_chars": 2}, {"sum_logits": -1.4177401065826416, "num_tokens": 1, "num_tokens_all": 551, "is_greedy": false, "logits_per_token": -1.4177401065826416, "logits_per_char": -0.7088700532913208, "num_chars": 2}, {"sum_logits": -1.4366697072982788, "num_tokens": 1, "num_tokens_all": 551, "is_greedy": false, "logits_per_token": -1.4366697072982788, "logits_per_char": -0.7183348536491394, "num_chars": 2}, {"sum_logits": -1.4237147569656372, "num_tokens": 1, "num_tokens_all": 551, "is_greedy": false, "logits_per_token": -1.4237147569656372, "logits_per_char": -0.7118573784828186, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6284658908843994, "incorrect_loss_raw": 1.3999800483385723, "correct_loss_per_char": 0.8142329454421997, "incorrect_loss_per_char": 0.6999900241692861, "correct_loss_per_token": 1.6284658908843994, "incorrect_loss_per_token": 1.3999800483385723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9204532504081726, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": true, "logits_per_token": -0.9204532504081726, "logits_per_char": -0.4602266252040863, "num_chars": 2}, {"sum_logits": -1.4354861974716187, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.4354861974716187, "logits_per_char": -0.7177430987358093, "num_chars": 2}, {"sum_logits": -1.6284658908843994, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.6284658908843994, "logits_per_char": -0.8142329454421997, "num_chars": 2}, {"sum_logits": -1.8440006971359253, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.8440006971359253, "logits_per_char": -0.9220003485679626, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5145820379257202, "incorrect_loss_raw": 1.3515923817952473, "correct_loss_per_char": 0.7572910189628601, "incorrect_loss_per_char": 0.6757961908976237, "correct_loss_per_token": 1.5145820379257202, "incorrect_loss_per_token": 1.3515923817952473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3926132917404175, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.3926132917404175, "logits_per_char": -0.6963066458702087, "num_chars": 2}, {"sum_logits": -1.311063289642334, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": true, "logits_per_token": -1.311063289642334, "logits_per_char": -0.655531644821167, "num_chars": 2}, {"sum_logits": -1.5145820379257202, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.5145820379257202, "logits_per_char": -0.7572910189628601, "num_chars": 2}, {"sum_logits": -1.3511005640029907, "num_tokens": 1, "num_tokens_all": 599, "is_greedy": false, "logits_per_token": -1.3511005640029907, "logits_per_char": -0.6755502820014954, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4355556964874268, "incorrect_loss_raw": 1.379299243291219, "correct_loss_per_char": 0.7177778482437134, "incorrect_loss_per_char": 0.6896496216456095, "correct_loss_per_token": 1.4355556964874268, "incorrect_loss_per_token": 1.379299243291219, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3158167600631714, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": true, "logits_per_token": -1.3158167600631714, "logits_per_char": -0.6579083800315857, "num_chars": 2}, {"sum_logits": -1.3388888835906982, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.3388888835906982, "logits_per_char": -0.6694444417953491, "num_chars": 2}, {"sum_logits": -1.4831920862197876, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4831920862197876, "logits_per_char": -0.7415960431098938, "num_chars": 2}, {"sum_logits": -1.4355556964874268, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4355556964874268, "logits_per_char": -0.7177778482437134, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.307493805885315, "incorrect_loss_raw": 1.4221826791763306, "correct_loss_per_char": 0.6537469029426575, "incorrect_loss_per_char": 0.7110913395881653, "correct_loss_per_token": 1.307493805885315, "incorrect_loss_per_token": 1.4221826791763306, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3705188035964966, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.3705188035964966, "logits_per_char": -0.6852594017982483, "num_chars": 2}, {"sum_logits": -1.307493805885315, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": true, "logits_per_token": -1.307493805885315, "logits_per_char": -0.6537469029426575, "num_chars": 2}, {"sum_logits": -1.4993928670883179, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.4993928670883179, "logits_per_char": -0.7496964335441589, "num_chars": 2}, {"sum_logits": -1.3966363668441772, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.3966363668441772, "logits_per_char": -0.6983181834220886, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.131389617919922, "incorrect_loss_raw": 1.3362937768300374, "correct_loss_per_char": 1.065694808959961, "incorrect_loss_per_char": 0.6681468884150187, "correct_loss_per_token": 2.131389617919922, "incorrect_loss_per_token": 1.3362937768300374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.768401026725769, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -0.768401026725769, "logits_per_char": -0.3842005133628845, "num_chars": 2}, {"sum_logits": -1.3632903099060059, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.3632903099060059, "logits_per_char": -0.6816451549530029, "num_chars": 2}, {"sum_logits": -1.8771899938583374, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.8771899938583374, "logits_per_char": -0.9385949969291687, "num_chars": 2}, {"sum_logits": -2.131389617919922, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -2.131389617919922, "logits_per_char": -1.065694808959961, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3793789148330688, "incorrect_loss_raw": 1.4156438906987507, "correct_loss_per_char": 0.6896894574165344, "incorrect_loss_per_char": 0.7078219453493754, "correct_loss_per_token": 1.3793789148330688, "incorrect_loss_per_token": 1.4156438906987507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1800410747528076, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": true, "logits_per_token": -1.1800410747528076, "logits_per_char": -0.5900205373764038, "num_chars": 2}, {"sum_logits": -1.3793789148330688, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": false, "logits_per_token": -1.3793789148330688, "logits_per_char": -0.6896894574165344, "num_chars": 2}, {"sum_logits": -1.6621124744415283, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": false, "logits_per_token": -1.6621124744415283, "logits_per_char": -0.8310562372207642, "num_chars": 2}, {"sum_logits": -1.4047781229019165, "num_tokens": 1, "num_tokens_all": 579, "is_greedy": false, "logits_per_token": -1.4047781229019165, "logits_per_char": -0.7023890614509583, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3747539520263672, "incorrect_loss_raw": 1.4089702367782593, "correct_loss_per_char": 0.6873769760131836, "incorrect_loss_per_char": 0.7044851183891296, "correct_loss_per_token": 1.3747539520263672, "incorrect_loss_per_token": 1.4089702367782593, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3747539520263672, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.3747539520263672, "logits_per_char": -0.6873769760131836, "num_chars": 2}, {"sum_logits": -1.3056946992874146, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.3056946992874146, "logits_per_char": -0.6528473496437073, "num_chars": 2}, {"sum_logits": -1.6166380643844604, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.6166380643844604, "logits_per_char": -0.8083190321922302, "num_chars": 2}, {"sum_logits": -1.3045779466629028, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": true, "logits_per_token": -1.3045779466629028, "logits_per_char": -0.6522889733314514, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5025519132614136, "incorrect_loss_raw": 1.3590502341588337, "correct_loss_per_char": 0.7512759566307068, "incorrect_loss_per_char": 0.6795251170794169, "correct_loss_per_token": 1.5025519132614136, "incorrect_loss_per_token": 1.3590502341588337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4518795013427734, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.4518795013427734, "logits_per_char": -0.7259397506713867, "num_chars": 2}, {"sum_logits": -1.2905501127243042, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": true, "logits_per_token": -1.2905501127243042, "logits_per_char": -0.6452750563621521, "num_chars": 2}, {"sum_logits": -1.5025519132614136, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.5025519132614136, "logits_per_char": -0.7512759566307068, "num_chars": 2}, {"sum_logits": -1.3347210884094238, "num_tokens": 1, "num_tokens_all": 607, "is_greedy": false, "logits_per_token": -1.3347210884094238, "logits_per_char": -0.6673605442047119, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.520782232284546, "incorrect_loss_raw": 1.3735042015711467, "correct_loss_per_char": 0.760391116142273, "incorrect_loss_per_char": 0.6867521007855734, "correct_loss_per_token": 1.520782232284546, "incorrect_loss_per_token": 1.3735042015711467, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1220285892486572, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": true, "logits_per_token": -1.1220285892486572, "logits_per_char": -0.5610142946243286, "num_chars": 2}, {"sum_logits": -1.3410218954086304, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.3410218954086304, "logits_per_char": -0.6705109477043152, "num_chars": 2}, {"sum_logits": -1.520782232284546, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.520782232284546, "logits_per_char": -0.760391116142273, "num_chars": 2}, {"sum_logits": -1.6574621200561523, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.6574621200561523, "logits_per_char": -0.8287310600280762, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.288851261138916, "incorrect_loss_raw": 1.4403663476308186, "correct_loss_per_char": 0.644425630569458, "incorrect_loss_per_char": 0.7201831738154093, "correct_loss_per_token": 1.288851261138916, "incorrect_loss_per_token": 1.4403663476308186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.288851261138916, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": true, "logits_per_token": -1.288851261138916, "logits_per_char": -0.644425630569458, "num_chars": 2}, {"sum_logits": -1.4876042604446411, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": false, "logits_per_token": -1.4876042604446411, "logits_per_char": -0.7438021302223206, "num_chars": 2}, {"sum_logits": -1.5249916315078735, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": false, "logits_per_token": -1.5249916315078735, "logits_per_char": -0.7624958157539368, "num_chars": 2}, {"sum_logits": -1.3085031509399414, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": false, "logits_per_token": -1.3085031509399414, "logits_per_char": -0.6542515754699707, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.437461495399475, "incorrect_loss_raw": 1.380725661913554, "correct_loss_per_char": 0.7187307476997375, "incorrect_loss_per_char": 0.690362830956777, "correct_loss_per_token": 1.437461495399475, "incorrect_loss_per_token": 1.380725661913554, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.437461495399475, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.437461495399475, "logits_per_char": -0.7187307476997375, "num_chars": 2}, {"sum_logits": -1.2232023477554321, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": true, "logits_per_token": -1.2232023477554321, "logits_per_char": -0.6116011738777161, "num_chars": 2}, {"sum_logits": -1.4369274377822876, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.4369274377822876, "logits_per_char": -0.7184637188911438, "num_chars": 2}, {"sum_logits": -1.482047200202942, "num_tokens": 1, "num_tokens_all": 623, "is_greedy": false, "logits_per_token": -1.482047200202942, "logits_per_char": -0.741023600101471, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4893951416015625, "incorrect_loss_raw": 1.419774333635966, "correct_loss_per_char": 0.7446975708007812, "incorrect_loss_per_char": 0.709887166817983, "correct_loss_per_token": 1.4893951416015625, "incorrect_loss_per_token": 1.419774333635966, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9721897840499878, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": true, "logits_per_token": -0.9721897840499878, "logits_per_char": -0.4860948920249939, "num_chars": 2}, {"sum_logits": -1.4893951416015625, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.4893951416015625, "logits_per_char": -0.7446975708007812, "num_chars": 2}, {"sum_logits": -1.7072508335113525, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.7072508335113525, "logits_per_char": -0.8536254167556763, "num_chars": 2}, {"sum_logits": -1.5798823833465576, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.5798823833465576, "logits_per_char": -0.7899411916732788, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5408623218536377, "incorrect_loss_raw": 1.3557364145914714, "correct_loss_per_char": 0.7704311609268188, "incorrect_loss_per_char": 0.6778682072957357, "correct_loss_per_token": 1.5408623218536377, "incorrect_loss_per_token": 1.3557364145914714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.157853603363037, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -1.157853603363037, "logits_per_char": -0.5789268016815186, "num_chars": 2}, {"sum_logits": -1.398476004600525, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.398476004600525, "logits_per_char": -0.6992380023002625, "num_chars": 2}, {"sum_logits": -1.5408623218536377, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.5408623218536377, "logits_per_char": -0.7704311609268188, "num_chars": 2}, {"sum_logits": -1.510879635810852, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.510879635810852, "logits_per_char": -0.755439817905426, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4012433290481567, "incorrect_loss_raw": 1.3905293941497803, "correct_loss_per_char": 0.7006216645240784, "incorrect_loss_per_char": 0.6952646970748901, "correct_loss_per_token": 1.4012433290481567, "incorrect_loss_per_token": 1.3905293941497803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4012433290481567, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4012433290481567, "logits_per_char": -0.7006216645240784, "num_chars": 2}, {"sum_logits": -1.2733803987503052, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": true, "logits_per_token": -1.2733803987503052, "logits_per_char": -0.6366901993751526, "num_chars": 2}, {"sum_logits": -1.432702898979187, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.432702898979187, "logits_per_char": -0.7163514494895935, "num_chars": 2}, {"sum_logits": -1.4655048847198486, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4655048847198486, "logits_per_char": -0.7327524423599243, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3777583837509155, "incorrect_loss_raw": 1.4102184375127156, "correct_loss_per_char": 0.6888791918754578, "incorrect_loss_per_char": 0.7051092187563578, "correct_loss_per_token": 1.3777583837509155, "incorrect_loss_per_token": 1.4102184375127156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3777583837509155, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.3777583837509155, "logits_per_char": -0.6888791918754578, "num_chars": 2}, {"sum_logits": -1.1897658109664917, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": true, "logits_per_token": -1.1897658109664917, "logits_per_char": -0.5948829054832458, "num_chars": 2}, {"sum_logits": -1.60686457157135, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.60686457157135, "logits_per_char": -0.803432285785675, "num_chars": 2}, {"sum_logits": -1.4340249300003052, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.4340249300003052, "logits_per_char": -0.7170124650001526, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3998618125915527, "incorrect_loss_raw": 1.4060859282811482, "correct_loss_per_char": 0.6999309062957764, "incorrect_loss_per_char": 0.7030429641405741, "correct_loss_per_token": 1.3998618125915527, "incorrect_loss_per_token": 1.4060859282811482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1486557722091675, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": true, "logits_per_token": -1.1486557722091675, "logits_per_char": -0.5743278861045837, "num_chars": 2}, {"sum_logits": -1.3998618125915527, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": false, "logits_per_token": -1.3998618125915527, "logits_per_char": -0.6999309062957764, "num_chars": 2}, {"sum_logits": -1.5958842039108276, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": false, "logits_per_token": -1.5958842039108276, "logits_per_char": -0.7979421019554138, "num_chars": 2}, {"sum_logits": -1.4737178087234497, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": false, "logits_per_token": -1.4737178087234497, "logits_per_char": -0.7368589043617249, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4283756017684937, "incorrect_loss_raw": 1.3937154213587444, "correct_loss_per_char": 0.7141878008842468, "incorrect_loss_per_char": 0.6968577106793722, "correct_loss_per_token": 1.4283756017684937, "incorrect_loss_per_token": 1.3937154213587444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1449729204177856, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": true, "logits_per_token": -1.1449729204177856, "logits_per_char": -0.5724864602088928, "num_chars": 2}, {"sum_logits": -1.4283756017684937, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.4283756017684937, "logits_per_char": -0.7141878008842468, "num_chars": 2}, {"sum_logits": -1.571628212928772, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.571628212928772, "logits_per_char": -0.785814106464386, "num_chars": 2}, {"sum_logits": -1.4645451307296753, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.4645451307296753, "logits_per_char": -0.7322725653648376, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4405624866485596, "incorrect_loss_raw": 1.3931426207224529, "correct_loss_per_char": 0.7202812433242798, "incorrect_loss_per_char": 0.6965713103612264, "correct_loss_per_token": 1.4405624866485596, "incorrect_loss_per_token": 1.3931426207224529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1909271478652954, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": true, "logits_per_token": -1.1909271478652954, "logits_per_char": -0.5954635739326477, "num_chars": 2}, {"sum_logits": -1.3558356761932373, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": false, "logits_per_token": -1.3558356761932373, "logits_per_char": -0.6779178380966187, "num_chars": 2}, {"sum_logits": -1.4405624866485596, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": false, "logits_per_token": -1.4405624866485596, "logits_per_char": -0.7202812433242798, "num_chars": 2}, {"sum_logits": -1.6326650381088257, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": false, "logits_per_token": -1.6326650381088257, "logits_per_char": -0.8163325190544128, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3471479415893555, "incorrect_loss_raw": 1.4067870775858562, "correct_loss_per_char": 0.6735739707946777, "incorrect_loss_per_char": 0.7033935387929281, "correct_loss_per_token": 1.3471479415893555, "incorrect_loss_per_token": 1.4067870775858562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4053038358688354, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.4053038358688354, "logits_per_char": -0.7026519179344177, "num_chars": 2}, {"sum_logits": -1.332862377166748, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": true, "logits_per_token": -1.332862377166748, "logits_per_char": -0.666431188583374, "num_chars": 2}, {"sum_logits": -1.4821950197219849, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.4821950197219849, "logits_per_char": -0.7410975098609924, "num_chars": 2}, {"sum_logits": -1.3471479415893555, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.3471479415893555, "logits_per_char": -0.6735739707946777, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5335017442703247, "incorrect_loss_raw": 1.357465147972107, "correct_loss_per_char": 0.7667508721351624, "incorrect_loss_per_char": 0.6787325739860535, "correct_loss_per_token": 1.5335017442703247, "incorrect_loss_per_token": 1.357465147972107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2461917400360107, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": true, "logits_per_token": -1.2461917400360107, "logits_per_char": -0.6230958700180054, "num_chars": 2}, {"sum_logits": -1.3253653049468994, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.3253653049468994, "logits_per_char": -0.6626826524734497, "num_chars": 2}, {"sum_logits": -1.5008383989334106, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.5008383989334106, "logits_per_char": -0.7504191994667053, "num_chars": 2}, {"sum_logits": -1.5335017442703247, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.5335017442703247, "logits_per_char": -0.7667508721351624, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4251341819763184, "incorrect_loss_raw": 1.391386906305949, "correct_loss_per_char": 0.7125670909881592, "incorrect_loss_per_char": 0.6956934531529745, "correct_loss_per_token": 1.4251341819763184, "incorrect_loss_per_token": 1.391386906305949, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4251341819763184, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.4251341819763184, "logits_per_char": -0.7125670909881592, "num_chars": 2}, {"sum_logits": -1.1685770750045776, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": true, "logits_per_token": -1.1685770750045776, "logits_per_char": -0.5842885375022888, "num_chars": 2}, {"sum_logits": -1.5118932723999023, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.5118932723999023, "logits_per_char": -0.7559466361999512, "num_chars": 2}, {"sum_logits": -1.4936903715133667, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.4936903715133667, "logits_per_char": -0.7468451857566833, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.431525468826294, "incorrect_loss_raw": 1.380833387374878, "correct_loss_per_char": 0.715762734413147, "incorrect_loss_per_char": 0.690416693687439, "correct_loss_per_token": 1.431525468826294, "incorrect_loss_per_token": 1.380833387374878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.431525468826294, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.431525468826294, "logits_per_char": -0.715762734413147, "num_chars": 2}, {"sum_logits": -1.2562365531921387, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": true, "logits_per_token": -1.2562365531921387, "logits_per_char": -0.6281182765960693, "num_chars": 2}, {"sum_logits": -1.412024974822998, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.412024974822998, "logits_per_char": -0.706012487411499, "num_chars": 2}, {"sum_logits": -1.474238634109497, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.474238634109497, "logits_per_char": -0.7371193170547485, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5835093259811401, "incorrect_loss_raw": 1.3508456548055012, "correct_loss_per_char": 0.7917546629905701, "incorrect_loss_per_char": 0.6754228274027506, "correct_loss_per_token": 1.5835093259811401, "incorrect_loss_per_token": 1.3508456548055012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2049033641815186, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": true, "logits_per_token": -1.2049033641815186, "logits_per_char": -0.6024516820907593, "num_chars": 2}, {"sum_logits": -1.4137011766433716, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": false, "logits_per_token": -1.4137011766433716, "logits_per_char": -0.7068505883216858, "num_chars": 2}, {"sum_logits": -1.4339324235916138, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": false, "logits_per_token": -1.4339324235916138, "logits_per_char": -0.7169662117958069, "num_chars": 2}, {"sum_logits": -1.5835093259811401, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": false, "logits_per_token": -1.5835093259811401, "logits_per_char": -0.7917546629905701, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3709871768951416, "incorrect_loss_raw": 1.4026075998942058, "correct_loss_per_char": 0.6854935884475708, "incorrect_loss_per_char": 0.7013037999471029, "correct_loss_per_token": 1.3709871768951416, "incorrect_loss_per_token": 1.4026075998942058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3709871768951416, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.3709871768951416, "logits_per_char": -0.6854935884475708, "num_chars": 2}, {"sum_logits": -1.2650994062423706, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": true, "logits_per_token": -1.2650994062423706, "logits_per_char": -0.6325497031211853, "num_chars": 2}, {"sum_logits": -1.467288613319397, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.467288613319397, "logits_per_char": -0.7336443066596985, "num_chars": 2}, {"sum_logits": -1.4754347801208496, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.4754347801208496, "logits_per_char": -0.7377173900604248, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5316658020019531, "incorrect_loss_raw": 1.3736637036005657, "correct_loss_per_char": 0.7658329010009766, "incorrect_loss_per_char": 0.6868318518002828, "correct_loss_per_token": 1.5316658020019531, "incorrect_loss_per_token": 1.3736637036005657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.423607349395752, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.423607349395752, "logits_per_char": -0.711803674697876, "num_chars": 2}, {"sum_logits": -1.1005581617355347, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": true, "logits_per_token": -1.1005581617355347, "logits_per_char": -0.5502790808677673, "num_chars": 2}, {"sum_logits": -1.5968255996704102, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.5968255996704102, "logits_per_char": -0.7984127998352051, "num_chars": 2}, {"sum_logits": -1.5316658020019531, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.5316658020019531, "logits_per_char": -0.7658329010009766, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4616330862045288, "incorrect_loss_raw": 1.3710911671320598, "correct_loss_per_char": 0.7308165431022644, "incorrect_loss_per_char": 0.6855455835660299, "correct_loss_per_token": 1.4616330862045288, "incorrect_loss_per_token": 1.3710911671320598, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3189736604690552, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.3189736604690552, "logits_per_char": -0.6594868302345276, "num_chars": 2}, {"sum_logits": -1.3096688985824585, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": true, "logits_per_token": -1.3096688985824585, "logits_per_char": -0.6548344492912292, "num_chars": 2}, {"sum_logits": -1.4616330862045288, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.4616330862045288, "logits_per_char": -0.7308165431022644, "num_chars": 2}, {"sum_logits": -1.4846309423446655, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.4846309423446655, "logits_per_char": -0.7423154711723328, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3316881656646729, "incorrect_loss_raw": 1.4258860349655151, "correct_loss_per_char": 0.6658440828323364, "incorrect_loss_per_char": 0.7129430174827576, "correct_loss_per_token": 1.3316881656646729, "incorrect_loss_per_token": 1.4258860349655151, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2094783782958984, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": true, "logits_per_token": -1.2094783782958984, "logits_per_char": -0.6047391891479492, "num_chars": 2}, {"sum_logits": -1.3316881656646729, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.3316881656646729, "logits_per_char": -0.6658440828323364, "num_chars": 2}, {"sum_logits": -1.5837440490722656, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.5837440490722656, "logits_per_char": -0.7918720245361328, "num_chars": 2}, {"sum_logits": -1.4844356775283813, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.4844356775283813, "logits_per_char": -0.7422178387641907, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3487818241119385, "incorrect_loss_raw": 1.4069838126500447, "correct_loss_per_char": 0.6743909120559692, "incorrect_loss_per_char": 0.7034919063250223, "correct_loss_per_token": 1.3487818241119385, "incorrect_loss_per_token": 1.4069838126500447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4622409343719482, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.4622409343719482, "logits_per_char": -0.7311204671859741, "num_chars": 2}, {"sum_logits": -1.3329956531524658, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": true, "logits_per_token": -1.3329956531524658, "logits_per_char": -0.6664978265762329, "num_chars": 2}, {"sum_logits": -1.4257148504257202, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.4257148504257202, "logits_per_char": -0.7128574252128601, "num_chars": 2}, {"sum_logits": -1.3487818241119385, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.3487818241119385, "logits_per_char": -0.6743909120559692, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.55693519115448, "incorrect_loss_raw": 1.3499072790145874, "correct_loss_per_char": 0.77846759557724, "incorrect_loss_per_char": 0.6749536395072937, "correct_loss_per_token": 1.55693519115448, "incorrect_loss_per_token": 1.3499072790145874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.293449878692627, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.293449878692627, "logits_per_char": -0.6467249393463135, "num_chars": 2}, {"sum_logits": -1.2343578338623047, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": true, "logits_per_token": -1.2343578338623047, "logits_per_char": -0.6171789169311523, "num_chars": 2}, {"sum_logits": -1.55693519115448, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.55693519115448, "logits_per_char": -0.77846759557724, "num_chars": 2}, {"sum_logits": -1.5219141244888306, "num_tokens": 1, "num_tokens_all": 619, "is_greedy": false, "logits_per_token": -1.5219141244888306, "logits_per_char": -0.7609570622444153, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.353524923324585, "incorrect_loss_raw": 1.425103982289632, "correct_loss_per_char": 0.6767624616622925, "incorrect_loss_per_char": 0.712551991144816, "correct_loss_per_token": 1.353524923324585, "incorrect_loss_per_token": 1.425103982289632, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1463602781295776, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": true, "logits_per_token": -1.1463602781295776, "logits_per_char": -0.5731801390647888, "num_chars": 2}, {"sum_logits": -1.353524923324585, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.353524923324585, "logits_per_char": -0.6767624616622925, "num_chars": 2}, {"sum_logits": -1.6434221267700195, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.6434221267700195, "logits_per_char": -0.8217110633850098, "num_chars": 2}, {"sum_logits": -1.4855295419692993, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4855295419692993, "logits_per_char": -0.7427647709846497, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4516973495483398, "incorrect_loss_raw": 1.3800753355026245, "correct_loss_per_char": 0.7258486747741699, "incorrect_loss_per_char": 0.6900376677513123, "correct_loss_per_token": 1.4516973495483398, "incorrect_loss_per_token": 1.3800753355026245, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2895530462265015, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": true, "logits_per_token": -1.2895530462265015, "logits_per_char": -0.6447765231132507, "num_chars": 2}, {"sum_logits": -1.3827507495880127, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": false, "logits_per_token": -1.3827507495880127, "logits_per_char": -0.6913753747940063, "num_chars": 2}, {"sum_logits": -1.4679222106933594, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": false, "logits_per_token": -1.4679222106933594, "logits_per_char": -0.7339611053466797, "num_chars": 2}, {"sum_logits": -1.4516973495483398, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": false, "logits_per_token": -1.4516973495483398, "logits_per_char": -0.7258486747741699, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4155817031860352, "incorrect_loss_raw": 1.4031356573104858, "correct_loss_per_char": 0.7077908515930176, "incorrect_loss_per_char": 0.7015678286552429, "correct_loss_per_token": 1.4155817031860352, "incorrect_loss_per_token": 1.4031356573104858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4155817031860352, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.4155817031860352, "logits_per_char": -0.7077908515930176, "num_chars": 2}, {"sum_logits": -1.4668270349502563, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.4668270349502563, "logits_per_char": -0.7334135174751282, "num_chars": 2}, {"sum_logits": -1.573777437210083, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.573777437210083, "logits_per_char": -0.7868887186050415, "num_chars": 2}, {"sum_logits": -1.1688024997711182, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": true, "logits_per_token": -1.1688024997711182, "logits_per_char": -0.5844012498855591, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0823721885681152, "incorrect_loss_raw": 1.5188255310058594, "correct_loss_per_char": 0.5411860942840576, "incorrect_loss_per_char": 0.7594127655029297, "correct_loss_per_token": 1.0823721885681152, "incorrect_loss_per_token": 1.5188255310058594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0823721885681152, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": true, "logits_per_token": -1.0823721885681152, "logits_per_char": -0.5411860942840576, "num_chars": 2}, {"sum_logits": -1.5038608312606812, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.5038608312606812, "logits_per_char": -0.7519304156303406, "num_chars": 2}, {"sum_logits": -1.5803395509719849, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.5803395509719849, "logits_per_char": -0.7901697754859924, "num_chars": 2}, {"sum_logits": -1.472276210784912, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.472276210784912, "logits_per_char": -0.736138105392456, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2171084880828857, "incorrect_loss_raw": 1.4562356074651082, "correct_loss_per_char": 0.6085542440414429, "incorrect_loss_per_char": 0.7281178037325541, "correct_loss_per_token": 1.2171084880828857, "incorrect_loss_per_token": 1.4562356074651082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.378502607345581, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.378502607345581, "logits_per_char": -0.6892513036727905, "num_chars": 2}, {"sum_logits": -1.2171084880828857, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": true, "logits_per_token": -1.2171084880828857, "logits_per_char": -0.6085542440414429, "num_chars": 2}, {"sum_logits": -1.4790360927581787, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.4790360927581787, "logits_per_char": -0.7395180463790894, "num_chars": 2}, {"sum_logits": -1.511168122291565, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.511168122291565, "logits_per_char": -0.7555840611457825, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3887826204299927, "incorrect_loss_raw": 1.4173829555511475, "correct_loss_per_char": 0.6943913102149963, "incorrect_loss_per_char": 0.7086914777755737, "correct_loss_per_token": 1.3887826204299927, "incorrect_loss_per_token": 1.4173829555511475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.106337308883667, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": true, "logits_per_token": -1.106337308883667, "logits_per_char": -0.5531686544418335, "num_chars": 2}, {"sum_logits": -1.3887826204299927, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.3887826204299927, "logits_per_char": -0.6943913102149963, "num_chars": 2}, {"sum_logits": -1.5347598791122437, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.5347598791122437, "logits_per_char": -0.7673799395561218, "num_chars": 2}, {"sum_logits": -1.6110516786575317, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.6110516786575317, "logits_per_char": -0.8055258393287659, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5026233196258545, "incorrect_loss_raw": 1.3604280948638916, "correct_loss_per_char": 0.7513116598129272, "incorrect_loss_per_char": 0.6802140474319458, "correct_loss_per_token": 1.5026233196258545, "incorrect_loss_per_token": 1.3604280948638916, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.256125569343567, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": true, "logits_per_token": -1.256125569343567, "logits_per_char": -0.6280627846717834, "num_chars": 2}, {"sum_logits": -1.3411734104156494, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.3411734104156494, "logits_per_char": -0.6705867052078247, "num_chars": 2}, {"sum_logits": -1.4839853048324585, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.4839853048324585, "logits_per_char": -0.7419926524162292, "num_chars": 2}, {"sum_logits": -1.5026233196258545, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.5026233196258545, "logits_per_char": -0.7513116598129272, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4501641988754272, "incorrect_loss_raw": 1.3759239117304485, "correct_loss_per_char": 0.7250820994377136, "incorrect_loss_per_char": 0.6879619558652242, "correct_loss_per_token": 1.4501641988754272, "incorrect_loss_per_token": 1.3759239117304485, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.435361623764038, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.435361623764038, "logits_per_char": -0.717680811882019, "num_chars": 2}, {"sum_logits": -1.2255363464355469, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": true, "logits_per_token": -1.2255363464355469, "logits_per_char": -0.6127681732177734, "num_chars": 2}, {"sum_logits": -1.4501641988754272, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.4501641988754272, "logits_per_char": -0.7250820994377136, "num_chars": 2}, {"sum_logits": -1.4668737649917603, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.4668737649917603, "logits_per_char": -0.7334368824958801, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2460753917694092, "incorrect_loss_raw": 1.4545889298121135, "correct_loss_per_char": 0.6230376958847046, "incorrect_loss_per_char": 0.7272944649060568, "correct_loss_per_token": 1.2460753917694092, "incorrect_loss_per_token": 1.4545889298121135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2460753917694092, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": true, "logits_per_token": -1.2460753917694092, "logits_per_char": -0.6230376958847046, "num_chars": 2}, {"sum_logits": -1.4872993230819702, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.4872993230819702, "logits_per_char": -0.7436496615409851, "num_chars": 2}, {"sum_logits": -1.5999515056610107, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.5999515056610107, "logits_per_char": -0.7999757528305054, "num_chars": 2}, {"sum_logits": -1.2765159606933594, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.2765159606933594, "logits_per_char": -0.6382579803466797, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.506055235862732, "incorrect_loss_raw": 1.3738034963607788, "correct_loss_per_char": 0.753027617931366, "incorrect_loss_per_char": 0.6869017481803894, "correct_loss_per_token": 1.506055235862732, "incorrect_loss_per_token": 1.3738034963607788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2215967178344727, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": true, "logits_per_token": -1.2215967178344727, "logits_per_char": -0.6107983589172363, "num_chars": 2}, {"sum_logits": -1.2401022911071777, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.2401022911071777, "logits_per_char": -0.6200511455535889, "num_chars": 2}, {"sum_logits": -1.659711480140686, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.659711480140686, "logits_per_char": -0.829855740070343, "num_chars": 2}, {"sum_logits": -1.506055235862732, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.506055235862732, "logits_per_char": -0.753027617931366, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4440948963165283, "incorrect_loss_raw": 1.6794181267420452, "correct_loss_per_char": 0.7220474481582642, "incorrect_loss_per_char": 0.8397090633710226, "correct_loss_per_token": 1.4440948963165283, "incorrect_loss_per_token": 1.6794181267420452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6396166086196899, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -0.6396166086196899, "logits_per_char": -0.31980830430984497, "num_chars": 2}, {"sum_logits": -1.4440948963165283, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.4440948963165283, "logits_per_char": -0.7220474481582642, "num_chars": 2}, {"sum_logits": -2.1344218254089355, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -2.1344218254089355, "logits_per_char": -1.0672109127044678, "num_chars": 2}, {"sum_logits": -2.2642159461975098, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -2.2642159461975098, "logits_per_char": -1.1321079730987549, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3827012777328491, "incorrect_loss_raw": 1.3977259794871013, "correct_loss_per_char": 0.6913506388664246, "incorrect_loss_per_char": 0.6988629897435507, "correct_loss_per_token": 1.3827012777328491, "incorrect_loss_per_token": 1.3977259794871013, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3827012777328491, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.3827012777328491, "logits_per_char": -0.6913506388664246, "num_chars": 2}, {"sum_logits": -1.2621454000473022, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": true, "logits_per_token": -1.2621454000473022, "logits_per_char": -0.6310727000236511, "num_chars": 2}, {"sum_logits": -1.488463044166565, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.488463044166565, "logits_per_char": -0.7442315220832825, "num_chars": 2}, {"sum_logits": -1.4425694942474365, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.4425694942474365, "logits_per_char": -0.7212847471237183, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2147289514541626, "incorrect_loss_raw": 1.4653177658716838, "correct_loss_per_char": 0.6073644757270813, "incorrect_loss_per_char": 0.7326588829358419, "correct_loss_per_token": 1.2147289514541626, "incorrect_loss_per_token": 1.4653177658716838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.299267053604126, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.299267053604126, "logits_per_char": -0.649633526802063, "num_chars": 2}, {"sum_logits": -1.2147289514541626, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": true, "logits_per_token": -1.2147289514541626, "logits_per_char": -0.6073644757270813, "num_chars": 2}, {"sum_logits": -1.5420092344284058, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.5420092344284058, "logits_per_char": -0.7710046172142029, "num_chars": 2}, {"sum_logits": -1.5546770095825195, "num_tokens": 1, "num_tokens_all": 634, "is_greedy": false, "logits_per_token": -1.5546770095825195, "logits_per_char": -0.7773385047912598, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.372717261314392, "incorrect_loss_raw": 1.405153791109721, "correct_loss_per_char": 0.686358630657196, "incorrect_loss_per_char": 0.7025768955548605, "correct_loss_per_token": 1.372717261314392, "incorrect_loss_per_token": 1.405153791109721, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.229110598564148, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": true, "logits_per_token": -1.229110598564148, "logits_per_char": -0.614555299282074, "num_chars": 2}, {"sum_logits": -1.450103998184204, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.450103998184204, "logits_per_char": -0.725051999092102, "num_chars": 2}, {"sum_logits": -1.5362467765808105, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.5362467765808105, "logits_per_char": -0.7681233882904053, "num_chars": 2}, {"sum_logits": -1.372717261314392, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.372717261314392, "logits_per_char": -0.686358630657196, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5386680364608765, "incorrect_loss_raw": 1.3454307715098064, "correct_loss_per_char": 0.7693340182304382, "incorrect_loss_per_char": 0.6727153857549032, "correct_loss_per_token": 1.5386680364608765, "incorrect_loss_per_token": 1.3454307715098064, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.353959560394287, "num_tokens": 1, "num_tokens_all": 639, "is_greedy": false, "logits_per_token": -1.353959560394287, "logits_per_char": -0.6769797801971436, "num_chars": 2}, {"sum_logits": -1.299115777015686, "num_tokens": 1, "num_tokens_all": 639, "is_greedy": true, "logits_per_token": -1.299115777015686, "logits_per_char": -0.649557888507843, "num_chars": 2}, {"sum_logits": -1.5386680364608765, "num_tokens": 1, "num_tokens_all": 639, "is_greedy": false, "logits_per_token": -1.5386680364608765, "logits_per_char": -0.7693340182304382, "num_chars": 2}, {"sum_logits": -1.3832169771194458, "num_tokens": 1, "num_tokens_all": 639, "is_greedy": false, "logits_per_token": -1.3832169771194458, "logits_per_char": -0.6916084885597229, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3160881996154785, "incorrect_loss_raw": 1.4195162455240886, "correct_loss_per_char": 0.6580440998077393, "incorrect_loss_per_char": 0.7097581227620443, "correct_loss_per_token": 1.3160881996154785, "incorrect_loss_per_token": 1.4195162455240886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3160881996154785, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": true, "logits_per_token": -1.3160881996154785, "logits_per_char": -0.6580440998077393, "num_chars": 2}, {"sum_logits": -1.3980923891067505, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": false, "logits_per_token": -1.3980923891067505, "logits_per_char": -0.6990461945533752, "num_chars": 2}, {"sum_logits": -1.3727365732192993, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": false, "logits_per_token": -1.3727365732192993, "logits_per_char": -0.6863682866096497, "num_chars": 2}, {"sum_logits": -1.4877197742462158, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": false, "logits_per_token": -1.4877197742462158, "logits_per_char": -0.7438598871231079, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.428689956665039, "incorrect_loss_raw": 1.3911460240681965, "correct_loss_per_char": 0.7143449783325195, "incorrect_loss_per_char": 0.6955730120340983, "correct_loss_per_token": 1.428689956665039, "incorrect_loss_per_token": 1.3911460240681965, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2492142915725708, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": true, "logits_per_token": -1.2492142915725708, "logits_per_char": -0.6246071457862854, "num_chars": 2}, {"sum_logits": -1.5044353008270264, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.5044353008270264, "logits_per_char": -0.7522176504135132, "num_chars": 2}, {"sum_logits": -1.428689956665039, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.428689956665039, "logits_per_char": -0.7143449783325195, "num_chars": 2}, {"sum_logits": -1.4197884798049927, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.4197884798049927, "logits_per_char": -0.7098942399024963, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4589651823043823, "incorrect_loss_raw": 1.3787070512771606, "correct_loss_per_char": 0.7294825911521912, "incorrect_loss_per_char": 0.6893535256385803, "correct_loss_per_token": 1.4589651823043823, "incorrect_loss_per_token": 1.3787070512771606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3957531452178955, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.3957531452178955, "logits_per_char": -0.6978765726089478, "num_chars": 2}, {"sum_logits": -1.1926366090774536, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": true, "logits_per_token": -1.1926366090774536, "logits_per_char": -0.5963183045387268, "num_chars": 2}, {"sum_logits": -1.4589651823043823, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.4589651823043823, "logits_per_char": -0.7294825911521912, "num_chars": 2}, {"sum_logits": -1.5477313995361328, "num_tokens": 1, "num_tokens_all": 604, "is_greedy": false, "logits_per_token": -1.5477313995361328, "logits_per_char": -0.7738656997680664, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3147870302200317, "incorrect_loss_raw": 1.4204761187235515, "correct_loss_per_char": 0.6573935151100159, "incorrect_loss_per_char": 0.7102380593617758, "correct_loss_per_token": 1.3147870302200317, "incorrect_loss_per_token": 1.4204761187235515, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3147870302200317, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.3147870302200317, "logits_per_char": -0.6573935151100159, "num_chars": 2}, {"sum_logits": -1.3135465383529663, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": true, "logits_per_token": -1.3135465383529663, "logits_per_char": -0.6567732691764832, "num_chars": 2}, {"sum_logits": -1.4322971105575562, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.4322971105575562, "logits_per_char": -0.7161485552787781, "num_chars": 2}, {"sum_logits": -1.5155847072601318, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.5155847072601318, "logits_per_char": -0.7577923536300659, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.400019884109497, "incorrect_loss_raw": 1.3975374301274617, "correct_loss_per_char": 0.7000099420547485, "incorrect_loss_per_char": 0.6987687150637308, "correct_loss_per_token": 1.400019884109497, "incorrect_loss_per_token": 1.3975374301274617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5040724277496338, "num_tokens": 1, "num_tokens_all": 660, "is_greedy": false, "logits_per_token": -1.5040724277496338, "logits_per_char": -0.7520362138748169, "num_chars": 2}, {"sum_logits": -1.2126063108444214, "num_tokens": 1, "num_tokens_all": 660, "is_greedy": true, "logits_per_token": -1.2126063108444214, "logits_per_char": -0.6063031554222107, "num_chars": 2}, {"sum_logits": -1.400019884109497, "num_tokens": 1, "num_tokens_all": 660, "is_greedy": false, "logits_per_token": -1.400019884109497, "logits_per_char": -0.7000099420547485, "num_chars": 2}, {"sum_logits": -1.47593355178833, "num_tokens": 1, "num_tokens_all": 660, "is_greedy": false, "logits_per_token": -1.47593355178833, "logits_per_char": -0.737966775894165, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.427485704421997, "incorrect_loss_raw": 1.3888726631800334, "correct_loss_per_char": 0.7137428522109985, "incorrect_loss_per_char": 0.6944363315900167, "correct_loss_per_token": 1.427485704421997, "incorrect_loss_per_token": 1.3888726631800334, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3010457754135132, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": true, "logits_per_token": -1.3010457754135132, "logits_per_char": -0.6505228877067566, "num_chars": 2}, {"sum_logits": -1.3835535049438477, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.3835535049438477, "logits_per_char": -0.6917767524719238, "num_chars": 2}, {"sum_logits": -1.4820187091827393, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.4820187091827393, "logits_per_char": -0.7410093545913696, "num_chars": 2}, {"sum_logits": -1.427485704421997, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.427485704421997, "logits_per_char": -0.7137428522109985, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.401568055152893, "incorrect_loss_raw": 1.42706294854482, "correct_loss_per_char": 0.7007840275764465, "incorrect_loss_per_char": 0.71353147427241, "correct_loss_per_token": 1.401568055152893, "incorrect_loss_per_token": 1.42706294854482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.09346342086792, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": true, "logits_per_token": -1.09346342086792, "logits_per_char": -0.54673171043396, "num_chars": 2}, {"sum_logits": -1.3921412229537964, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.3921412229537964, "logits_per_char": -0.6960706114768982, "num_chars": 2}, {"sum_logits": -1.7955842018127441, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.7955842018127441, "logits_per_char": -0.8977921009063721, "num_chars": 2}, {"sum_logits": -1.401568055152893, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.401568055152893, "logits_per_char": -0.7007840275764465, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.271008014678955, "incorrect_loss_raw": 1.439717451731364, "correct_loss_per_char": 0.6355040073394775, "incorrect_loss_per_char": 0.719858725865682, "correct_loss_per_token": 1.271008014678955, "incorrect_loss_per_token": 1.439717451731364, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.271008014678955, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": true, "logits_per_token": -1.271008014678955, "logits_per_char": -0.6355040073394775, "num_chars": 2}, {"sum_logits": -1.3239736557006836, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.3239736557006836, "logits_per_char": -0.6619868278503418, "num_chars": 2}, {"sum_logits": -1.5802501440048218, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.5802501440048218, "logits_per_char": -0.7901250720024109, "num_chars": 2}, {"sum_logits": -1.4149285554885864, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.4149285554885864, "logits_per_char": -0.7074642777442932, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.323085069656372, "incorrect_loss_raw": 1.433497428894043, "correct_loss_per_char": 0.661542534828186, "incorrect_loss_per_char": 0.7167487144470215, "correct_loss_per_token": 1.323085069656372, "incorrect_loss_per_token": 1.433497428894043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2618281841278076, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": true, "logits_per_token": -1.2618281841278076, "logits_per_char": -0.6309140920639038, "num_chars": 2}, {"sum_logits": -1.323085069656372, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.323085069656372, "logits_per_char": -0.661542534828186, "num_chars": 2}, {"sum_logits": -1.5408786535263062, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.5408786535263062, "logits_per_char": -0.7704393267631531, "num_chars": 2}, {"sum_logits": -1.4977854490280151, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.4977854490280151, "logits_per_char": -0.7488927245140076, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1934915781021118, "incorrect_loss_raw": 1.4767775932947795, "correct_loss_per_char": 0.5967457890510559, "incorrect_loss_per_char": 0.7383887966473898, "correct_loss_per_token": 1.1934915781021118, "incorrect_loss_per_token": 1.4767775932947795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1934915781021118, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": true, "logits_per_token": -1.1934915781021118, "logits_per_char": -0.5967457890510559, "num_chars": 2}, {"sum_logits": -1.275671124458313, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.275671124458313, "logits_per_char": -0.6378355622291565, "num_chars": 2}, {"sum_logits": -1.5313845872879028, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.5313845872879028, "logits_per_char": -0.7656922936439514, "num_chars": 2}, {"sum_logits": -1.6232770681381226, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.6232770681381226, "logits_per_char": -0.8116385340690613, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.368620753288269, "incorrect_loss_raw": 1.4008495807647705, "correct_loss_per_char": 0.6843103766441345, "incorrect_loss_per_char": 0.7004247903823853, "correct_loss_per_token": 1.368620753288269, "incorrect_loss_per_token": 1.4008495807647705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.368620753288269, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": false, "logits_per_token": -1.368620753288269, "logits_per_char": -0.6843103766441345, "num_chars": 2}, {"sum_logits": -1.2781721353530884, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": true, "logits_per_token": -1.2781721353530884, "logits_per_char": -0.6390860676765442, "num_chars": 2}, {"sum_logits": -1.4659340381622314, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": false, "logits_per_token": -1.4659340381622314, "logits_per_char": -0.7329670190811157, "num_chars": 2}, {"sum_logits": -1.4584425687789917, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": false, "logits_per_token": -1.4584425687789917, "logits_per_char": -0.7292212843894958, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2454901933670044, "incorrect_loss_raw": 1.4528294404347737, "correct_loss_per_char": 0.6227450966835022, "incorrect_loss_per_char": 0.7264147202173868, "correct_loss_per_token": 1.2454901933670044, "incorrect_loss_per_token": 1.4528294404347737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2454901933670044, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": true, "logits_per_token": -1.2454901933670044, "logits_per_char": -0.6227450966835022, "num_chars": 2}, {"sum_logits": -1.2641611099243164, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.2641611099243164, "logits_per_char": -0.6320805549621582, "num_chars": 2}, {"sum_logits": -1.5079621076583862, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.5079621076583862, "logits_per_char": -0.7539810538291931, "num_chars": 2}, {"sum_logits": -1.5863651037216187, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.5863651037216187, "logits_per_char": -0.7931825518608093, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4328961372375488, "incorrect_loss_raw": 1.3923848470052083, "correct_loss_per_char": 0.7164480686187744, "incorrect_loss_per_char": 0.6961924235026041, "correct_loss_per_token": 1.4328961372375488, "incorrect_loss_per_token": 1.3923848470052083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.208275318145752, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": true, "logits_per_token": -1.208275318145752, "logits_per_char": -0.604137659072876, "num_chars": 2}, {"sum_logits": -1.3696372509002686, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.3696372509002686, "logits_per_char": -0.6848186254501343, "num_chars": 2}, {"sum_logits": -1.5992419719696045, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.5992419719696045, "logits_per_char": -0.7996209859848022, "num_chars": 2}, {"sum_logits": -1.4328961372375488, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.4328961372375488, "logits_per_char": -0.7164480686187744, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3737162351608276, "incorrect_loss_raw": 1.4090932607650757, "correct_loss_per_char": 0.6868581175804138, "incorrect_loss_per_char": 0.7045466303825378, "correct_loss_per_token": 1.3737162351608276, "incorrect_loss_per_token": 1.4090932607650757, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3737162351608276, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.3737162351608276, "logits_per_char": -0.6868581175804138, "num_chars": 2}, {"sum_logits": -1.1876049041748047, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": true, "logits_per_token": -1.1876049041748047, "logits_per_char": -0.5938024520874023, "num_chars": 2}, {"sum_logits": -1.4886666536331177, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.4886666536331177, "logits_per_char": -0.7443333268165588, "num_chars": 2}, {"sum_logits": -1.5510082244873047, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.5510082244873047, "logits_per_char": -0.7755041122436523, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2344279289245605, "incorrect_loss_raw": 1.452425201733907, "correct_loss_per_char": 0.6172139644622803, "incorrect_loss_per_char": 0.7262126008669535, "correct_loss_per_token": 1.2344279289245605, "incorrect_loss_per_token": 1.452425201733907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2344279289245605, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.2344279289245605, "logits_per_char": -0.6172139644622803, "num_chars": 2}, {"sum_logits": -1.3483071327209473, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.3483071327209473, "logits_per_char": -0.6741535663604736, "num_chars": 2}, {"sum_logits": -1.571611762046814, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.571611762046814, "logits_per_char": -0.785805881023407, "num_chars": 2}, {"sum_logits": -1.43735671043396, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.43735671043396, "logits_per_char": -0.71867835521698, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2874256372451782, "incorrect_loss_raw": 1.4413442611694336, "correct_loss_per_char": 0.6437128186225891, "incorrect_loss_per_char": 0.7206721305847168, "correct_loss_per_token": 1.2874256372451782, "incorrect_loss_per_token": 1.4413442611694336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2874256372451782, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": true, "logits_per_token": -1.2874256372451782, "logits_per_char": -0.6437128186225891, "num_chars": 2}, {"sum_logits": -1.4479539394378662, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.4479539394378662, "logits_per_char": -0.7239769697189331, "num_chars": 2}, {"sum_logits": -1.450472354888916, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.450472354888916, "logits_per_char": -0.725236177444458, "num_chars": 2}, {"sum_logits": -1.4256064891815186, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.4256064891815186, "logits_per_char": -0.7128032445907593, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4188344478607178, "incorrect_loss_raw": 1.3944705327351887, "correct_loss_per_char": 0.7094172239303589, "incorrect_loss_per_char": 0.6972352663675944, "correct_loss_per_token": 1.4188344478607178, "incorrect_loss_per_token": 1.3944705327351887, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2649483680725098, "num_tokens": 1, "num_tokens_all": 552, "is_greedy": true, "logits_per_token": -1.2649483680725098, "logits_per_char": -0.6324741840362549, "num_chars": 2}, {"sum_logits": -1.4188344478607178, "num_tokens": 1, "num_tokens_all": 552, "is_greedy": false, "logits_per_token": -1.4188344478607178, "logits_per_char": -0.7094172239303589, "num_chars": 2}, {"sum_logits": -1.4909515380859375, "num_tokens": 1, "num_tokens_all": 552, "is_greedy": false, "logits_per_token": -1.4909515380859375, "logits_per_char": -0.7454757690429688, "num_chars": 2}, {"sum_logits": -1.4275116920471191, "num_tokens": 1, "num_tokens_all": 552, "is_greedy": false, "logits_per_token": -1.4275116920471191, "logits_per_char": -0.7137558460235596, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.447758674621582, "incorrect_loss_raw": 1.3822577794392903, "correct_loss_per_char": 0.723879337310791, "incorrect_loss_per_char": 0.6911288897196451, "correct_loss_per_token": 1.447758674621582, "incorrect_loss_per_token": 1.3822577794392903, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2917790412902832, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": true, "logits_per_token": -1.2917790412902832, "logits_per_char": -0.6458895206451416, "num_chars": 2}, {"sum_logits": -1.447758674621582, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": false, "logits_per_token": -1.447758674621582, "logits_per_char": -0.723879337310791, "num_chars": 2}, {"sum_logits": -1.3853776454925537, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": false, "logits_per_token": -1.3853776454925537, "logits_per_char": -0.6926888227462769, "num_chars": 2}, {"sum_logits": -1.4696166515350342, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": false, "logits_per_token": -1.4696166515350342, "logits_per_char": -0.7348083257675171, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4191431999206543, "incorrect_loss_raw": 1.3912144502003987, "correct_loss_per_char": 0.7095715999603271, "incorrect_loss_per_char": 0.6956072251001993, "correct_loss_per_token": 1.4191431999206543, "incorrect_loss_per_token": 1.3912144502003987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4191431999206543, "num_tokens": 1, "num_tokens_all": 631, "is_greedy": false, "logits_per_token": -1.4191431999206543, "logits_per_char": -0.7095715999603271, "num_chars": 2}, {"sum_logits": -1.1845588684082031, "num_tokens": 1, "num_tokens_all": 631, "is_greedy": true, "logits_per_token": -1.1845588684082031, "logits_per_char": -0.5922794342041016, "num_chars": 2}, {"sum_logits": -1.4991720914840698, "num_tokens": 1, "num_tokens_all": 631, "is_greedy": false, "logits_per_token": -1.4991720914840698, "logits_per_char": -0.7495860457420349, "num_chars": 2}, {"sum_logits": -1.4899123907089233, "num_tokens": 1, "num_tokens_all": 631, "is_greedy": false, "logits_per_token": -1.4899123907089233, "logits_per_char": -0.7449561953544617, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7261022329330444, "incorrect_loss_raw": 1.3181314865748088, "correct_loss_per_char": 0.8630511164665222, "incorrect_loss_per_char": 0.6590657432874044, "correct_loss_per_token": 1.7261022329330444, "incorrect_loss_per_token": 1.3181314865748088, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0632212162017822, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": true, "logits_per_token": -1.0632212162017822, "logits_per_char": -0.5316106081008911, "num_chars": 2}, {"sum_logits": -1.4087066650390625, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.4087066650390625, "logits_per_char": -0.7043533325195312, "num_chars": 2}, {"sum_logits": -1.7261022329330444, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.7261022329330444, "logits_per_char": -0.8630511164665222, "num_chars": 2}, {"sum_logits": -1.4824665784835815, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.4824665784835815, "logits_per_char": -0.7412332892417908, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3855624198913574, "incorrect_loss_raw": 1.4146777788798015, "correct_loss_per_char": 0.6927812099456787, "incorrect_loss_per_char": 0.7073388894399008, "correct_loss_per_token": 1.3855624198913574, "incorrect_loss_per_token": 1.4146777788798015, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1186447143554688, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": true, "logits_per_token": -1.1186447143554688, "logits_per_char": -0.5593223571777344, "num_chars": 2}, {"sum_logits": -1.3855624198913574, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.3855624198913574, "logits_per_char": -0.6927812099456787, "num_chars": 2}, {"sum_logits": -1.5660510063171387, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.5660510063171387, "logits_per_char": -0.7830255031585693, "num_chars": 2}, {"sum_logits": -1.5593376159667969, "num_tokens": 1, "num_tokens_all": 582, "is_greedy": false, "logits_per_token": -1.5593376159667969, "logits_per_char": -0.7796688079833984, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.421808123588562, "incorrect_loss_raw": 1.386778752009074, "correct_loss_per_char": 0.710904061794281, "incorrect_loss_per_char": 0.693389376004537, "correct_loss_per_token": 1.421808123588562, "incorrect_loss_per_token": 1.386778752009074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2724965810775757, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": true, "logits_per_token": -1.2724965810775757, "logits_per_char": -0.6362482905387878, "num_chars": 2}, {"sum_logits": -1.3615505695343018, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.3615505695343018, "logits_per_char": -0.6807752847671509, "num_chars": 2}, {"sum_logits": -1.421808123588562, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.421808123588562, "logits_per_char": -0.710904061794281, "num_chars": 2}, {"sum_logits": -1.5262891054153442, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.5262891054153442, "logits_per_char": -0.7631445527076721, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3967790603637695, "incorrect_loss_raw": 1.3887908458709717, "correct_loss_per_char": 0.6983895301818848, "incorrect_loss_per_char": 0.6943954229354858, "correct_loss_per_token": 1.3967790603637695, "incorrect_loss_per_token": 1.3887908458709717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3967790603637695, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.3967790603637695, "logits_per_char": -0.6983895301818848, "num_chars": 2}, {"sum_logits": -1.3650203943252563, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.3650203943252563, "logits_per_char": -0.6825101971626282, "num_chars": 2}, {"sum_logits": -1.4466930627822876, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4466930627822876, "logits_per_char": -0.7233465313911438, "num_chars": 2}, {"sum_logits": -1.354659080505371, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": true, "logits_per_token": -1.354659080505371, "logits_per_char": -0.6773295402526855, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5093504190444946, "incorrect_loss_raw": 1.3606303532918294, "correct_loss_per_char": 0.7546752095222473, "incorrect_loss_per_char": 0.6803151766459147, "correct_loss_per_token": 1.5093504190444946, "incorrect_loss_per_token": 1.3606303532918294, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3342063426971436, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": true, "logits_per_token": -1.3342063426971436, "logits_per_char": -0.6671031713485718, "num_chars": 2}, {"sum_logits": -1.361887812614441, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.361887812614441, "logits_per_char": -0.6809439063072205, "num_chars": 2}, {"sum_logits": -1.5093504190444946, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.5093504190444946, "logits_per_char": -0.7546752095222473, "num_chars": 2}, {"sum_logits": -1.3857969045639038, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.3857969045639038, "logits_per_char": -0.6928984522819519, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9363755583763123, "incorrect_loss_raw": 1.6592618624369304, "correct_loss_per_char": 0.46818777918815613, "incorrect_loss_per_char": 0.8296309312184652, "correct_loss_per_token": 0.9363755583763123, "incorrect_loss_per_token": 1.6592618624369304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9363755583763123, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": true, "logits_per_token": -0.9363755583763123, "logits_per_char": -0.46818777918815613, "num_chars": 2}, {"sum_logits": -1.210566520690918, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.210566520690918, "logits_per_char": -0.605283260345459, "num_chars": 2}, {"sum_logits": -1.8569862842559814, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.8569862842559814, "logits_per_char": -0.9284931421279907, "num_chars": 2}, {"sum_logits": -1.9102327823638916, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.9102327823638916, "logits_per_char": -0.9551163911819458, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.408655047416687, "incorrect_loss_raw": 1.397432525952657, "correct_loss_per_char": 0.7043275237083435, "incorrect_loss_per_char": 0.6987162629763285, "correct_loss_per_token": 1.408655047416687, "incorrect_loss_per_token": 1.397432525952657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.408655047416687, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.408655047416687, "logits_per_char": -0.7043275237083435, "num_chars": 2}, {"sum_logits": -1.1756651401519775, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": true, "logits_per_token": -1.1756651401519775, "logits_per_char": -0.5878325700759888, "num_chars": 2}, {"sum_logits": -1.4455314874649048, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.4455314874649048, "logits_per_char": -0.7227657437324524, "num_chars": 2}, {"sum_logits": -1.5711009502410889, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.5711009502410889, "logits_per_char": -0.7855504751205444, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5290168523788452, "incorrect_loss_raw": 1.3701648314793904, "correct_loss_per_char": 0.7645084261894226, "incorrect_loss_per_char": 0.6850824157396952, "correct_loss_per_token": 1.5290168523788452, "incorrect_loss_per_token": 1.3701648314793904, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2660075426101685, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": true, "logits_per_token": -1.2660075426101685, "logits_per_char": -0.6330037713050842, "num_chars": 2}, {"sum_logits": -1.5290168523788452, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.5290168523788452, "logits_per_char": -0.7645084261894226, "num_chars": 2}, {"sum_logits": -1.4684911966323853, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.4684911966323853, "logits_per_char": -0.7342455983161926, "num_chars": 2}, {"sum_logits": -1.3759957551956177, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.3759957551956177, "logits_per_char": -0.6879978775978088, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5449706315994263, "incorrect_loss_raw": 1.353595534960429, "correct_loss_per_char": 0.7724853157997131, "incorrect_loss_per_char": 0.6767977674802145, "correct_loss_per_token": 1.5449706315994263, "incorrect_loss_per_token": 1.353595534960429, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2193832397460938, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": true, "logits_per_token": -1.2193832397460938, "logits_per_char": -0.6096916198730469, "num_chars": 2}, {"sum_logits": -1.3004474639892578, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.3004474639892578, "logits_per_char": -0.6502237319946289, "num_chars": 2}, {"sum_logits": -1.5449706315994263, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.5449706315994263, "logits_per_char": -0.7724853157997131, "num_chars": 2}, {"sum_logits": -1.540955901145935, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.540955901145935, "logits_per_char": -0.7704779505729675, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4352370500564575, "incorrect_loss_raw": 1.3854208787282307, "correct_loss_per_char": 0.7176185250282288, "incorrect_loss_per_char": 0.6927104393641154, "correct_loss_per_token": 1.4352370500564575, "incorrect_loss_per_token": 1.3854208787282307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2891645431518555, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.2891645431518555, "logits_per_char": -0.6445822715759277, "num_chars": 2}, {"sum_logits": -1.278756856918335, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": true, "logits_per_token": -1.278756856918335, "logits_per_char": -0.6393784284591675, "num_chars": 2}, {"sum_logits": -1.588341236114502, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.588341236114502, "logits_per_char": -0.794170618057251, "num_chars": 2}, {"sum_logits": -1.4352370500564575, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.4352370500564575, "logits_per_char": -0.7176185250282288, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3610649108886719, "incorrect_loss_raw": 1.4134240945180256, "correct_loss_per_char": 0.6805324554443359, "incorrect_loss_per_char": 0.7067120472590128, "correct_loss_per_token": 1.3610649108886719, "incorrect_loss_per_token": 1.4134240945180256, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2648805379867554, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": true, "logits_per_token": -1.2648805379867554, "logits_per_char": -0.6324402689933777, "num_chars": 2}, {"sum_logits": -1.3348489999771118, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.3348489999771118, "logits_per_char": -0.6674244999885559, "num_chars": 2}, {"sum_logits": -1.64054274559021, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.64054274559021, "logits_per_char": -0.820271372795105, "num_chars": 2}, {"sum_logits": -1.3610649108886719, "num_tokens": 1, "num_tokens_all": 591, "is_greedy": false, "logits_per_token": -1.3610649108886719, "logits_per_char": -0.6805324554443359, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4689654111862183, "incorrect_loss_raw": 1.3787525494893391, "correct_loss_per_char": 0.7344827055931091, "incorrect_loss_per_char": 0.6893762747446696, "correct_loss_per_token": 1.4689654111862183, "incorrect_loss_per_token": 1.3787525494893391, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.266144871711731, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": true, "logits_per_token": -1.266144871711731, "logits_per_char": -0.6330724358558655, "num_chars": 2}, {"sum_logits": -1.395070195198059, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.395070195198059, "logits_per_char": -0.6975350975990295, "num_chars": 2}, {"sum_logits": -1.4689654111862183, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.4689654111862183, "logits_per_char": -0.7344827055931091, "num_chars": 2}, {"sum_logits": -1.4750425815582275, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.4750425815582275, "logits_per_char": -0.7375212907791138, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5570050477981567, "incorrect_loss_raw": 1.3442353010177612, "correct_loss_per_char": 0.7785025238990784, "incorrect_loss_per_char": 0.6721176505088806, "correct_loss_per_token": 1.5570050477981567, "incorrect_loss_per_token": 1.3442353010177612, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.301849126815796, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.301849126815796, "logits_per_char": -0.650924563407898, "num_chars": 2}, {"sum_logits": -1.2766629457473755, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": true, "logits_per_token": -1.2766629457473755, "logits_per_char": -0.6383314728736877, "num_chars": 2}, {"sum_logits": -1.5570050477981567, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.5570050477981567, "logits_per_char": -0.7785025238990784, "num_chars": 2}, {"sum_logits": -1.4541938304901123, "num_tokens": 1, "num_tokens_all": 590, "is_greedy": false, "logits_per_token": -1.4541938304901123, "logits_per_char": -0.7270969152450562, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4735645055770874, "incorrect_loss_raw": 1.3680004676183064, "correct_loss_per_char": 0.7367822527885437, "incorrect_loss_per_char": 0.6840002338091532, "correct_loss_per_token": 1.4735645055770874, "incorrect_loss_per_token": 1.3680004676183064, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2844804525375366, "num_tokens": 1, "num_tokens_all": 650, "is_greedy": true, "logits_per_token": -1.2844804525375366, "logits_per_char": -0.6422402262687683, "num_chars": 2}, {"sum_logits": -1.3950196504592896, "num_tokens": 1, "num_tokens_all": 650, "is_greedy": false, "logits_per_token": -1.3950196504592896, "logits_per_char": -0.6975098252296448, "num_chars": 2}, {"sum_logits": -1.4735645055770874, "num_tokens": 1, "num_tokens_all": 650, "is_greedy": false, "logits_per_token": -1.4735645055770874, "logits_per_char": -0.7367822527885437, "num_chars": 2}, {"sum_logits": -1.4245012998580933, "num_tokens": 1, "num_tokens_all": 650, "is_greedy": false, "logits_per_token": -1.4245012998580933, "logits_per_char": -0.7122506499290466, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2890281677246094, "incorrect_loss_raw": 1.4330677191416423, "correct_loss_per_char": 0.6445140838623047, "incorrect_loss_per_char": 0.7165338595708212, "correct_loss_per_token": 1.2890281677246094, "incorrect_loss_per_token": 1.4330677191416423, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2890281677246094, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": true, "logits_per_token": -1.2890281677246094, "logits_per_char": -0.6445140838623047, "num_chars": 2}, {"sum_logits": -1.3345301151275635, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.3345301151275635, "logits_per_char": -0.6672650575637817, "num_chars": 2}, {"sum_logits": -1.5718168020248413, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.5718168020248413, "logits_per_char": -0.7859084010124207, "num_chars": 2}, {"sum_logits": -1.392856240272522, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.392856240272522, "logits_per_char": -0.696428120136261, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3750907182693481, "incorrect_loss_raw": 1.4048367341359456, "correct_loss_per_char": 0.6875453591346741, "incorrect_loss_per_char": 0.7024183670679728, "correct_loss_per_token": 1.3750907182693481, "incorrect_loss_per_token": 1.4048367341359456, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2327245473861694, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": true, "logits_per_token": -1.2327245473861694, "logits_per_char": -0.6163622736930847, "num_chars": 2}, {"sum_logits": -1.3750907182693481, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.3750907182693481, "logits_per_char": -0.6875453591346741, "num_chars": 2}, {"sum_logits": -1.5569108724594116, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.5569108724594116, "logits_per_char": -0.7784554362297058, "num_chars": 2}, {"sum_logits": -1.4248747825622559, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.4248747825622559, "logits_per_char": -0.7124373912811279, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2896137237548828, "incorrect_loss_raw": 1.4703148206075032, "correct_loss_per_char": 0.6448068618774414, "incorrect_loss_per_char": 0.7351574103037516, "correct_loss_per_token": 1.2896137237548828, "incorrect_loss_per_token": 1.4703148206075032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.06528902053833, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": true, "logits_per_token": -1.06528902053833, "logits_per_char": -0.532644510269165, "num_chars": 2}, {"sum_logits": -1.2896137237548828, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.2896137237548828, "logits_per_char": -0.6448068618774414, "num_chars": 2}, {"sum_logits": -1.7121386528015137, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.7121386528015137, "logits_per_char": -0.8560693264007568, "num_chars": 2}, {"sum_logits": -1.633516788482666, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.633516788482666, "logits_per_char": -0.816758394241333, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4462579488754272, "incorrect_loss_raw": 1.4051835536956787, "correct_loss_per_char": 0.7231289744377136, "incorrect_loss_per_char": 0.7025917768478394, "correct_loss_per_token": 1.4462579488754272, "incorrect_loss_per_token": 1.4051835536956787, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3982784748077393, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": true, "logits_per_token": -1.3982784748077393, "logits_per_char": -0.6991392374038696, "num_chars": 2}, {"sum_logits": -1.4462579488754272, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": false, "logits_per_token": -1.4462579488754272, "logits_per_char": -0.7231289744377136, "num_chars": 2}, {"sum_logits": -1.399307131767273, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": false, "logits_per_token": -1.399307131767273, "logits_per_char": -0.6996535658836365, "num_chars": 2}, {"sum_logits": -1.417965054512024, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": false, "logits_per_token": -1.417965054512024, "logits_per_char": -0.708982527256012, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1513983011245728, "incorrect_loss_raw": 1.4977586269378662, "correct_loss_per_char": 0.5756991505622864, "incorrect_loss_per_char": 0.7488793134689331, "correct_loss_per_token": 1.1513983011245728, "incorrect_loss_per_token": 1.4977586269378662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1513983011245728, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": true, "logits_per_token": -1.1513983011245728, "logits_per_char": -0.5756991505622864, "num_chars": 2}, {"sum_logits": -1.4364750385284424, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.4364750385284424, "logits_per_char": -0.7182375192642212, "num_chars": 2}, {"sum_logits": -1.5148844718933105, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.5148844718933105, "logits_per_char": -0.7574422359466553, "num_chars": 2}, {"sum_logits": -1.5419163703918457, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.5419163703918457, "logits_per_char": -0.7709581851959229, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4255260229110718, "incorrect_loss_raw": 1.3953544696172078, "correct_loss_per_char": 0.7127630114555359, "incorrect_loss_per_char": 0.6976772348086039, "correct_loss_per_token": 1.4255260229110718, "incorrect_loss_per_token": 1.3953544696172078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2494701147079468, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": true, "logits_per_token": -1.2494701147079468, "logits_per_char": -0.6247350573539734, "num_chars": 2}, {"sum_logits": -1.434281349182129, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.434281349182129, "logits_per_char": -0.7171406745910645, "num_chars": 2}, {"sum_logits": -1.5023119449615479, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.5023119449615479, "logits_per_char": -0.7511559724807739, "num_chars": 2}, {"sum_logits": -1.4255260229110718, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.4255260229110718, "logits_per_char": -0.7127630114555359, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6580007076263428, "incorrect_loss_raw": 1.3346385558446248, "correct_loss_per_char": 0.8290003538131714, "incorrect_loss_per_char": 0.6673192779223124, "correct_loss_per_token": 1.6580007076263428, "incorrect_loss_per_token": 1.3346385558446248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0960439443588257, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": true, "logits_per_token": -1.0960439443588257, "logits_per_char": -0.5480219721794128, "num_chars": 2}, {"sum_logits": -1.3399409055709839, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.3399409055709839, "logits_per_char": -0.6699704527854919, "num_chars": 2}, {"sum_logits": -1.567930817604065, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.567930817604065, "logits_per_char": -0.7839654088020325, "num_chars": 2}, {"sum_logits": -1.6580007076263428, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.6580007076263428, "logits_per_char": -0.8290003538131714, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4641716480255127, "incorrect_loss_raw": 1.3782559633255005, "correct_loss_per_char": 0.7320858240127563, "incorrect_loss_per_char": 0.6891279816627502, "correct_loss_per_token": 1.4641716480255127, "incorrect_loss_per_token": 1.3782559633255005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2794163227081299, "num_tokens": 1, "num_tokens_all": 552, "is_greedy": true, "logits_per_token": -1.2794163227081299, "logits_per_char": -0.6397081613540649, "num_chars": 2}, {"sum_logits": -1.4641716480255127, "num_tokens": 1, "num_tokens_all": 552, "is_greedy": false, "logits_per_token": -1.4641716480255127, "logits_per_char": -0.7320858240127563, "num_chars": 2}, {"sum_logits": -1.3844048976898193, "num_tokens": 1, "num_tokens_all": 552, "is_greedy": false, "logits_per_token": -1.3844048976898193, "logits_per_char": -0.6922024488449097, "num_chars": 2}, {"sum_logits": -1.4709466695785522, "num_tokens": 1, "num_tokens_all": 552, "is_greedy": false, "logits_per_token": -1.4709466695785522, "logits_per_char": -0.7354733347892761, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2329461574554443, "incorrect_loss_raw": 1.4543951749801636, "correct_loss_per_char": 0.6164730787277222, "incorrect_loss_per_char": 0.7271975874900818, "correct_loss_per_token": 1.2329461574554443, "incorrect_loss_per_token": 1.4543951749801636, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2329461574554443, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": true, "logits_per_token": -1.2329461574554443, "logits_per_char": -0.6164730787277222, "num_chars": 2}, {"sum_logits": -1.3114135265350342, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.3114135265350342, "logits_per_char": -0.6557067632675171, "num_chars": 2}, {"sum_logits": -1.4852277040481567, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.4852277040481567, "logits_per_char": -0.7426138520240784, "num_chars": 2}, {"sum_logits": -1.5665442943572998, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.5665442943572998, "logits_per_char": -0.7832721471786499, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4143201112747192, "incorrect_loss_raw": 1.386367956797282, "correct_loss_per_char": 0.7071600556373596, "incorrect_loss_per_char": 0.693183978398641, "correct_loss_per_token": 1.4143201112747192, "incorrect_loss_per_token": 1.386367956797282, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4669681787490845, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.4669681787490845, "logits_per_char": -0.7334840893745422, "num_chars": 2}, {"sum_logits": -1.350791096687317, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.350791096687317, "logits_per_char": -0.6753955483436584, "num_chars": 2}, {"sum_logits": -1.4143201112747192, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.4143201112747192, "logits_per_char": -0.7071600556373596, "num_chars": 2}, {"sum_logits": -1.3413445949554443, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": true, "logits_per_token": -1.3413445949554443, "logits_per_char": -0.6706722974777222, "num_chars": 2}], "label": 2, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1265604496002197, "incorrect_loss_raw": 1.5153005520502727, "correct_loss_per_char": 0.5632802248001099, "incorrect_loss_per_char": 0.7576502760251363, "correct_loss_per_token": 1.1265604496002197, "incorrect_loss_per_token": 1.5153005520502727, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1265604496002197, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": true, "logits_per_token": -1.1265604496002197, "logits_per_char": -0.5632802248001099, "num_chars": 2}, {"sum_logits": -1.2386960983276367, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": false, "logits_per_token": -1.2386960983276367, "logits_per_char": -0.6193480491638184, "num_chars": 2}, {"sum_logits": -1.6574680805206299, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": false, "logits_per_token": -1.6574680805206299, "logits_per_char": -0.8287340402603149, "num_chars": 2}, {"sum_logits": -1.6497374773025513, "num_tokens": 1, "num_tokens_all": 637, "is_greedy": false, "logits_per_token": -1.6497374773025513, "logits_per_char": -0.8248687386512756, "num_chars": 2}], "label": 0, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3870676755905151, "incorrect_loss_raw": 1.3975752592086792, "correct_loss_per_char": 0.6935338377952576, "incorrect_loss_per_char": 0.6987876296043396, "correct_loss_per_token": 1.3870676755905151, "incorrect_loss_per_token": 1.3975752592086792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4002903699874878, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": false, "logits_per_token": -1.4002903699874878, "logits_per_char": -0.7001451849937439, "num_chars": 2}, {"sum_logits": -1.2686842679977417, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": true, "logits_per_token": -1.2686842679977417, "logits_per_char": -0.6343421339988708, "num_chars": 2}, {"sum_logits": -1.523751139640808, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": false, "logits_per_token": -1.523751139640808, "logits_per_char": -0.761875569820404, "num_chars": 2}, {"sum_logits": -1.3870676755905151, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": false, "logits_per_token": -1.3870676755905151, "logits_per_char": -0.6935338377952576, "num_chars": 2}], "label": 3, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3138340711593628, "incorrect_loss_raw": 1.4387552340825398, "correct_loss_per_char": 0.6569170355796814, "incorrect_loss_per_char": 0.7193776170412699, "correct_loss_per_token": 1.3138340711593628, "incorrect_loss_per_token": 1.4387552340825398, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2337629795074463, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": true, "logits_per_token": -1.2337629795074463, "logits_per_char": -0.6168814897537231, "num_chars": 2}, {"sum_logits": -1.3138340711593628, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.3138340711593628, "logits_per_char": -0.6569170355796814, "num_chars": 2}, {"sum_logits": -1.628938913345337, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.628938913345337, "logits_per_char": -0.8144694566726685, "num_chars": 2}, {"sum_logits": -1.4535638093948364, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.4535638093948364, "logits_per_char": -0.7267819046974182, "num_chars": 2}], "label": 1, "task_hash": "7de417726ca2cc155dd1475a38afc381", "model_hash": "03418cf8091a9882619950ffb07429a5"}