|
{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4821772575378418, "incorrect_loss_raw": 1.3705730835596721, "correct_loss_per_char": 0.7410886287689209, "incorrect_loss_per_char": 0.6852865417798361, "correct_loss_per_token": 1.4821772575378418, "incorrect_loss_per_token": 1.3705730835596721, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3105140924453735, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3105140924453735, "logits_per_char": -0.6552570462226868, "num_chars": 2}, {"sum_logits": -1.3415251970291138, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3415251970291138, "logits_per_char": -0.6707625985145569, "num_chars": 2}, {"sum_logits": -1.4821772575378418, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4821772575378418, "logits_per_char": -0.7410886287689209, "num_chars": 2}, {"sum_logits": -1.4596799612045288, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4596799612045288, "logits_per_char": -0.7298399806022644, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3714996576309204, "incorrect_loss_raw": 1.4203437566757202, "correct_loss_per_char": 0.6857498288154602, "incorrect_loss_per_char": 0.7101718783378601, "correct_loss_per_token": 1.3714996576309204, "incorrect_loss_per_token": 1.4203437566757202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3714996576309204, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.3714996576309204, "logits_per_char": -0.6857498288154602, "num_chars": 2}, {"sum_logits": -1.316188931465149, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": true, "logits_per_token": -1.316188931465149, "logits_per_char": -0.6580944657325745, "num_chars": 2}, {"sum_logits": -1.4147359132766724, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4147359132766724, "logits_per_char": -0.7073679566383362, "num_chars": 2}, {"sum_logits": -1.5301064252853394, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.5301064252853394, "logits_per_char": -0.7650532126426697, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4718445539474487, "incorrect_loss_raw": 1.3829888900121052, "correct_loss_per_char": 0.7359222769737244, "incorrect_loss_per_char": 0.6914944450060526, "correct_loss_per_token": 1.4718445539474487, "incorrect_loss_per_token": 1.3829888900121052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2903915643692017, "num_tokens": 1, "num_tokens_all": 529, "is_greedy": false, "logits_per_token": -1.2903915643692017, "logits_per_char": -0.6451957821846008, "num_chars": 2}, {"sum_logits": -1.2092297077178955, "num_tokens": 1, "num_tokens_all": 529, "is_greedy": true, "logits_per_token": -1.2092297077178955, "logits_per_char": -0.6046148538589478, "num_chars": 2}, {"sum_logits": -1.4718445539474487, "num_tokens": 1, "num_tokens_all": 529, "is_greedy": false, "logits_per_token": -1.4718445539474487, "logits_per_char": -0.7359222769737244, "num_chars": 2}, {"sum_logits": -1.6493453979492188, "num_tokens": 1, "num_tokens_all": 529, "is_greedy": false, "logits_per_token": -1.6493453979492188, "logits_per_char": -0.8246726989746094, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2573293447494507, "incorrect_loss_raw": 1.4496458371480305, "correct_loss_per_char": 0.6286646723747253, "incorrect_loss_per_char": 0.7248229185740153, "correct_loss_per_token": 1.2573293447494507, "incorrect_loss_per_token": 1.4496458371480305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2573293447494507, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2573293447494507, "logits_per_char": -0.6286646723747253, "num_chars": 2}, {"sum_logits": -1.322230577468872, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.322230577468872, "logits_per_char": -0.661115288734436, "num_chars": 2}, {"sum_logits": -1.4931873083114624, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4931873083114624, "logits_per_char": -0.7465936541557312, "num_chars": 2}, {"sum_logits": -1.5335196256637573, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5335196256637573, "logits_per_char": -0.7667598128318787, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1821812391281128, "incorrect_loss_raw": 1.4807641506195068, "correct_loss_per_char": 0.5910906195640564, "incorrect_loss_per_char": 0.7403820753097534, "correct_loss_per_token": 1.1821812391281128, "incorrect_loss_per_token": 1.4807641506195068, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1821812391281128, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.1821812391281128, "logits_per_char": -0.5910906195640564, "num_chars": 2}, {"sum_logits": -1.4007800817489624, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4007800817489624, "logits_per_char": -0.7003900408744812, "num_chars": 2}, {"sum_logits": -1.5065258741378784, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5065258741378784, "logits_per_char": -0.7532629370689392, "num_chars": 2}, {"sum_logits": -1.5349864959716797, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5349864959716797, "logits_per_char": -0.7674932479858398, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3099695444107056, "incorrect_loss_raw": 1.439244230588277, "correct_loss_per_char": 0.6549847722053528, "incorrect_loss_per_char": 0.7196221152941386, "correct_loss_per_token": 1.3099695444107056, "incorrect_loss_per_token": 1.439244230588277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.181244969367981, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.181244969367981, "logits_per_char": -0.5906224846839905, "num_chars": 2}, {"sum_logits": -1.3099695444107056, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3099695444107056, "logits_per_char": -0.6549847722053528, "num_chars": 2}, {"sum_logits": -1.6520332098007202, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6520332098007202, "logits_per_char": -0.8260166049003601, "num_chars": 2}, {"sum_logits": -1.4844545125961304, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4844545125961304, "logits_per_char": -0.7422272562980652, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2194187641143799, "incorrect_loss_raw": 1.464281400044759, "correct_loss_per_char": 0.6097093820571899, "incorrect_loss_per_char": 0.7321407000223795, "correct_loss_per_token": 1.2194187641143799, "incorrect_loss_per_token": 1.464281400044759, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2194187641143799, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": true, "logits_per_token": -1.2194187641143799, "logits_per_char": -0.6097093820571899, "num_chars": 2}, {"sum_logits": -1.3618121147155762, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.3618121147155762, "logits_per_char": -0.6809060573577881, "num_chars": 2}, {"sum_logits": -1.4820990562438965, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.4820990562438965, "logits_per_char": -0.7410495281219482, "num_chars": 2}, {"sum_logits": -1.5489330291748047, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.5489330291748047, "logits_per_char": -0.7744665145874023, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2544077634811401, "incorrect_loss_raw": 1.4493187268575032, "correct_loss_per_char": 0.6272038817405701, "incorrect_loss_per_char": 0.7246593634287516, "correct_loss_per_token": 1.2544077634811401, "incorrect_loss_per_token": 1.4493187268575032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2544077634811401, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2544077634811401, "logits_per_char": -0.6272038817405701, "num_chars": 2}, {"sum_logits": -1.5231257677078247, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5231257677078247, "logits_per_char": -0.7615628838539124, "num_chars": 2}, {"sum_logits": -1.4880770444869995, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4880770444869995, "logits_per_char": -0.7440385222434998, "num_chars": 2}, {"sum_logits": -1.3367533683776855, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3367533683776855, "logits_per_char": -0.6683766841888428, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.162168264389038, "incorrect_loss_raw": 1.4875466426213582, "correct_loss_per_char": 0.581084132194519, "incorrect_loss_per_char": 0.7437733213106791, "correct_loss_per_token": 1.162168264389038, "incorrect_loss_per_token": 1.4875466426213582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.162168264389038, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.162168264389038, "logits_per_char": -0.581084132194519, "num_chars": 2}, {"sum_logits": -1.3472051620483398, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3472051620483398, "logits_per_char": -0.6736025810241699, "num_chars": 2}, {"sum_logits": -1.523290991783142, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.523290991783142, "logits_per_char": -0.761645495891571, "num_chars": 2}, {"sum_logits": -1.5921437740325928, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.5921437740325928, "logits_per_char": -0.7960718870162964, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3622214794158936, "incorrect_loss_raw": 1.4412768284479778, "correct_loss_per_char": 0.6811107397079468, "incorrect_loss_per_char": 0.7206384142239889, "correct_loss_per_token": 1.3622214794158936, "incorrect_loss_per_token": 1.4412768284479778, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0883688926696777, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.0883688926696777, "logits_per_char": -0.5441844463348389, "num_chars": 2}, {"sum_logits": -1.3622214794158936, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3622214794158936, "logits_per_char": -0.6811107397079468, "num_chars": 2}, {"sum_logits": -1.5030479431152344, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5030479431152344, "logits_per_char": -0.7515239715576172, "num_chars": 2}, {"sum_logits": -1.732413649559021, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.732413649559021, "logits_per_char": -0.8662068247795105, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6249570846557617, "incorrect_loss_raw": 1.3314930200576782, "correct_loss_per_char": 0.8124785423278809, "incorrect_loss_per_char": 0.6657465100288391, "correct_loss_per_token": 1.6249570846557617, "incorrect_loss_per_token": 1.3314930200576782, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.202502727508545, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -1.202502727508545, "logits_per_char": -0.6012513637542725, "num_chars": 2}, {"sum_logits": -1.274124264717102, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.274124264717102, "logits_per_char": -0.637062132358551, "num_chars": 2}, {"sum_logits": -1.5178520679473877, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.5178520679473877, "logits_per_char": -0.7589260339736938, "num_chars": 2}, {"sum_logits": -1.6249570846557617, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.6249570846557617, "logits_per_char": -0.8124785423278809, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.597001552581787, "incorrect_loss_raw": 1.354565699895223, "correct_loss_per_char": 0.7985007762908936, "incorrect_loss_per_char": 0.6772828499476115, "correct_loss_per_token": 1.597001552581787, "incorrect_loss_per_token": 1.354565699895223, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3093316555023193, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.3093316555023193, "logits_per_char": -0.6546658277511597, "num_chars": 2}, {"sum_logits": -1.149627923965454, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": true, "logits_per_token": -1.149627923965454, "logits_per_char": -0.574813961982727, "num_chars": 2}, {"sum_logits": -1.597001552581787, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.597001552581787, "logits_per_char": -0.7985007762908936, "num_chars": 2}, {"sum_logits": -1.6047375202178955, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.6047375202178955, "logits_per_char": -0.8023687601089478, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3983590602874756, "incorrect_loss_raw": 1.3935782512029011, "correct_loss_per_char": 0.6991795301437378, "incorrect_loss_per_char": 0.6967891256014506, "correct_loss_per_token": 1.3983590602874756, "incorrect_loss_per_token": 1.3935782512029011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4355008602142334, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.4355008602142334, "logits_per_char": -0.7177504301071167, "num_chars": 2}, {"sum_logits": -1.4524394273757935, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.4524394273757935, "logits_per_char": -0.7262197136878967, "num_chars": 2}, {"sum_logits": -1.3983590602874756, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.3983590602874756, "logits_per_char": -0.6991795301437378, "num_chars": 2}, {"sum_logits": -1.2927944660186768, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": true, "logits_per_token": -1.2927944660186768, "logits_per_char": -0.6463972330093384, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3716014623641968, "incorrect_loss_raw": 1.4103328784306843, "correct_loss_per_char": 0.6858007311820984, "incorrect_loss_per_char": 0.7051664392153422, "correct_loss_per_token": 1.3716014623641968, "incorrect_loss_per_token": 1.4103328784306843, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2427361011505127, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2427361011505127, "logits_per_char": -0.6213680505752563, "num_chars": 2}, {"sum_logits": -1.3716014623641968, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3716014623641968, "logits_per_char": -0.6858007311820984, "num_chars": 2}, {"sum_logits": -1.475243330001831, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.475243330001831, "logits_per_char": -0.7376216650009155, "num_chars": 2}, {"sum_logits": -1.5130192041397095, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5130192041397095, "logits_per_char": -0.7565096020698547, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4267109632492065, "incorrect_loss_raw": 1.3970081011454265, "correct_loss_per_char": 0.7133554816246033, "incorrect_loss_per_char": 0.6985040505727133, "correct_loss_per_token": 1.4267109632492065, "incorrect_loss_per_token": 1.3970081011454265, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2243889570236206, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2243889570236206, "logits_per_char": -0.6121944785118103, "num_chars": 2}, {"sum_logits": -1.4226305484771729, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4226305484771729, "logits_per_char": -0.7113152742385864, "num_chars": 2}, {"sum_logits": -1.4267109632492065, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4267109632492065, "logits_per_char": -0.7133554816246033, "num_chars": 2}, {"sum_logits": -1.5440047979354858, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5440047979354858, "logits_per_char": -0.7720023989677429, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4168949127197266, "incorrect_loss_raw": 1.4014353354771931, "correct_loss_per_char": 0.7084474563598633, "incorrect_loss_per_char": 0.7007176677385966, "correct_loss_per_token": 1.4168949127197266, "incorrect_loss_per_token": 1.4014353354771931, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1444839239120483, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.1444839239120483, "logits_per_char": -0.5722419619560242, "num_chars": 2}, {"sum_logits": -1.4168949127197266, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4168949127197266, "logits_per_char": -0.7084474563598633, "num_chars": 2}, {"sum_logits": -1.5098391771316528, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5098391771316528, "logits_per_char": -0.7549195885658264, "num_chars": 2}, {"sum_logits": -1.5499829053878784, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5499829053878784, "logits_per_char": -0.7749914526939392, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3697881698608398, "incorrect_loss_raw": 1.432490070660909, "correct_loss_per_char": 0.6848940849304199, "incorrect_loss_per_char": 0.7162450353304545, "correct_loss_per_token": 1.3697881698608398, "incorrect_loss_per_token": 1.432490070660909, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.084686279296875, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.084686279296875, "logits_per_char": -0.5423431396484375, "num_chars": 2}, {"sum_logits": -1.3697881698608398, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3697881698608398, "logits_per_char": -0.6848940849304199, "num_chars": 2}, {"sum_logits": -1.5786656141281128, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.5786656141281128, "logits_per_char": -0.7893328070640564, "num_chars": 2}, {"sum_logits": -1.6341183185577393, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.6341183185577393, "logits_per_char": -0.8170591592788696, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.409850835800171, "incorrect_loss_raw": 1.387544314066569, "correct_loss_per_char": 0.7049254179000854, "incorrect_loss_per_char": 0.6937721570332845, "correct_loss_per_token": 1.409850835800171, "incorrect_loss_per_token": 1.387544314066569, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.409850835800171, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.409850835800171, "logits_per_char": -0.7049254179000854, "num_chars": 2}, {"sum_logits": -1.3125879764556885, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": true, "logits_per_token": -1.3125879764556885, "logits_per_char": -0.6562939882278442, "num_chars": 2}, {"sum_logits": -1.4643712043762207, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.4643712043762207, "logits_per_char": -0.7321856021881104, "num_chars": 2}, {"sum_logits": -1.3856737613677979, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.3856737613677979, "logits_per_char": -0.6928368806838989, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.304312825202942, "incorrect_loss_raw": 1.4336919784545898, "correct_loss_per_char": 0.652156412601471, "incorrect_loss_per_char": 0.7168459892272949, "correct_loss_per_token": 1.304312825202942, "incorrect_loss_per_token": 1.4336919784545898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.304312825202942, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.304312825202942, "logits_per_char": -0.652156412601471, "num_chars": 2}, {"sum_logits": -1.2937915325164795, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2937915325164795, "logits_per_char": -0.6468957662582397, "num_chars": 2}, {"sum_logits": -1.540940761566162, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.540940761566162, "logits_per_char": -0.770470380783081, "num_chars": 2}, {"sum_logits": -1.466343641281128, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.466343641281128, "logits_per_char": -0.733171820640564, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4423139095306396, "incorrect_loss_raw": 1.3750805060068767, "correct_loss_per_char": 0.7211569547653198, "incorrect_loss_per_char": 0.6875402530034384, "correct_loss_per_token": 1.4423139095306396, "incorrect_loss_per_token": 1.3750805060068767, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3688032627105713, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.3688032627105713, "logits_per_char": -0.6844016313552856, "num_chars": 2}, {"sum_logits": -1.3563783168792725, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.3563783168792725, "logits_per_char": -0.6781891584396362, "num_chars": 2}, {"sum_logits": -1.4000599384307861, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4000599384307861, "logits_per_char": -0.7000299692153931, "num_chars": 2}, {"sum_logits": -1.4423139095306396, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4423139095306396, "logits_per_char": -0.7211569547653198, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.391804575920105, "incorrect_loss_raw": 1.3960531949996948, "correct_loss_per_char": 0.6959022879600525, "incorrect_loss_per_char": 0.6980265974998474, "correct_loss_per_token": 1.391804575920105, "incorrect_loss_per_token": 1.3960531949996948, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3821367025375366, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3821367025375366, "logits_per_char": -0.6910683512687683, "num_chars": 2}, {"sum_logits": -1.4482288360595703, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4482288360595703, "logits_per_char": -0.7241144180297852, "num_chars": 2}, {"sum_logits": -1.3577940464019775, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3577940464019775, "logits_per_char": -0.6788970232009888, "num_chars": 2}, {"sum_logits": -1.391804575920105, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.391804575920105, "logits_per_char": -0.6959022879600525, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3849793672561646, "incorrect_loss_raw": 1.4011366764704387, "correct_loss_per_char": 0.6924896836280823, "incorrect_loss_per_char": 0.7005683382352194, "correct_loss_per_token": 1.3849793672561646, "incorrect_loss_per_token": 1.4011366764704387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3253743648529053, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.3253743648529053, "logits_per_char": -0.6626871824264526, "num_chars": 2}, {"sum_logits": -1.3049923181533813, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": true, "logits_per_token": -1.3049923181533813, "logits_per_char": -0.6524961590766907, "num_chars": 2}, {"sum_logits": -1.3849793672561646, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.3849793672561646, "logits_per_char": -0.6924896836280823, "num_chars": 2}, {"sum_logits": -1.5730433464050293, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.5730433464050293, "logits_per_char": -0.7865216732025146, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5662636756896973, "incorrect_loss_raw": 1.3599320650100708, "correct_loss_per_char": 0.7831318378448486, "incorrect_loss_per_char": 0.6799660325050354, "correct_loss_per_token": 1.5662636756896973, "incorrect_loss_per_token": 1.3599320650100708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2741693258285522, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": true, "logits_per_token": -1.2741693258285522, "logits_per_char": -0.6370846629142761, "num_chars": 2}, {"sum_logits": -1.308300495147705, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.308300495147705, "logits_per_char": -0.6541502475738525, "num_chars": 2}, {"sum_logits": -1.497326374053955, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.497326374053955, "logits_per_char": -0.7486631870269775, "num_chars": 2}, {"sum_logits": -1.5662636756896973, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.5662636756896973, "logits_per_char": -0.7831318378448486, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6028677225112915, "incorrect_loss_raw": 1.3428737322489421, "correct_loss_per_char": 0.8014338612556458, "incorrect_loss_per_char": 0.6714368661244711, "correct_loss_per_token": 1.6028677225112915, "incorrect_loss_per_token": 1.3428737322489421, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1805956363677979, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1805956363677979, "logits_per_char": -0.5902978181838989, "num_chars": 2}, {"sum_logits": -1.3179606199264526, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3179606199264526, "logits_per_char": -0.6589803099632263, "num_chars": 2}, {"sum_logits": -1.6028677225112915, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6028677225112915, "logits_per_char": -0.8014338612556458, "num_chars": 2}, {"sum_logits": -1.5300649404525757, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5300649404525757, "logits_per_char": -0.7650324702262878, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2151188850402832, "incorrect_loss_raw": 1.4710975488026936, "correct_loss_per_char": 0.6075594425201416, "incorrect_loss_per_char": 0.7355487744013468, "correct_loss_per_token": 1.2151188850402832, "incorrect_loss_per_token": 1.4710975488026936, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2151188850402832, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.2151188850402832, "logits_per_char": -0.6075594425201416, "num_chars": 2}, {"sum_logits": -1.4052151441574097, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4052151441574097, "logits_per_char": -0.7026075720787048, "num_chars": 2}, {"sum_logits": -1.6855756044387817, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.6855756044387817, "logits_per_char": -0.8427878022193909, "num_chars": 2}, {"sum_logits": -1.3225018978118896, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3225018978118896, "logits_per_char": -0.6612509489059448, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7519664764404297, "incorrect_loss_raw": 1.32066543896993, "correct_loss_per_char": 0.8759832382202148, "incorrect_loss_per_char": 0.660332719484965, "correct_loss_per_token": 1.7519664764404297, "incorrect_loss_per_token": 1.32066543896993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0197097063064575, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.0197097063064575, "logits_per_char": -0.5098548531532288, "num_chars": 2}, {"sum_logits": -1.4946463108062744, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4946463108062744, "logits_per_char": -0.7473231554031372, "num_chars": 2}, {"sum_logits": -1.7519664764404297, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.7519664764404297, "logits_per_char": -0.8759832382202148, "num_chars": 2}, {"sum_logits": -1.447640299797058, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.447640299797058, "logits_per_char": -0.723820149898529, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.428816318511963, "incorrect_loss_raw": 1.3802982966105144, "correct_loss_per_char": 0.7144081592559814, "incorrect_loss_per_char": 0.6901491483052572, "correct_loss_per_token": 1.428816318511963, "incorrect_loss_per_token": 1.3802982966105144, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.446803092956543, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.446803092956543, "logits_per_char": -0.7234015464782715, "num_chars": 2}, {"sum_logits": -1.428816318511963, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.428816318511963, "logits_per_char": -0.7144081592559814, "num_chars": 2}, {"sum_logits": -1.3475985527038574, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.3475985527038574, "logits_per_char": -0.6737992763519287, "num_chars": 2}, {"sum_logits": -1.3464932441711426, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.3464932441711426, "logits_per_char": -0.6732466220855713, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3179348707199097, "incorrect_loss_raw": 1.4341417948404949, "correct_loss_per_char": 0.6589674353599548, "incorrect_loss_per_char": 0.7170708974202474, "correct_loss_per_token": 1.3179348707199097, "incorrect_loss_per_token": 1.4341417948404949, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1887590885162354, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.1887590885162354, "logits_per_char": -0.5943795442581177, "num_chars": 2}, {"sum_logits": -1.3179348707199097, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3179348707199097, "logits_per_char": -0.6589674353599548, "num_chars": 2}, {"sum_logits": -1.5161527395248413, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5161527395248413, "logits_per_char": -0.7580763697624207, "num_chars": 2}, {"sum_logits": -1.5975135564804077, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5975135564804077, "logits_per_char": -0.7987567782402039, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.496344804763794, "incorrect_loss_raw": 1.3685733874638875, "correct_loss_per_char": 0.748172402381897, "incorrect_loss_per_char": 0.6842866937319437, "correct_loss_per_token": 1.496344804763794, "incorrect_loss_per_token": 1.3685733874638875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3033394813537598, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.3033394813537598, "logits_per_char": -0.6516697406768799, "num_chars": 2}, {"sum_logits": -1.2393144369125366, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": true, "logits_per_token": -1.2393144369125366, "logits_per_char": -0.6196572184562683, "num_chars": 2}, {"sum_logits": -1.496344804763794, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.496344804763794, "logits_per_char": -0.748172402381897, "num_chars": 2}, {"sum_logits": -1.5630662441253662, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.5630662441253662, "logits_per_char": -0.7815331220626831, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.013635516166687, "incorrect_loss_raw": 1.6515755653381348, "correct_loss_per_char": 0.5068177580833435, "incorrect_loss_per_char": 0.8257877826690674, "correct_loss_per_token": 1.013635516166687, "incorrect_loss_per_token": 1.6515755653381348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.013635516166687, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": true, "logits_per_token": -1.013635516166687, "logits_per_char": -0.5068177580833435, "num_chars": 2}, {"sum_logits": -1.1273882389068604, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.1273882389068604, "logits_per_char": -0.5636941194534302, "num_chars": 2}, {"sum_logits": -1.7769153118133545, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.7769153118133545, "logits_per_char": -0.8884576559066772, "num_chars": 2}, {"sum_logits": -2.0504231452941895, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -2.0504231452941895, "logits_per_char": -1.0252115726470947, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3451801538467407, "incorrect_loss_raw": 1.4127839803695679, "correct_loss_per_char": 0.6725900769233704, "incorrect_loss_per_char": 0.7063919901847839, "correct_loss_per_token": 1.3451801538467407, "incorrect_loss_per_token": 1.4127839803695679, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3651155233383179, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3651155233383179, "logits_per_char": -0.6825577616691589, "num_chars": 2}, {"sum_logits": -1.3707528114318848, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3707528114318848, "logits_per_char": -0.6853764057159424, "num_chars": 2}, {"sum_logits": -1.502483606338501, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.502483606338501, "logits_per_char": -0.7512418031692505, "num_chars": 2}, {"sum_logits": -1.3451801538467407, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.3451801538467407, "logits_per_char": -0.6725900769233704, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4386433362960815, "incorrect_loss_raw": 1.385990579922994, "correct_loss_per_char": 0.7193216681480408, "incorrect_loss_per_char": 0.692995289961497, "correct_loss_per_token": 1.4386433362960815, "incorrect_loss_per_token": 1.385990579922994, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2477227449417114, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2477227449417114, "logits_per_char": -0.6238613724708557, "num_chars": 2}, {"sum_logits": -1.4098321199417114, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4098321199417114, "logits_per_char": -0.7049160599708557, "num_chars": 2}, {"sum_logits": -1.500416874885559, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.500416874885559, "logits_per_char": -0.7502084374427795, "num_chars": 2}, {"sum_logits": -1.4386433362960815, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4386433362960815, "logits_per_char": -0.7193216681480408, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4904191493988037, "incorrect_loss_raw": 1.3968300422032673, "correct_loss_per_char": 0.7452095746994019, "incorrect_loss_per_char": 0.6984150211016337, "correct_loss_per_token": 1.4904191493988037, "incorrect_loss_per_token": 1.3968300422032673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2993443012237549, "num_tokens": 1, "num_tokens_all": 431, "is_greedy": true, "logits_per_token": -1.2993443012237549, "logits_per_char": -0.6496721506118774, "num_chars": 2}, {"sum_logits": -1.3548697233200073, "num_tokens": 1, "num_tokens_all": 431, "is_greedy": false, "logits_per_token": -1.3548697233200073, "logits_per_char": -0.6774348616600037, "num_chars": 2}, {"sum_logits": -1.4904191493988037, "num_tokens": 1, "num_tokens_all": 431, "is_greedy": false, "logits_per_token": -1.4904191493988037, "logits_per_char": -0.7452095746994019, "num_chars": 2}, {"sum_logits": -1.53627610206604, "num_tokens": 1, "num_tokens_all": 431, "is_greedy": false, "logits_per_token": -1.53627610206604, "logits_per_char": -0.76813805103302, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4691832065582275, "incorrect_loss_raw": 1.3701859712600708, "correct_loss_per_char": 0.7345916032791138, "incorrect_loss_per_char": 0.6850929856300354, "correct_loss_per_token": 1.4691832065582275, "incorrect_loss_per_token": 1.3701859712600708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3194080591201782, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.3194080591201782, "logits_per_char": -0.6597040295600891, "num_chars": 2}, {"sum_logits": -1.389696478843689, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.389696478843689, "logits_per_char": -0.6948482394218445, "num_chars": 2}, {"sum_logits": -1.4691832065582275, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4691832065582275, "logits_per_char": -0.7345916032791138, "num_chars": 2}, {"sum_logits": -1.4014533758163452, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4014533758163452, "logits_per_char": -0.7007266879081726, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3327380418777466, "incorrect_loss_raw": 1.4312421083450317, "correct_loss_per_char": 0.6663690209388733, "incorrect_loss_per_char": 0.7156210541725159, "correct_loss_per_token": 1.3327380418777466, "incorrect_loss_per_token": 1.4312421083450317, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2320579290390015, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2320579290390015, "logits_per_char": -0.6160289645195007, "num_chars": 2}, {"sum_logits": -1.5137696266174316, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5137696266174316, "logits_per_char": -0.7568848133087158, "num_chars": 2}, {"sum_logits": -1.547898769378662, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.547898769378662, "logits_per_char": -0.773949384689331, "num_chars": 2}, {"sum_logits": -1.3327380418777466, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3327380418777466, "logits_per_char": -0.6663690209388733, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.42662513256073, "incorrect_loss_raw": 1.381311058998108, "correct_loss_per_char": 0.713312566280365, "incorrect_loss_per_char": 0.690655529499054, "correct_loss_per_token": 1.42662513256073, "incorrect_loss_per_token": 1.381311058998108, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3412866592407227, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.3412866592407227, "logits_per_char": -0.6706433296203613, "num_chars": 2}, {"sum_logits": -1.3806769847869873, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3806769847869873, "logits_per_char": -0.6903384923934937, "num_chars": 2}, {"sum_logits": -1.42662513256073, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.42662513256073, "logits_per_char": -0.713312566280365, "num_chars": 2}, {"sum_logits": -1.4219695329666138, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4219695329666138, "logits_per_char": -0.7109847664833069, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5717147588729858, "incorrect_loss_raw": 1.3786590099334717, "correct_loss_per_char": 0.7858573794364929, "incorrect_loss_per_char": 0.6893295049667358, "correct_loss_per_token": 1.5717147588729858, "incorrect_loss_per_token": 1.3786590099334717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0471951961517334, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.0471951961517334, "logits_per_char": -0.5235975980758667, "num_chars": 2}, {"sum_logits": -1.3447483777999878, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3447483777999878, "logits_per_char": -0.6723741888999939, "num_chars": 2}, {"sum_logits": -1.5717147588729858, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5717147588729858, "logits_per_char": -0.7858573794364929, "num_chars": 2}, {"sum_logits": -1.7440334558486938, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.7440334558486938, "logits_per_char": -0.8720167279243469, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6438547372817993, "incorrect_loss_raw": 1.3363457918167114, "correct_loss_per_char": 0.8219273686408997, "incorrect_loss_per_char": 0.6681728959083557, "correct_loss_per_token": 1.6438547372817993, "incorrect_loss_per_token": 1.3363457918167114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1442421674728394, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1442421674728394, "logits_per_char": -0.5721210837364197, "num_chars": 2}, {"sum_logits": -1.319848895072937, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.319848895072937, "logits_per_char": -0.6599244475364685, "num_chars": 2}, {"sum_logits": -1.544946312904358, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.544946312904358, "logits_per_char": -0.772473156452179, "num_chars": 2}, {"sum_logits": -1.6438547372817993, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6438547372817993, "logits_per_char": -0.8219273686408997, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3948184251785278, "incorrect_loss_raw": 1.3917551040649414, "correct_loss_per_char": 0.6974092125892639, "incorrect_loss_per_char": 0.6958775520324707, "correct_loss_per_token": 1.3948184251785278, "incorrect_loss_per_token": 1.3917551040649414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3948184251785278, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.3948184251785278, "logits_per_char": -0.6974092125892639, "num_chars": 2}, {"sum_logits": -1.386847734451294, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.386847734451294, "logits_per_char": -0.693423867225647, "num_chars": 2}, {"sum_logits": -1.4503298997879028, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.4503298997879028, "logits_per_char": -0.7251649498939514, "num_chars": 2}, {"sum_logits": -1.3380876779556274, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": true, "logits_per_token": -1.3380876779556274, "logits_per_char": -0.6690438389778137, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4534404277801514, "incorrect_loss_raw": 1.3734341859817505, "correct_loss_per_char": 0.7267202138900757, "incorrect_loss_per_char": 0.6867170929908752, "correct_loss_per_token": 1.4534404277801514, "incorrect_loss_per_token": 1.3734341859817505, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3678207397460938, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3678207397460938, "logits_per_char": -0.6839103698730469, "num_chars": 2}, {"sum_logits": -1.4066404104232788, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4066404104232788, "logits_per_char": -0.7033202052116394, "num_chars": 2}, {"sum_logits": -1.4534404277801514, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4534404277801514, "logits_per_char": -0.7267202138900757, "num_chars": 2}, {"sum_logits": -1.345841407775879, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.345841407775879, "logits_per_char": -0.6729207038879395, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5536823272705078, "incorrect_loss_raw": 1.353819449742635, "correct_loss_per_char": 0.7768411636352539, "incorrect_loss_per_char": 0.6769097248713175, "correct_loss_per_token": 1.5536823272705078, "incorrect_loss_per_token": 1.353819449742635, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1894848346710205, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.1894848346710205, "logits_per_char": -0.5947424173355103, "num_chars": 2}, {"sum_logits": -1.5536823272705078, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5536823272705078, "logits_per_char": -0.7768411636352539, "num_chars": 2}, {"sum_logits": -1.3628201484680176, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3628201484680176, "logits_per_char": -0.6814100742340088, "num_chars": 2}, {"sum_logits": -1.5091533660888672, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5091533660888672, "logits_per_char": -0.7545766830444336, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3400378227233887, "incorrect_loss_raw": 1.416705052057902, "correct_loss_per_char": 0.6700189113616943, "incorrect_loss_per_char": 0.708352526028951, "correct_loss_per_token": 1.3400378227233887, "incorrect_loss_per_token": 1.416705052057902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2483121156692505, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2483121156692505, "logits_per_char": -0.6241560578346252, "num_chars": 2}, {"sum_logits": -1.3400378227233887, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3400378227233887, "logits_per_char": -0.6700189113616943, "num_chars": 2}, {"sum_logits": -1.5008653402328491, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5008653402328491, "logits_per_char": -0.7504326701164246, "num_chars": 2}, {"sum_logits": -1.5009377002716064, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5009377002716064, "logits_per_char": -0.7504688501358032, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3377145528793335, "incorrect_loss_raw": 1.4242619276046753, "correct_loss_per_char": 0.6688572764396667, "incorrect_loss_per_char": 0.7121309638023376, "correct_loss_per_token": 1.3377145528793335, "incorrect_loss_per_token": 1.4242619276046753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.228879451751709, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.228879451751709, "logits_per_char": -0.6144397258758545, "num_chars": 2}, {"sum_logits": -1.3377145528793335, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3377145528793335, "logits_per_char": -0.6688572764396667, "num_chars": 2}, {"sum_logits": -1.4656177759170532, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4656177759170532, "logits_per_char": -0.7328088879585266, "num_chars": 2}, {"sum_logits": -1.5782885551452637, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5782885551452637, "logits_per_char": -0.7891442775726318, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5452113151550293, "incorrect_loss_raw": 1.3738019069035847, "correct_loss_per_char": 0.7726056575775146, "incorrect_loss_per_char": 0.6869009534517924, "correct_loss_per_token": 1.5452113151550293, "incorrect_loss_per_token": 1.3738019069035847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0874546766281128, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.0874546766281128, "logits_per_char": -0.5437273383140564, "num_chars": 2}, {"sum_logits": -1.408775806427002, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.408775806427002, "logits_per_char": -0.704387903213501, "num_chars": 2}, {"sum_logits": -1.6251752376556396, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.6251752376556396, "logits_per_char": -0.8125876188278198, "num_chars": 2}, {"sum_logits": -1.5452113151550293, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5452113151550293, "logits_per_char": -0.7726056575775146, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3575952053070068, "incorrect_loss_raw": 1.4243608713150024, "correct_loss_per_char": 0.6787976026535034, "incorrect_loss_per_char": 0.7121804356575012, "correct_loss_per_token": 1.3575952053070068, "incorrect_loss_per_token": 1.4243608713150024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3575952053070068, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.3575952053070068, "logits_per_char": -0.6787976026535034, "num_chars": 2}, {"sum_logits": -1.2953007221221924, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": true, "logits_per_token": -1.2953007221221924, "logits_per_char": -0.6476503610610962, "num_chars": 2}, {"sum_logits": -1.4187796115875244, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.4187796115875244, "logits_per_char": -0.7093898057937622, "num_chars": 2}, {"sum_logits": -1.5590022802352905, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.5590022802352905, "logits_per_char": -0.7795011401176453, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5033156871795654, "incorrect_loss_raw": 1.3608282804489136, "correct_loss_per_char": 0.7516578435897827, "incorrect_loss_per_char": 0.6804141402244568, "correct_loss_per_token": 1.5033156871795654, "incorrect_loss_per_token": 1.3608282804489136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3024054765701294, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3024054765701294, "logits_per_char": -0.6512027382850647, "num_chars": 2}, {"sum_logits": -1.3868718147277832, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3868718147277832, "logits_per_char": -0.6934359073638916, "num_chars": 2}, {"sum_logits": -1.3932075500488281, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3932075500488281, "logits_per_char": -0.6966037750244141, "num_chars": 2}, {"sum_logits": -1.5033156871795654, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.5033156871795654, "logits_per_char": -0.7516578435897827, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3780343532562256, "incorrect_loss_raw": 1.4043809175491333, "correct_loss_per_char": 0.6890171766281128, "incorrect_loss_per_char": 0.7021904587745667, "correct_loss_per_token": 1.3780343532562256, "incorrect_loss_per_token": 1.4043809175491333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2204378843307495, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2204378843307495, "logits_per_char": -0.6102189421653748, "num_chars": 2}, {"sum_logits": -1.3780343532562256, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3780343532562256, "logits_per_char": -0.6890171766281128, "num_chars": 2}, {"sum_logits": -1.5148512125015259, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5148512125015259, "logits_per_char": -0.7574256062507629, "num_chars": 2}, {"sum_logits": -1.4778536558151245, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4778536558151245, "logits_per_char": -0.7389268279075623, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3314558267593384, "incorrect_loss_raw": 1.4239704608917236, "correct_loss_per_char": 0.6657279133796692, "incorrect_loss_per_char": 0.7119852304458618, "correct_loss_per_token": 1.3314558267593384, "incorrect_loss_per_token": 1.4239704608917236, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4211735725402832, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4211735725402832, "logits_per_char": -0.7105867862701416, "num_chars": 2}, {"sum_logits": -1.3109652996063232, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3109652996063232, "logits_per_char": -0.6554826498031616, "num_chars": 2}, {"sum_logits": -1.3314558267593384, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3314558267593384, "logits_per_char": -0.6657279133796692, "num_chars": 2}, {"sum_logits": -1.5397725105285645, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5397725105285645, "logits_per_char": -0.7698862552642822, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3292897939682007, "incorrect_loss_raw": 1.4142563740412395, "correct_loss_per_char": 0.6646448969841003, "incorrect_loss_per_char": 0.7071281870206197, "correct_loss_per_token": 1.3292897939682007, "incorrect_loss_per_token": 1.4142563740412395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3292897939682007, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.3292897939682007, "logits_per_char": -0.6646448969841003, "num_chars": 2}, {"sum_logits": -1.3180289268493652, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.3180289268493652, "logits_per_char": -0.6590144634246826, "num_chars": 2}, {"sum_logits": -1.446448802947998, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.446448802947998, "logits_per_char": -0.723224401473999, "num_chars": 2}, {"sum_logits": -1.478291392326355, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.478291392326355, "logits_per_char": -0.7391456961631775, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4698643684387207, "incorrect_loss_raw": 1.3768912156422932, "correct_loss_per_char": 0.7349321842193604, "incorrect_loss_per_char": 0.6884456078211466, "correct_loss_per_token": 1.4698643684387207, "incorrect_loss_per_token": 1.3768912156422932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.217571496963501, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.217571496963501, "logits_per_char": -0.6087857484817505, "num_chars": 2}, {"sum_logits": -1.3468116521835327, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3468116521835327, "logits_per_char": -0.6734058260917664, "num_chars": 2}, {"sum_logits": -1.4698643684387207, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4698643684387207, "logits_per_char": -0.7349321842193604, "num_chars": 2}, {"sum_logits": -1.5662904977798462, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.5662904977798462, "logits_per_char": -0.7831452488899231, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5336213111877441, "incorrect_loss_raw": 1.3537040948867798, "correct_loss_per_char": 0.7668106555938721, "incorrect_loss_per_char": 0.6768520474433899, "correct_loss_per_token": 1.5336213111877441, "incorrect_loss_per_token": 1.3537040948867798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2525115013122559, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.2525115013122559, "logits_per_char": -0.6262557506561279, "num_chars": 2}, {"sum_logits": -1.3387569189071655, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3387569189071655, "logits_per_char": -0.6693784594535828, "num_chars": 2}, {"sum_logits": -1.5336213111877441, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.5336213111877441, "logits_per_char": -0.7668106555938721, "num_chars": 2}, {"sum_logits": -1.469843864440918, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.469843864440918, "logits_per_char": -0.734921932220459, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4451981782913208, "incorrect_loss_raw": 1.387419859568278, "correct_loss_per_char": 0.7225990891456604, "incorrect_loss_per_char": 0.693709929784139, "correct_loss_per_token": 1.4451981782913208, "incorrect_loss_per_token": 1.387419859568278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2541760206222534, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2541760206222534, "logits_per_char": -0.6270880103111267, "num_chars": 2}, {"sum_logits": -1.4451981782913208, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4451981782913208, "logits_per_char": -0.7225990891456604, "num_chars": 2}, {"sum_logits": -1.377140998840332, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.377140998840332, "logits_per_char": -0.688570499420166, "num_chars": 2}, {"sum_logits": -1.5309425592422485, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5309425592422485, "logits_per_char": -0.7654712796211243, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.98997962474823, "incorrect_loss_raw": 1.5831234852472942, "correct_loss_per_char": 0.494989812374115, "incorrect_loss_per_char": 0.7915617426236471, "correct_loss_per_token": 0.98997962474823, "incorrect_loss_per_token": 1.5831234852472942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.98997962474823, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -0.98997962474823, "logits_per_char": -0.494989812374115, "num_chars": 2}, {"sum_logits": -1.3738583326339722, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3738583326339722, "logits_per_char": -0.6869291663169861, "num_chars": 2}, {"sum_logits": -1.7333736419677734, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.7333736419677734, "logits_per_char": -0.8666868209838867, "num_chars": 2}, {"sum_logits": -1.6421384811401367, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.6421384811401367, "logits_per_char": -0.8210692405700684, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.452532172203064, "incorrect_loss_raw": 1.3817471265792847, "correct_loss_per_char": 0.726266086101532, "incorrect_loss_per_char": 0.6908735632896423, "correct_loss_per_token": 1.452532172203064, "incorrect_loss_per_token": 1.3817471265792847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.246535301208496, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.246535301208496, "logits_per_char": -0.623267650604248, "num_chars": 2}, {"sum_logits": -1.452532172203064, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.452532172203064, "logits_per_char": -0.726266086101532, "num_chars": 2}, {"sum_logits": -1.5400898456573486, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5400898456573486, "logits_per_char": -0.7700449228286743, "num_chars": 2}, {"sum_logits": -1.3586162328720093, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3586162328720093, "logits_per_char": -0.6793081164360046, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3521608114242554, "incorrect_loss_raw": 1.4058313369750977, "correct_loss_per_char": 0.6760804057121277, "incorrect_loss_per_char": 0.7029156684875488, "correct_loss_per_token": 1.3521608114242554, "incorrect_loss_per_token": 1.4058313369750977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3521608114242554, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.3521608114242554, "logits_per_char": -0.6760804057121277, "num_chars": 2}, {"sum_logits": -1.4005465507507324, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4005465507507324, "logits_per_char": -0.7002732753753662, "num_chars": 2}, {"sum_logits": -1.4215809106826782, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4215809106826782, "logits_per_char": -0.7107904553413391, "num_chars": 2}, {"sum_logits": -1.3953665494918823, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.3953665494918823, "logits_per_char": -0.6976832747459412, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.508150577545166, "incorrect_loss_raw": 1.3975878556569417, "correct_loss_per_char": 0.754075288772583, "incorrect_loss_per_char": 0.6987939278284708, "correct_loss_per_token": 1.508150577545166, "incorrect_loss_per_token": 1.3975878556569417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0225460529327393, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.0225460529327393, "logits_per_char": -0.5112730264663696, "num_chars": 2}, {"sum_logits": -1.508150577545166, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.508150577545166, "logits_per_char": -0.754075288772583, "num_chars": 2}, {"sum_logits": -1.6391410827636719, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6391410827636719, "logits_per_char": -0.8195705413818359, "num_chars": 2}, {"sum_logits": -1.531076431274414, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.531076431274414, "logits_per_char": -0.765538215637207, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2249644994735718, "incorrect_loss_raw": 1.4565985202789307, "correct_loss_per_char": 0.6124822497367859, "incorrect_loss_per_char": 0.7282992601394653, "correct_loss_per_token": 1.2249644994735718, "incorrect_loss_per_token": 1.4565985202789307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2249644994735718, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2249644994735718, "logits_per_char": -0.6124822497367859, "num_chars": 2}, {"sum_logits": -1.3392690420150757, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3392690420150757, "logits_per_char": -0.6696345210075378, "num_chars": 2}, {"sum_logits": -1.4859968423843384, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4859968423843384, "logits_per_char": -0.7429984211921692, "num_chars": 2}, {"sum_logits": -1.544529676437378, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.544529676437378, "logits_per_char": -0.772264838218689, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3244569301605225, "incorrect_loss_raw": 1.437920331954956, "correct_loss_per_char": 0.6622284650802612, "incorrect_loss_per_char": 0.718960165977478, "correct_loss_per_token": 1.3244569301605225, "incorrect_loss_per_token": 1.437920331954956, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1645731925964355, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.1645731925964355, "logits_per_char": -0.5822865962982178, "num_chars": 2}, {"sum_logits": -1.3244569301605225, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3244569301605225, "logits_per_char": -0.6622284650802612, "num_chars": 2}, {"sum_logits": -1.5000547170639038, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5000547170639038, "logits_per_char": -0.7500273585319519, "num_chars": 2}, {"sum_logits": -1.6491330862045288, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.6491330862045288, "logits_per_char": -0.8245665431022644, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3746639490127563, "incorrect_loss_raw": 1.4037022988001506, "correct_loss_per_char": 0.6873319745063782, "incorrect_loss_per_char": 0.7018511494000753, "correct_loss_per_token": 1.3746639490127563, "incorrect_loss_per_token": 1.4037022988001506, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.304194688796997, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.304194688796997, "logits_per_char": -0.6520973443984985, "num_chars": 2}, {"sum_logits": -1.3746639490127563, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3746639490127563, "logits_per_char": -0.6873319745063782, "num_chars": 2}, {"sum_logits": -1.4348856210708618, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4348856210708618, "logits_per_char": -0.7174428105354309, "num_chars": 2}, {"sum_logits": -1.4720265865325928, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4720265865325928, "logits_per_char": -0.7360132932662964, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4858145713806152, "incorrect_loss_raw": 1.374926487604777, "correct_loss_per_char": 0.7429072856903076, "incorrect_loss_per_char": 0.6874632438023885, "correct_loss_per_token": 1.4858145713806152, "incorrect_loss_per_token": 1.374926487604777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1752183437347412, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.1752183437347412, "logits_per_char": -0.5876091718673706, "num_chars": 2}, {"sum_logits": -1.4537826776504517, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4537826776504517, "logits_per_char": -0.7268913388252258, "num_chars": 2}, {"sum_logits": -1.4957784414291382, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4957784414291382, "logits_per_char": -0.7478892207145691, "num_chars": 2}, {"sum_logits": -1.4858145713806152, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4858145713806152, "logits_per_char": -0.7429072856903076, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4623899459838867, "incorrect_loss_raw": 1.368548075358073, "correct_loss_per_char": 0.7311949729919434, "incorrect_loss_per_char": 0.6842740376790365, "correct_loss_per_token": 1.4623899459838867, "incorrect_loss_per_token": 1.368548075358073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4623899459838867, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.4623899459838867, "logits_per_char": -0.7311949729919434, "num_chars": 2}, {"sum_logits": -1.3716741800308228, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.3716741800308228, "logits_per_char": -0.6858370900154114, "num_chars": 2}, {"sum_logits": -1.3310843706130981, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": true, "logits_per_token": -1.3310843706130981, "logits_per_char": -0.6655421853065491, "num_chars": 2}, {"sum_logits": -1.4028856754302979, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.4028856754302979, "logits_per_char": -0.7014428377151489, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3249166011810303, "incorrect_loss_raw": 1.4180656274159749, "correct_loss_per_char": 0.6624583005905151, "incorrect_loss_per_char": 0.7090328137079874, "correct_loss_per_token": 1.3249166011810303, "incorrect_loss_per_token": 1.4180656274159749, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3249166011810303, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.3249166011810303, "logits_per_char": -0.6624583005905151, "num_chars": 2}, {"sum_logits": -1.3744351863861084, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3744351863861084, "logits_per_char": -0.6872175931930542, "num_chars": 2}, {"sum_logits": -1.338155746459961, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.338155746459961, "logits_per_char": -0.6690778732299805, "num_chars": 2}, {"sum_logits": -1.5416059494018555, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5416059494018555, "logits_per_char": -0.7708029747009277, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.528618574142456, "incorrect_loss_raw": 1.3721487919489543, "correct_loss_per_char": 0.764309287071228, "incorrect_loss_per_char": 0.6860743959744772, "correct_loss_per_token": 1.528618574142456, "incorrect_loss_per_token": 1.3721487919489543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.531243085861206, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.531243085861206, "logits_per_char": -0.765621542930603, "num_chars": 2}, {"sum_logits": -1.493693232536316, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.493693232536316, "logits_per_char": -0.746846616268158, "num_chars": 2}, {"sum_logits": -1.528618574142456, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.528618574142456, "logits_per_char": -0.764309287071228, "num_chars": 2}, {"sum_logits": -1.0915100574493408, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": true, "logits_per_token": -1.0915100574493408, "logits_per_char": -0.5457550287246704, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5182311534881592, "incorrect_loss_raw": 1.3648629188537598, "correct_loss_per_char": 0.7591155767440796, "incorrect_loss_per_char": 0.6824314594268799, "correct_loss_per_token": 1.5182311534881592, "incorrect_loss_per_token": 1.3648629188537598, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2291193008422852, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2291193008422852, "logits_per_char": -0.6145596504211426, "num_chars": 2}, {"sum_logits": -1.4319837093353271, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4319837093353271, "logits_per_char": -0.7159918546676636, "num_chars": 2}, {"sum_logits": -1.433485746383667, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.433485746383667, "logits_per_char": -0.7167428731918335, "num_chars": 2}, {"sum_logits": -1.5182311534881592, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5182311534881592, "logits_per_char": -0.7591155767440796, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5055820941925049, "incorrect_loss_raw": 1.356786886850993, "correct_loss_per_char": 0.7527910470962524, "incorrect_loss_per_char": 0.6783934434254965, "correct_loss_per_token": 1.5055820941925049, "incorrect_loss_per_token": 1.356786886850993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3506251573562622, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3506251573562622, "logits_per_char": -0.6753125786781311, "num_chars": 2}, {"sum_logits": -1.3101961612701416, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.3101961612701416, "logits_per_char": -0.6550980806350708, "num_chars": 2}, {"sum_logits": -1.4095393419265747, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4095393419265747, "logits_per_char": -0.7047696709632874, "num_chars": 2}, {"sum_logits": -1.5055820941925049, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.5055820941925049, "logits_per_char": -0.7527910470962524, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5258662700653076, "incorrect_loss_raw": 1.381412386894226, "correct_loss_per_char": 0.7629331350326538, "incorrect_loss_per_char": 0.690706193447113, "correct_loss_per_token": 1.5258662700653076, "incorrect_loss_per_token": 1.381412386894226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2936267852783203, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": true, "logits_per_token": -1.2936267852783203, "logits_per_char": -0.6468133926391602, "num_chars": 2}, {"sum_logits": -1.317152976989746, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.317152976989746, "logits_per_char": -0.658576488494873, "num_chars": 2}, {"sum_logits": -1.5258662700653076, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.5258662700653076, "logits_per_char": -0.7629331350326538, "num_chars": 2}, {"sum_logits": -1.5334573984146118, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.5334573984146118, "logits_per_char": -0.7667286992073059, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4111109972000122, "incorrect_loss_raw": 1.4072607358296711, "correct_loss_per_char": 0.7055554986000061, "incorrect_loss_per_char": 0.7036303679148356, "correct_loss_per_token": 1.4111109972000122, "incorrect_loss_per_token": 1.4072607358296711, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1212897300720215, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.1212897300720215, "logits_per_char": -0.5606448650360107, "num_chars": 2}, {"sum_logits": -1.4111109972000122, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4111109972000122, "logits_per_char": -0.7055554986000061, "num_chars": 2}, {"sum_logits": -1.5689167976379395, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5689167976379395, "logits_per_char": -0.7844583988189697, "num_chars": 2}, {"sum_logits": -1.5315756797790527, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5315756797790527, "logits_per_char": -0.7657878398895264, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1465579271316528, "incorrect_loss_raw": 1.500460108121236, "correct_loss_per_char": 0.5732789635658264, "incorrect_loss_per_char": 0.750230054060618, "correct_loss_per_token": 1.1465579271316528, "incorrect_loss_per_token": 1.500460108121236, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1465579271316528, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.1465579271316528, "logits_per_char": -0.5732789635658264, "num_chars": 2}, {"sum_logits": -1.335901141166687, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.335901141166687, "logits_per_char": -0.6679505705833435, "num_chars": 2}, {"sum_logits": -1.5592247247695923, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.5592247247695923, "logits_per_char": -0.7796123623847961, "num_chars": 2}, {"sum_logits": -1.6062544584274292, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.6062544584274292, "logits_per_char": -0.8031272292137146, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4408351182937622, "incorrect_loss_raw": 1.4044706026713054, "correct_loss_per_char": 0.7204175591468811, "incorrect_loss_per_char": 0.7022353013356527, "correct_loss_per_token": 1.4408351182937622, "incorrect_loss_per_token": 1.4044706026713054, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1494249105453491, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1494249105453491, "logits_per_char": -0.5747124552726746, "num_chars": 2}, {"sum_logits": -1.4408351182937622, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4408351182937622, "logits_per_char": -0.7204175591468811, "num_chars": 2}, {"sum_logits": -1.4255553483963013, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4255553483963013, "logits_per_char": -0.7127776741981506, "num_chars": 2}, {"sum_logits": -1.6384315490722656, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.6384315490722656, "logits_per_char": -0.8192157745361328, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3503642082214355, "incorrect_loss_raw": 1.408717115720113, "correct_loss_per_char": 0.6751821041107178, "incorrect_loss_per_char": 0.7043585578600565, "correct_loss_per_token": 1.3503642082214355, "incorrect_loss_per_token": 1.408717115720113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3460769653320312, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -1.3460769653320312, "logits_per_char": -0.6730384826660156, "num_chars": 2}, {"sum_logits": -1.3503642082214355, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.3503642082214355, "logits_per_char": -0.6751821041107178, "num_chars": 2}, {"sum_logits": -1.5012682676315308, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.5012682676315308, "logits_per_char": -0.7506341338157654, "num_chars": 2}, {"sum_logits": -1.3788061141967773, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.3788061141967773, "logits_per_char": -0.6894030570983887, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2512348890304565, "incorrect_loss_raw": 1.4702799320220947, "correct_loss_per_char": 0.6256174445152283, "incorrect_loss_per_char": 0.7351399660110474, "correct_loss_per_token": 1.2512348890304565, "incorrect_loss_per_token": 1.4702799320220947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2512348890304565, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": true, "logits_per_token": -1.2512348890304565, "logits_per_char": -0.6256174445152283, "num_chars": 2}, {"sum_logits": -1.2925827503204346, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.2925827503204346, "logits_per_char": -0.6462913751602173, "num_chars": 2}, {"sum_logits": -1.5540564060211182, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.5540564060211182, "logits_per_char": -0.7770282030105591, "num_chars": 2}, {"sum_logits": -1.5642006397247314, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.5642006397247314, "logits_per_char": -0.7821003198623657, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3870265483856201, "incorrect_loss_raw": 1.3971983591715496, "correct_loss_per_char": 0.6935132741928101, "incorrect_loss_per_char": 0.6985991795857748, "correct_loss_per_token": 1.3870265483856201, "incorrect_loss_per_token": 1.3971983591715496, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2784456014633179, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2784456014633179, "logits_per_char": -0.6392228007316589, "num_chars": 2}, {"sum_logits": -1.3870265483856201, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3870265483856201, "logits_per_char": -0.6935132741928101, "num_chars": 2}, {"sum_logits": -1.4982695579528809, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4982695579528809, "logits_per_char": -0.7491347789764404, "num_chars": 2}, {"sum_logits": -1.4148799180984497, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4148799180984497, "logits_per_char": -0.7074399590492249, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.545303463935852, "incorrect_loss_raw": 1.3453193108240764, "correct_loss_per_char": 0.772651731967926, "incorrect_loss_per_char": 0.6726596554120382, "correct_loss_per_token": 1.545303463935852, "incorrect_loss_per_token": 1.3453193108240764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2984123229980469, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": true, "logits_per_token": -1.2984123229980469, "logits_per_char": -0.6492061614990234, "num_chars": 2}, {"sum_logits": -1.3110878467559814, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.3110878467559814, "logits_per_char": -0.6555439233779907, "num_chars": 2}, {"sum_logits": -1.545303463935852, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.545303463935852, "logits_per_char": -0.772651731967926, "num_chars": 2}, {"sum_logits": -1.4264577627182007, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.4264577627182007, "logits_per_char": -0.7132288813591003, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1305650472640991, "incorrect_loss_raw": 1.5030981699625652, "correct_loss_per_char": 0.5652825236320496, "incorrect_loss_per_char": 0.7515490849812826, "correct_loss_per_token": 1.1305650472640991, "incorrect_loss_per_token": 1.5030981699625652, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1305650472640991, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.1305650472640991, "logits_per_char": -0.5652825236320496, "num_chars": 2}, {"sum_logits": -1.4225363731384277, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4225363731384277, "logits_per_char": -0.7112681865692139, "num_chars": 2}, {"sum_logits": -1.5687004327774048, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5687004327774048, "logits_per_char": -0.7843502163887024, "num_chars": 2}, {"sum_logits": -1.5180577039718628, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5180577039718628, "logits_per_char": -0.7590288519859314, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.348388433456421, "incorrect_loss_raw": 1.410564621289571, "correct_loss_per_char": 0.6741942167282104, "incorrect_loss_per_char": 0.7052823106447855, "correct_loss_per_token": 1.348388433456421, "incorrect_loss_per_token": 1.410564621289571, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5362626314163208, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.5362626314163208, "logits_per_char": -0.7681313157081604, "num_chars": 2}, {"sum_logits": -1.348388433456421, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.348388433456421, "logits_per_char": -0.6741942167282104, "num_chars": 2}, {"sum_logits": -1.3344342708587646, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.3344342708587646, "logits_per_char": -0.6672171354293823, "num_chars": 2}, {"sum_logits": -1.360996961593628, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.360996961593628, "logits_per_char": -0.680498480796814, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.380097508430481, "incorrect_loss_raw": 1.399682879447937, "correct_loss_per_char": 0.6900487542152405, "incorrect_loss_per_char": 0.6998414397239685, "correct_loss_per_token": 1.380097508430481, "incorrect_loss_per_token": 1.399682879447937, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.380097508430481, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.380097508430481, "logits_per_char": -0.6900487542152405, "num_chars": 2}, {"sum_logits": -1.479286551475525, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.479286551475525, "logits_per_char": -0.7396432757377625, "num_chars": 2}, {"sum_logits": -1.4075164794921875, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4075164794921875, "logits_per_char": -0.7037582397460938, "num_chars": 2}, {"sum_logits": -1.3122456073760986, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3122456073760986, "logits_per_char": -0.6561228036880493, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4595699310302734, "incorrect_loss_raw": 1.39184574286143, "correct_loss_per_char": 0.7297849655151367, "incorrect_loss_per_char": 0.695922871430715, "correct_loss_per_token": 1.4595699310302734, "incorrect_loss_per_token": 1.39184574286143, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2707948684692383, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.2707948684692383, "logits_per_char": -0.6353974342346191, "num_chars": 2}, {"sum_logits": -1.2304649353027344, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": true, "logits_per_token": -1.2304649353027344, "logits_per_char": -0.6152324676513672, "num_chars": 2}, {"sum_logits": -1.674277424812317, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.674277424812317, "logits_per_char": -0.8371387124061584, "num_chars": 2}, {"sum_logits": -1.4595699310302734, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.4595699310302734, "logits_per_char": -0.7297849655151367, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5576105117797852, "incorrect_loss_raw": 1.3469750086466472, "correct_loss_per_char": 0.7788052558898926, "incorrect_loss_per_char": 0.6734875043233236, "correct_loss_per_token": 1.5576105117797852, "incorrect_loss_per_token": 1.3469750086466472, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2265315055847168, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.2265315055847168, "logits_per_char": -0.6132657527923584, "num_chars": 2}, {"sum_logits": -1.3295927047729492, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3295927047729492, "logits_per_char": -0.6647963523864746, "num_chars": 2}, {"sum_logits": -1.5576105117797852, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5576105117797852, "logits_per_char": -0.7788052558898926, "num_chars": 2}, {"sum_logits": -1.4848008155822754, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4848008155822754, "logits_per_char": -0.7424004077911377, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4729033708572388, "incorrect_loss_raw": 1.3709330956141155, "correct_loss_per_char": 0.7364516854286194, "incorrect_loss_per_char": 0.6854665478070577, "correct_loss_per_token": 1.4729033708572388, "incorrect_loss_per_token": 1.3709330956141155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3213711977005005, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3213711977005005, "logits_per_char": -0.6606855988502502, "num_chars": 2}, {"sum_logits": -1.3193200826644897, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.3193200826644897, "logits_per_char": -0.6596600413322449, "num_chars": 2}, {"sum_logits": -1.472108006477356, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.472108006477356, "logits_per_char": -0.736054003238678, "num_chars": 2}, {"sum_logits": -1.4729033708572388, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4729033708572388, "logits_per_char": -0.7364516854286194, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5612186193466187, "incorrect_loss_raw": 1.347704529762268, "correct_loss_per_char": 0.7806093096733093, "incorrect_loss_per_char": 0.673852264881134, "correct_loss_per_token": 1.5612186193466187, "incorrect_loss_per_token": 1.347704529762268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.239516258239746, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.239516258239746, "logits_per_char": -0.619758129119873, "num_chars": 2}, {"sum_logits": -1.3025263547897339, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3025263547897339, "logits_per_char": -0.6512631773948669, "num_chars": 2}, {"sum_logits": -1.5612186193466187, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5612186193466187, "logits_per_char": -0.7806093096733093, "num_chars": 2}, {"sum_logits": -1.5010709762573242, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5010709762573242, "logits_per_char": -0.7505354881286621, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4044499397277832, "incorrect_loss_raw": 1.3892491658528645, "correct_loss_per_char": 0.7022249698638916, "incorrect_loss_per_char": 0.6946245829264323, "correct_loss_per_token": 1.4044499397277832, "incorrect_loss_per_token": 1.3892491658528645, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2673894166946411, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.2673894166946411, "logits_per_char": -0.6336947083473206, "num_chars": 2}, {"sum_logits": -1.413340449333191, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.413340449333191, "logits_per_char": -0.7066702246665955, "num_chars": 2}, {"sum_logits": -1.4870176315307617, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4870176315307617, "logits_per_char": -0.7435088157653809, "num_chars": 2}, {"sum_logits": -1.4044499397277832, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4044499397277832, "logits_per_char": -0.7022249698638916, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4472365379333496, "incorrect_loss_raw": 1.3777216672897339, "correct_loss_per_char": 0.7236182689666748, "incorrect_loss_per_char": 0.6888608336448669, "correct_loss_per_token": 1.4472365379333496, "incorrect_loss_per_token": 1.3777216672897339, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.342878818511963, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.342878818511963, "logits_per_char": -0.6714394092559814, "num_chars": 2}, {"sum_logits": -1.4295833110809326, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4295833110809326, "logits_per_char": -0.7147916555404663, "num_chars": 2}, {"sum_logits": -1.4472365379333496, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4472365379333496, "logits_per_char": -0.7236182689666748, "num_chars": 2}, {"sum_logits": -1.3607028722763062, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3607028722763062, "logits_per_char": -0.6803514361381531, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.194387435913086, "incorrect_loss_raw": 1.4718022346496582, "correct_loss_per_char": 0.597193717956543, "incorrect_loss_per_char": 0.7359011173248291, "correct_loss_per_token": 1.194387435913086, "incorrect_loss_per_token": 1.4718022346496582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.194387435913086, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.194387435913086, "logits_per_char": -0.597193717956543, "num_chars": 2}, {"sum_logits": -1.3769510984420776, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3769510984420776, "logits_per_char": -0.6884755492210388, "num_chars": 2}, {"sum_logits": -1.5157767534255981, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5157767534255981, "logits_per_char": -0.7578883767127991, "num_chars": 2}, {"sum_logits": -1.5226788520812988, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5226788520812988, "logits_per_char": -0.7613394260406494, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.592499017715454, "incorrect_loss_raw": 1.3391950130462646, "correct_loss_per_char": 0.796249508857727, "incorrect_loss_per_char": 0.6695975065231323, "correct_loss_per_token": 1.592499017715454, "incorrect_loss_per_token": 1.3391950130462646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2048214673995972, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2048214673995972, "logits_per_char": -0.6024107336997986, "num_chars": 2}, {"sum_logits": -1.3892310857772827, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3892310857772827, "logits_per_char": -0.6946155428886414, "num_chars": 2}, {"sum_logits": -1.592499017715454, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.592499017715454, "logits_per_char": -0.796249508857727, "num_chars": 2}, {"sum_logits": -1.423532485961914, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.423532485961914, "logits_per_char": -0.711766242980957, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6795408725738525, "incorrect_loss_raw": 1.3317572275797527, "correct_loss_per_char": 0.8397704362869263, "incorrect_loss_per_char": 0.6658786137898763, "correct_loss_per_token": 1.6795408725738525, "incorrect_loss_per_token": 1.3317572275797527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.082329511642456, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.082329511642456, "logits_per_char": -0.541164755821228, "num_chars": 2}, {"sum_logits": -1.3800699710845947, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3800699710845947, "logits_per_char": -0.6900349855422974, "num_chars": 2}, {"sum_logits": -1.6795408725738525, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.6795408725738525, "logits_per_char": -0.8397704362869263, "num_chars": 2}, {"sum_logits": -1.532872200012207, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.532872200012207, "logits_per_char": -0.7664361000061035, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3232567310333252, "incorrect_loss_raw": 1.4211407502492268, "correct_loss_per_char": 0.6616283655166626, "incorrect_loss_per_char": 0.7105703751246134, "correct_loss_per_token": 1.3232567310333252, "incorrect_loss_per_token": 1.4211407502492268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3232567310333252, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.3232567310333252, "logits_per_char": -0.6616283655166626, "num_chars": 2}, {"sum_logits": -1.4187365770339966, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4187365770339966, "logits_per_char": -0.7093682885169983, "num_chars": 2}, {"sum_logits": -1.4450289011001587, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4450289011001587, "logits_per_char": -0.7225144505500793, "num_chars": 2}, {"sum_logits": -1.3996567726135254, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3996567726135254, "logits_per_char": -0.6998283863067627, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.378591775894165, "incorrect_loss_raw": 1.396758238474528, "correct_loss_per_char": 0.6892958879470825, "incorrect_loss_per_char": 0.698379119237264, "correct_loss_per_token": 1.378591775894165, "incorrect_loss_per_token": 1.396758238474528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4067749977111816, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.4067749977111816, "logits_per_char": -0.7033874988555908, "num_chars": 2}, {"sum_logits": -1.3373569250106812, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": true, "logits_per_token": -1.3373569250106812, "logits_per_char": -0.6686784625053406, "num_chars": 2}, {"sum_logits": -1.378591775894165, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.378591775894165, "logits_per_char": -0.6892958879470825, "num_chars": 2}, {"sum_logits": -1.4461427927017212, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.4461427927017212, "logits_per_char": -0.7230713963508606, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.035021424293518, "incorrect_loss_raw": 1.5528998772303264, "correct_loss_per_char": 0.517510712146759, "incorrect_loss_per_char": 0.7764499386151632, "correct_loss_per_token": 1.035021424293518, "incorrect_loss_per_token": 1.5528998772303264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.035021424293518, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.035021424293518, "logits_per_char": -0.517510712146759, "num_chars": 2}, {"sum_logits": -1.4012378454208374, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4012378454208374, "logits_per_char": -0.7006189227104187, "num_chars": 2}, {"sum_logits": -1.5943424701690674, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5943424701690674, "logits_per_char": -0.7971712350845337, "num_chars": 2}, {"sum_logits": -1.6631193161010742, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.6631193161010742, "logits_per_char": -0.8315596580505371, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3749898672103882, "incorrect_loss_raw": 1.3986471891403198, "correct_loss_per_char": 0.6874949336051941, "incorrect_loss_per_char": 0.6993235945701599, "correct_loss_per_token": 1.3749898672103882, "incorrect_loss_per_token": 1.3986471891403198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4548237323760986, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4548237323760986, "logits_per_char": -0.7274118661880493, "num_chars": 2}, {"sum_logits": -1.3825711011886597, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3825711011886597, "logits_per_char": -0.6912855505943298, "num_chars": 2}, {"sum_logits": -1.3585467338562012, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.3585467338562012, "logits_per_char": -0.6792733669281006, "num_chars": 2}, {"sum_logits": -1.3749898672103882, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3749898672103882, "logits_per_char": -0.6874949336051941, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.294090986251831, "incorrect_loss_raw": 1.4334137837092082, "correct_loss_per_char": 0.6470454931259155, "incorrect_loss_per_char": 0.7167068918546041, "correct_loss_per_token": 1.294090986251831, "incorrect_loss_per_token": 1.4334137837092082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.294090986251831, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.294090986251831, "logits_per_char": -0.6470454931259155, "num_chars": 2}, {"sum_logits": -1.3723673820495605, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3723673820495605, "logits_per_char": -0.6861836910247803, "num_chars": 2}, {"sum_logits": -1.5871771574020386, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5871771574020386, "logits_per_char": -0.7935885787010193, "num_chars": 2}, {"sum_logits": -1.3406968116760254, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3406968116760254, "logits_per_char": -0.6703484058380127, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6189574003219604, "incorrect_loss_raw": 1.3475761810938518, "correct_loss_per_char": 0.8094787001609802, "incorrect_loss_per_char": 0.6737880905469259, "correct_loss_per_token": 1.6189574003219604, "incorrect_loss_per_token": 1.3475761810938518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.124530553817749, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.124530553817749, "logits_per_char": -0.5622652769088745, "num_chars": 2}, {"sum_logits": -1.3204420804977417, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3204420804977417, "logits_per_char": -0.6602210402488708, "num_chars": 2}, {"sum_logits": -1.6189574003219604, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.6189574003219604, "logits_per_char": -0.8094787001609802, "num_chars": 2}, {"sum_logits": -1.5977559089660645, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5977559089660645, "logits_per_char": -0.7988779544830322, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6296026706695557, "incorrect_loss_raw": 1.3276960849761963, "correct_loss_per_char": 0.8148013353347778, "incorrect_loss_per_char": 0.6638480424880981, "correct_loss_per_token": 1.6296026706695557, "incorrect_loss_per_token": 1.3276960849761963, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2224773168563843, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.2224773168563843, "logits_per_char": -0.6112386584281921, "num_chars": 2}, {"sum_logits": -1.3074976205825806, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3074976205825806, "logits_per_char": -0.6537488102912903, "num_chars": 2}, {"sum_logits": -1.6296026706695557, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.6296026706695557, "logits_per_char": -0.8148013353347778, "num_chars": 2}, {"sum_logits": -1.453113317489624, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.453113317489624, "logits_per_char": -0.726556658744812, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.39742112159729, "incorrect_loss_raw": 1.3909434874852498, "correct_loss_per_char": 0.698710560798645, "incorrect_loss_per_char": 0.6954717437426249, "correct_loss_per_token": 1.39742112159729, "incorrect_loss_per_token": 1.3909434874852498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3506879806518555, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.3506879806518555, "logits_per_char": -0.6753439903259277, "num_chars": 2}, {"sum_logits": -1.39742112159729, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.39742112159729, "logits_per_char": -0.698710560798645, "num_chars": 2}, {"sum_logits": -1.4182013273239136, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4182013273239136, "logits_per_char": -0.7091006636619568, "num_chars": 2}, {"sum_logits": -1.4039411544799805, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4039411544799805, "logits_per_char": -0.7019705772399902, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4628618955612183, "incorrect_loss_raw": 1.3736265103022258, "correct_loss_per_char": 0.7314309477806091, "incorrect_loss_per_char": 0.6868132551511129, "correct_loss_per_token": 1.4628618955612183, "incorrect_loss_per_token": 1.3736265103022258, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2741552591323853, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.2741552591323853, "logits_per_char": -0.6370776295661926, "num_chars": 2}, {"sum_logits": -1.3641713857650757, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3641713857650757, "logits_per_char": -0.6820856928825378, "num_chars": 2}, {"sum_logits": -1.4825528860092163, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4825528860092163, "logits_per_char": -0.7412764430046082, "num_chars": 2}, {"sum_logits": -1.4628618955612183, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4628618955612183, "logits_per_char": -0.7314309477806091, "num_chars": 2}], "label": 3, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3510935306549072, "incorrect_loss_raw": 1.4379232327143352, "correct_loss_per_char": 0.6755467653274536, "incorrect_loss_per_char": 0.7189616163571676, "correct_loss_per_token": 1.3510935306549072, "incorrect_loss_per_token": 1.4379232327143352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3034049272537231, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": true, "logits_per_token": -1.3034049272537231, "logits_per_char": -0.6517024636268616, "num_chars": 2}, {"sum_logits": -1.3510935306549072, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.3510935306549072, "logits_per_char": -0.6755467653274536, "num_chars": 2}, {"sum_logits": -1.4849812984466553, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.4849812984466553, "logits_per_char": -0.7424906492233276, "num_chars": 2}, {"sum_logits": -1.525383472442627, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.525383472442627, "logits_per_char": -0.7626917362213135, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.226423740386963, "incorrect_loss_raw": 1.4639223019282024, "correct_loss_per_char": 0.6132118701934814, "incorrect_loss_per_char": 0.7319611509641012, "correct_loss_per_token": 1.226423740386963, "incorrect_loss_per_token": 1.4639223019282024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.226423740386963, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.226423740386963, "logits_per_char": -0.6132118701934814, "num_chars": 2}, {"sum_logits": -1.3155219554901123, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3155219554901123, "logits_per_char": -0.6577609777450562, "num_chars": 2}, {"sum_logits": -1.4240745306015015, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4240745306015015, "logits_per_char": -0.7120372653007507, "num_chars": 2}, {"sum_logits": -1.6521704196929932, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.6521704196929932, "logits_per_char": -0.8260852098464966, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0768542289733887, "incorrect_loss_raw": 1.534406264623006, "correct_loss_per_char": 0.5384271144866943, "incorrect_loss_per_char": 0.767203132311503, "correct_loss_per_token": 1.0768542289733887, "incorrect_loss_per_token": 1.534406264623006, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0768542289733887, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.0768542289733887, "logits_per_char": -0.5384271144866943, "num_chars": 2}, {"sum_logits": -1.3458245992660522, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3458245992660522, "logits_per_char": -0.6729122996330261, "num_chars": 2}, {"sum_logits": -1.599526047706604, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.599526047706604, "logits_per_char": -0.799763023853302, "num_chars": 2}, {"sum_logits": -1.6578681468963623, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.6578681468963623, "logits_per_char": -0.8289340734481812, "num_chars": 2}], "label": 0, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5956718921661377, "incorrect_loss_raw": 1.3539968331654866, "correct_loss_per_char": 0.7978359460830688, "incorrect_loss_per_char": 0.6769984165827433, "correct_loss_per_token": 1.5956718921661377, "incorrect_loss_per_token": 1.3539968331654866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.08778715133667, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.08778715133667, "logits_per_char": -0.543893575668335, "num_chars": 2}, {"sum_logits": -1.4398584365844727, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4398584365844727, "logits_per_char": -0.7199292182922363, "num_chars": 2}, {"sum_logits": -1.5956718921661377, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5956718921661377, "logits_per_char": -0.7978359460830688, "num_chars": 2}, {"sum_logits": -1.5343449115753174, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5343449115753174, "logits_per_char": -0.7671724557876587, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6428992748260498, "incorrect_loss_raw": 1.3384779691696167, "correct_loss_per_char": 0.8214496374130249, "incorrect_loss_per_char": 0.6692389845848083, "correct_loss_per_token": 1.6428992748260498, "incorrect_loss_per_token": 1.3384779691696167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1083730459213257, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.1083730459213257, "logits_per_char": -0.5541865229606628, "num_chars": 2}, {"sum_logits": -1.3317643404006958, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3317643404006958, "logits_per_char": -0.6658821702003479, "num_chars": 2}, {"sum_logits": -1.6428992748260498, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.6428992748260498, "logits_per_char": -0.8214496374130249, "num_chars": 2}, {"sum_logits": -1.5752965211868286, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.5752965211868286, "logits_per_char": -0.7876482605934143, "num_chars": 2}], "label": 2, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4455550909042358, "incorrect_loss_raw": 1.4036627610524495, "correct_loss_per_char": 0.7227775454521179, "incorrect_loss_per_char": 0.7018313805262247, "correct_loss_per_token": 1.4455550909042358, "incorrect_loss_per_token": 1.4036627610524495, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1035799980163574, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.1035799980163574, "logits_per_char": -0.5517899990081787, "num_chars": 2}, {"sum_logits": -1.4455550909042358, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4455550909042358, "logits_per_char": -0.7227775454521179, "num_chars": 2}, {"sum_logits": -1.5154483318328857, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5154483318328857, "logits_per_char": -0.7577241659164429, "num_chars": 2}, {"sum_logits": -1.5919599533081055, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5919599533081055, "logits_per_char": -0.7959799766540527, "num_chars": 2}], "label": 1, "task_hash": "6d7c3f721bf97797f0e660d896f4585b", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
|