diff --git "a/evals/core_9mcqa/task-000-arc_easy:mc-predictions.jsonl" "b/evals/core_9mcqa/task-000-arc_easy:mc-predictions.jsonl" new file mode 100644--- /dev/null +++ "b/evals/core_9mcqa/task-000-arc_easy:mc-predictions.jsonl" @@ -0,0 +1,1000 @@ +{"doc_id": 0, "native_id": "MCAS_2004_9_21", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4882479906082153, "incorrect_loss_raw": 1.363123933474223, "correct_loss_per_char": 0.7441239953041077, "incorrect_loss_per_char": 0.6815619667371114, "correct_loss_per_token": 1.4882479906082153, "incorrect_loss_per_token": 1.363123933474223, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4882479906082153, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4882479906082153, "logits_per_char": -0.7441239953041077, "num_chars": 2}, {"sum_logits": -1.392048716545105, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.392048716545105, "logits_per_char": -0.6960243582725525, "num_chars": 2}, {"sum_logits": -1.427667260169983, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.427667260169983, "logits_per_char": -0.7138336300849915, "num_chars": 2}, {"sum_logits": -1.2696558237075806, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2696558237075806, "logits_per_char": -0.6348279118537903, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1, "native_id": "Mercury_SC_407227", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3516080379486084, "incorrect_loss_raw": 1.405293385187785, "correct_loss_per_char": 0.6758040189743042, "incorrect_loss_per_char": 0.7026466925938925, "correct_loss_per_token": 1.3516080379486084, "incorrect_loss_per_token": 1.405293385187785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4865081310272217, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4865081310272217, "logits_per_char": -0.7432540655136108, "num_chars": 2}, {"sum_logits": -1.440984845161438, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.440984845161438, "logits_per_char": -0.720492422580719, "num_chars": 2}, {"sum_logits": -1.2883871793746948, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.2883871793746948, "logits_per_char": -0.6441935896873474, "num_chars": 2}, {"sum_logits": -1.3516080379486084, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3516080379486084, "logits_per_char": -0.6758040189743042, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 2, "native_id": "VASoL_2010_5_18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4438509941101074, "incorrect_loss_raw": 1.3758917649586995, "correct_loss_per_char": 0.7219254970550537, "incorrect_loss_per_char": 0.6879458824793497, "correct_loss_per_token": 1.4438509941101074, "incorrect_loss_per_token": 1.3758917649586995, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4438509941101074, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4438509941101074, "logits_per_char": -0.7219254970550537, "num_chars": 2}, {"sum_logits": -1.2993971109390259, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.2993971109390259, "logits_per_char": -0.6496985554695129, "num_chars": 2}, {"sum_logits": -1.398177981376648, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.398177981376648, "logits_per_char": -0.699088990688324, "num_chars": 2}, {"sum_logits": -1.4301002025604248, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4301002025604248, "logits_per_char": -0.7150501012802124, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 3, "native_id": "MDSA_2011_4_16", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4591683149337769, "incorrect_loss_raw": 1.369628111521403, "correct_loss_per_char": 0.7295841574668884, "incorrect_loss_per_char": 0.6848140557607015, "correct_loss_per_token": 1.4591683149337769, "incorrect_loss_per_token": 1.369628111521403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3713921308517456, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3713921308517456, "logits_per_char": -0.6856960654258728, "num_chars": 2}, {"sum_logits": -1.3255492448806763, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3255492448806763, "logits_per_char": -0.6627746224403381, "num_chars": 2}, {"sum_logits": -1.411942958831787, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.411942958831787, "logits_per_char": -0.7059714794158936, "num_chars": 2}, {"sum_logits": -1.4591683149337769, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4591683149337769, "logits_per_char": -0.7295841574668884, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 4, "native_id": "Mercury_7143360", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3095921277999878, "incorrect_loss_raw": 1.4226194222768147, "correct_loss_per_char": 0.6547960638999939, "incorrect_loss_per_char": 0.7113097111384074, "correct_loss_per_token": 1.3095921277999878, "incorrect_loss_per_token": 1.4226194222768147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5549720525741577, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.5549720525741577, "logits_per_char": -0.7774860262870789, "num_chars": 2}, {"sum_logits": -1.3512318134307861, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3512318134307861, "logits_per_char": -0.6756159067153931, "num_chars": 2}, {"sum_logits": -1.3616544008255005, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3616544008255005, "logits_per_char": -0.6808272004127502, "num_chars": 2}, {"sum_logits": -1.3095921277999878, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.3095921277999878, "logits_per_char": -0.6547960638999939, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 5, "native_id": "MCAS_2004_8_22", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5066308975219727, "incorrect_loss_raw": 1.3583112160364788, "correct_loss_per_char": 0.7533154487609863, "incorrect_loss_per_char": 0.6791556080182394, "correct_loss_per_token": 1.5066308975219727, "incorrect_loss_per_token": 1.3583112160364788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5066308975219727, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5066308975219727, "logits_per_char": -0.7533154487609863, "num_chars": 2}, {"sum_logits": -1.3616105318069458, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3616105318069458, "logits_per_char": -0.6808052659034729, "num_chars": 2}, {"sum_logits": -1.4616334438323975, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4616334438323975, "logits_per_char": -0.7308167219161987, "num_chars": 2}, {"sum_logits": -1.2516896724700928, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2516896724700928, "logits_per_char": -0.6258448362350464, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 6, "native_id": "VASoL_2008_3_20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.520264983177185, "incorrect_loss_raw": 1.3514937559763591, "correct_loss_per_char": 0.7601324915885925, "incorrect_loss_per_char": 0.6757468779881796, "correct_loss_per_token": 1.520264983177185, "incorrect_loss_per_token": 1.3514937559763591, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.520264983177185, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.520264983177185, "logits_per_char": -0.7601324915885925, "num_chars": 2}, {"sum_logits": -1.3786876201629639, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3786876201629639, "logits_per_char": -0.6893438100814819, "num_chars": 2}, {"sum_logits": -1.3721925020217896, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3721925020217896, "logits_per_char": -0.6860962510108948, "num_chars": 2}, {"sum_logits": -1.3036011457443237, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3036011457443237, "logits_per_char": -0.6518005728721619, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 7, "native_id": "Mercury_SC_400611", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.444197654724121, "incorrect_loss_raw": 1.372923215230306, "correct_loss_per_char": 0.7220988273620605, "incorrect_loss_per_char": 0.686461607615153, "correct_loss_per_token": 1.444197654724121, "incorrect_loss_per_token": 1.372923215230306, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3445160388946533, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.3445160388946533, "logits_per_char": -0.6722580194473267, "num_chars": 2}, {"sum_logits": -1.3499603271484375, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3499603271484375, "logits_per_char": -0.6749801635742188, "num_chars": 2}, {"sum_logits": -1.4242932796478271, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4242932796478271, "logits_per_char": -0.7121466398239136, "num_chars": 2}, {"sum_logits": -1.444197654724121, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.444197654724121, "logits_per_char": -0.7220988273620605, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 8, "native_id": "Mercury_SC_401811", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3297550678253174, "incorrect_loss_raw": 1.4139727354049683, "correct_loss_per_char": 0.6648775339126587, "incorrect_loss_per_char": 0.7069863677024841, "correct_loss_per_token": 1.3297550678253174, "incorrect_loss_per_token": 1.4139727354049683, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3605775833129883, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3605775833129883, "logits_per_char": -0.6802887916564941, "num_chars": 2}, {"sum_logits": -1.3899530172348022, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3899530172348022, "logits_per_char": -0.6949765086174011, "num_chars": 2}, {"sum_logits": -1.4913876056671143, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4913876056671143, "logits_per_char": -0.7456938028335571, "num_chars": 2}, {"sum_logits": -1.3297550678253174, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.3297550678253174, "logits_per_char": -0.6648775339126587, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 9, "native_id": "VASoL_2008_3_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4103727340698242, "incorrect_loss_raw": 1.3903937339782715, "correct_loss_per_char": 0.7051863670349121, "incorrect_loss_per_char": 0.6951968669891357, "correct_loss_per_token": 1.4103727340698242, "incorrect_loss_per_token": 1.3903937339782715, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4038540124893188, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4038540124893188, "logits_per_char": -0.7019270062446594, "num_chars": 2}, {"sum_logits": -1.2840349674224854, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.2840349674224854, "logits_per_char": -0.6420174837112427, "num_chars": 2}, {"sum_logits": -1.4832922220230103, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4832922220230103, "logits_per_char": -0.7416461110115051, "num_chars": 2}, {"sum_logits": -1.4103727340698242, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4103727340698242, "logits_per_char": -0.7051863670349121, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 10, "native_id": "NCEOGA_2013_8_6", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4968433380126953, "incorrect_loss_raw": 1.3592900435129802, "correct_loss_per_char": 0.7484216690063477, "incorrect_loss_per_char": 0.6796450217564901, "correct_loss_per_token": 1.4968433380126953, "incorrect_loss_per_token": 1.3592900435129802, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4968433380126953, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.4968433380126953, "logits_per_char": -0.7484216690063477, "num_chars": 2}, {"sum_logits": -1.3998035192489624, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3998035192489624, "logits_per_char": -0.6999017596244812, "num_chars": 2}, {"sum_logits": -1.2869772911071777, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.2869772911071777, "logits_per_char": -0.6434886455535889, "num_chars": 2}, {"sum_logits": -1.3910893201828003, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3910893201828003, "logits_per_char": -0.6955446600914001, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 11, "native_id": "Mercury_177223", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5467417240142822, "incorrect_loss_raw": 1.3469329675038655, "correct_loss_per_char": 0.7733708620071411, "incorrect_loss_per_char": 0.6734664837519327, "correct_loss_per_token": 1.5467417240142822, "incorrect_loss_per_token": 1.3469329675038655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5467417240142822, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.5467417240142822, "logits_per_char": -0.7733708620071411, "num_chars": 2}, {"sum_logits": -1.4144062995910645, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4144062995910645, "logits_per_char": -0.7072031497955322, "num_chars": 2}, {"sum_logits": -1.3429028987884521, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3429028987884521, "logits_per_char": -0.6714514493942261, "num_chars": 2}, {"sum_logits": -1.28348970413208, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.28348970413208, "logits_per_char": -0.64174485206604, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 12, "native_id": "Mercury_182368", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.412851095199585, "incorrect_loss_raw": 1.385764519373576, "correct_loss_per_char": 0.7064255475997925, "incorrect_loss_per_char": 0.692882259686788, "correct_loss_per_token": 1.412851095199585, "incorrect_loss_per_token": 1.385764519373576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.412851095199585, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.412851095199585, "logits_per_char": -0.7064255475997925, "num_chars": 2}, {"sum_logits": -1.3072510957717896, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3072510957717896, "logits_per_char": -0.6536255478858948, "num_chars": 2}, {"sum_logits": -1.4673900604248047, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4673900604248047, "logits_per_char": -0.7336950302124023, "num_chars": 2}, {"sum_logits": -1.3826524019241333, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3826524019241333, "logits_per_char": -0.6913262009620667, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 13, "native_id": "Mercury_7012950", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3618485927581787, "incorrect_loss_raw": 1.3998719056447346, "correct_loss_per_char": 0.6809242963790894, "incorrect_loss_per_char": 0.6999359528223673, "correct_loss_per_token": 1.3618485927581787, "incorrect_loss_per_token": 1.3998719056447346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3772242069244385, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3772242069244385, "logits_per_char": -0.6886121034622192, "num_chars": 2}, {"sum_logits": -1.3618485927581787, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3618485927581787, "logits_per_char": -0.6809242963790894, "num_chars": 2}, {"sum_logits": -1.370648980140686, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.370648980140686, "logits_per_char": -0.685324490070343, "num_chars": 2}, {"sum_logits": -1.4517425298690796, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4517425298690796, "logits_per_char": -0.7258712649345398, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 14, "native_id": "Mercury_7216790", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3940811157226562, "incorrect_loss_raw": 1.3933556079864502, "correct_loss_per_char": 0.6970405578613281, "incorrect_loss_per_char": 0.6966778039932251, "correct_loss_per_token": 1.3940811157226562, "incorrect_loss_per_token": 1.3933556079864502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3154939413070679, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3154939413070679, "logits_per_char": -0.6577469706535339, "num_chars": 2}, {"sum_logits": -1.3940811157226562, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3940811157226562, "logits_per_char": -0.6970405578613281, "num_chars": 2}, {"sum_logits": -1.407434344291687, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.407434344291687, "logits_per_char": -0.7037171721458435, "num_chars": 2}, {"sum_logits": -1.4571385383605957, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4571385383605957, "logits_per_char": -0.7285692691802979, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 15, "native_id": "Mercury_7083405", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2291786670684814, "incorrect_loss_raw": 1.4501494963963826, "correct_loss_per_char": 0.6145893335342407, "incorrect_loss_per_char": 0.7250747481981913, "correct_loss_per_token": 1.2291786670684814, "incorrect_loss_per_token": 1.4501494963963826, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2291786670684814, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.2291786670684814, "logits_per_char": -0.6145893335342407, "num_chars": 2}, {"sum_logits": -1.4124493598937988, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4124493598937988, "logits_per_char": -0.7062246799468994, "num_chars": 2}, {"sum_logits": -1.3950679302215576, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3950679302215576, "logits_per_char": -0.6975339651107788, "num_chars": 2}, {"sum_logits": -1.5429311990737915, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.5429311990737915, "logits_per_char": -0.7714655995368958, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 16, "native_id": "Mercury_7247853", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3405715227127075, "incorrect_loss_raw": 1.408090869585673, "correct_loss_per_char": 0.6702857613563538, "incorrect_loss_per_char": 0.7040454347928365, "correct_loss_per_token": 1.3405715227127075, "incorrect_loss_per_token": 1.408090869585673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3405715227127075, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.3405715227127075, "logits_per_char": -0.6702857613563538, "num_chars": 2}, {"sum_logits": -1.4428914785385132, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4428914785385132, "logits_per_char": -0.7214457392692566, "num_chars": 2}, {"sum_logits": -1.37897527217865, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.37897527217865, "logits_per_char": -0.689487636089325, "num_chars": 2}, {"sum_logits": -1.402405858039856, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.402405858039856, "logits_per_char": -0.701202929019928, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 17, "native_id": "NYSEDREGENTS_2013_8_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3205287456512451, "incorrect_loss_raw": 1.4152944882710774, "correct_loss_per_char": 0.6602643728256226, "incorrect_loss_per_char": 0.7076472441355387, "correct_loss_per_token": 1.3205287456512451, "incorrect_loss_per_token": 1.4152944882710774, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3576327562332153, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3576327562332153, "logits_per_char": -0.6788163781166077, "num_chars": 2}, {"sum_logits": -1.3205287456512451, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3205287456512451, "logits_per_char": -0.6602643728256226, "num_chars": 2}, {"sum_logits": -1.4765548706054688, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4765548706054688, "logits_per_char": -0.7382774353027344, "num_chars": 2}, {"sum_logits": -1.4116958379745483, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4116958379745483, "logits_per_char": -0.7058479189872742, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 18, "native_id": "Mercury_7239313", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4089524745941162, "incorrect_loss_raw": 1.3864736954371135, "correct_loss_per_char": 0.7044762372970581, "incorrect_loss_per_char": 0.6932368477185568, "correct_loss_per_token": 1.4089524745941162, "incorrect_loss_per_token": 1.3864736954371135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4089524745941162, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4089524745941162, "logits_per_char": -0.7044762372970581, "num_chars": 2}, {"sum_logits": -1.3025344610214233, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.3025344610214233, "logits_per_char": -0.6512672305107117, "num_chars": 2}, {"sum_logits": -1.4083465337753296, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4083465337753296, "logits_per_char": -0.7041732668876648, "num_chars": 2}, {"sum_logits": -1.4485400915145874, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4485400915145874, "logits_per_char": -0.7242700457572937, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 19, "native_id": "Mercury_7168350", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3027215003967285, "incorrect_loss_raw": 1.4244391123453777, "correct_loss_per_char": 0.6513607501983643, "incorrect_loss_per_char": 0.7122195561726888, "correct_loss_per_token": 1.3027215003967285, "incorrect_loss_per_token": 1.4244391123453777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3027215003967285, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3027215003967285, "logits_per_char": -0.6513607501983643, "num_chars": 2}, {"sum_logits": -1.343274712562561, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.343274712562561, "logits_per_char": -0.6716373562812805, "num_chars": 2}, {"sum_logits": -1.5128470659255981, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.5128470659255981, "logits_per_char": -0.7564235329627991, "num_chars": 2}, {"sum_logits": -1.4171955585479736, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4171955585479736, "logits_per_char": -0.7085977792739868, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 20, "native_id": "Mercury_7064015", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3774933815002441, "incorrect_loss_raw": 1.3946630954742432, "correct_loss_per_char": 0.6887466907501221, "incorrect_loss_per_char": 0.6973315477371216, "correct_loss_per_token": 1.3774933815002441, "incorrect_loss_per_token": 1.3946630954742432, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.429157018661499, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.429157018661499, "logits_per_char": -0.7145785093307495, "num_chars": 2}, {"sum_logits": -1.3223787546157837, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3223787546157837, "logits_per_char": -0.6611893773078918, "num_chars": 2}, {"sum_logits": -1.4324535131454468, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4324535131454468, "logits_per_char": -0.7162267565727234, "num_chars": 2}, {"sum_logits": -1.3774933815002441, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3774933815002441, "logits_per_char": -0.6887466907501221, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 21, "native_id": "Mercury_SC_400195", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5076665878295898, "incorrect_loss_raw": 1.3615217606226604, "correct_loss_per_char": 0.7538332939147949, "incorrect_loss_per_char": 0.6807608803113302, "correct_loss_per_token": 1.5076665878295898, "incorrect_loss_per_token": 1.3615217606226604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5272899866104126, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.5272899866104126, "logits_per_char": -0.7636449933052063, "num_chars": 2}, {"sum_logits": -1.5076665878295898, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.5076665878295898, "logits_per_char": -0.7538332939147949, "num_chars": 2}, {"sum_logits": -1.3326128721237183, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3326128721237183, "logits_per_char": -0.6663064360618591, "num_chars": 2}, {"sum_logits": -1.22466242313385, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.22466242313385, "logits_per_char": -0.612331211566925, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 22, "native_id": "Mercury_SC_415738", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.568013072013855, "incorrect_loss_raw": 1.3404441674550374, "correct_loss_per_char": 0.7840065360069275, "incorrect_loss_per_char": 0.6702220837275187, "correct_loss_per_token": 1.568013072013855, "incorrect_loss_per_token": 1.3404441674550374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.568013072013855, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.568013072013855, "logits_per_char": -0.7840065360069275, "num_chars": 2}, {"sum_logits": -1.3978674411773682, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3978674411773682, "logits_per_char": -0.6989337205886841, "num_chars": 2}, {"sum_logits": -1.4012993574142456, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4012993574142456, "logits_per_char": -0.7006496787071228, "num_chars": 2}, {"sum_logits": -1.2221657037734985, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.2221657037734985, "logits_per_char": -0.6110828518867493, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 23, "native_id": "Mercury_7268030", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.463877558708191, "incorrect_loss_raw": 1.3690402507781982, "correct_loss_per_char": 0.7319387793540955, "incorrect_loss_per_char": 0.6845201253890991, "correct_loss_per_token": 1.463877558708191, "incorrect_loss_per_token": 1.3690402507781982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4414039850234985, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4414039850234985, "logits_per_char": -0.7207019925117493, "num_chars": 2}, {"sum_logits": -1.3577858209609985, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3577858209609985, "logits_per_char": -0.6788929104804993, "num_chars": 2}, {"sum_logits": -1.463877558708191, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.463877558708191, "logits_per_char": -0.7319387793540955, "num_chars": 2}, {"sum_logits": -1.3079309463500977, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3079309463500977, "logits_per_char": -0.6539654731750488, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 24, "native_id": "Mercury_179113", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4669463634490967, "incorrect_loss_raw": 1.3662940661112468, "correct_loss_per_char": 0.7334731817245483, "incorrect_loss_per_char": 0.6831470330556234, "correct_loss_per_token": 1.4669463634490967, "incorrect_loss_per_token": 1.3662940661112468, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3851038217544556, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3851038217544556, "logits_per_char": -0.6925519108772278, "num_chars": 2}, {"sum_logits": -1.362259864807129, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.362259864807129, "logits_per_char": -0.6811299324035645, "num_chars": 2}, {"sum_logits": -1.4669463634490967, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4669463634490967, "logits_per_char": -0.7334731817245483, "num_chars": 2}, {"sum_logits": -1.3515185117721558, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3515185117721558, "logits_per_char": -0.6757592558860779, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 25, "native_id": "Mercury_7138425", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.599122405052185, "incorrect_loss_raw": 1.3384377161661785, "correct_loss_per_char": 0.7995612025260925, "incorrect_loss_per_char": 0.6692188580830892, "correct_loss_per_token": 1.599122405052185, "incorrect_loss_per_token": 1.3384377161661785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3373422622680664, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3373422622680664, "logits_per_char": -0.6686711311340332, "num_chars": 2}, {"sum_logits": -1.174254298210144, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.174254298210144, "logits_per_char": -0.587127149105072, "num_chars": 2}, {"sum_logits": -1.5037165880203247, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.5037165880203247, "logits_per_char": -0.7518582940101624, "num_chars": 2}, {"sum_logits": -1.599122405052185, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.599122405052185, "logits_per_char": -0.7995612025260925, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 26, "native_id": "Mercury_7018340", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3066880702972412, "incorrect_loss_raw": 1.4214706023534138, "correct_loss_per_char": 0.6533440351486206, "incorrect_loss_per_char": 0.7107353011767069, "correct_loss_per_token": 1.3066880702972412, "incorrect_loss_per_token": 1.4214706023534138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3066880702972412, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3066880702972412, "logits_per_char": -0.6533440351486206, "num_chars": 2}, {"sum_logits": -1.3891313076019287, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3891313076019287, "logits_per_char": -0.6945656538009644, "num_chars": 2}, {"sum_logits": -1.4868557453155518, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4868557453155518, "logits_per_char": -0.7434278726577759, "num_chars": 2}, {"sum_logits": -1.3884247541427612, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3884247541427612, "logits_per_char": -0.6942123770713806, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 27, "native_id": "Mercury_401760", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2850430011749268, "incorrect_loss_raw": 1.434041976928711, "correct_loss_per_char": 0.6425215005874634, "incorrect_loss_per_char": 0.7170209884643555, "correct_loss_per_token": 1.2850430011749268, "incorrect_loss_per_token": 1.434041976928711, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.421010971069336, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.421010971069336, "logits_per_char": -0.710505485534668, "num_chars": 2}, {"sum_logits": -1.3221220970153809, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3221220970153809, "logits_per_char": -0.6610610485076904, "num_chars": 2}, {"sum_logits": -1.558992862701416, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.558992862701416, "logits_per_char": -0.779496431350708, "num_chars": 2}, {"sum_logits": -1.2850430011749268, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2850430011749268, "logits_per_char": -0.6425215005874634, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 28, "native_id": "Mercury_7033635", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2813448905944824, "incorrect_loss_raw": 1.4306735595067341, "correct_loss_per_char": 0.6406724452972412, "incorrect_loss_per_char": 0.7153367797533671, "correct_loss_per_token": 1.2813448905944824, "incorrect_loss_per_token": 1.4306735595067341, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.375531792640686, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.375531792640686, "logits_per_char": -0.687765896320343, "num_chars": 2}, {"sum_logits": -1.4364687204360962, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4364687204360962, "logits_per_char": -0.7182343602180481, "num_chars": 2}, {"sum_logits": -1.2813448905944824, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.2813448905944824, "logits_per_char": -0.6406724452972412, "num_chars": 2}, {"sum_logits": -1.4800201654434204, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4800201654434204, "logits_per_char": -0.7400100827217102, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 29, "native_id": "Mercury_SC_406012", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4135955572128296, "incorrect_loss_raw": 1.394534428914388, "correct_loss_per_char": 0.7067977786064148, "incorrect_loss_per_char": 0.697267214457194, "correct_loss_per_token": 1.4135955572128296, "incorrect_loss_per_token": 1.394534428914388, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5665245056152344, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5665245056152344, "logits_per_char": -0.7832622528076172, "num_chars": 2}, {"sum_logits": -1.4135955572128296, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4135955572128296, "logits_per_char": -0.7067977786064148, "num_chars": 2}, {"sum_logits": -1.429280400276184, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.429280400276184, "logits_per_char": -0.714640200138092, "num_chars": 2}, {"sum_logits": -1.1877983808517456, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.1877983808517456, "logits_per_char": -0.5938991904258728, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 30, "native_id": "NYSEDREGENTS_2010_4_18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3197795152664185, "incorrect_loss_raw": 1.4167625109354656, "correct_loss_per_char": 0.6598897576332092, "incorrect_loss_per_char": 0.7083812554677328, "correct_loss_per_token": 1.3197795152664185, "incorrect_loss_per_token": 1.4167625109354656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.373672604560852, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.373672604560852, "logits_per_char": -0.686836302280426, "num_chars": 2}, {"sum_logits": -1.3197795152664185, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.3197795152664185, "logits_per_char": -0.6598897576332092, "num_chars": 2}, {"sum_logits": -1.528610110282898, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.528610110282898, "logits_per_char": -0.764305055141449, "num_chars": 2}, {"sum_logits": -1.3480048179626465, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3480048179626465, "logits_per_char": -0.6740024089813232, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 31, "native_id": "NYSEDREGENTS_2008_8_29", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5856527090072632, "incorrect_loss_raw": 1.3337163925170898, "correct_loss_per_char": 0.7928263545036316, "incorrect_loss_per_char": 0.6668581962585449, "correct_loss_per_token": 1.5856527090072632, "incorrect_loss_per_token": 1.3337163925170898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2756184339523315, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.2756184339523315, "logits_per_char": -0.6378092169761658, "num_chars": 2}, {"sum_logits": -1.3332419395446777, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3332419395446777, "logits_per_char": -0.6666209697723389, "num_chars": 2}, {"sum_logits": -1.3922888040542603, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3922888040542603, "logits_per_char": -0.6961444020271301, "num_chars": 2}, {"sum_logits": -1.5856527090072632, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.5856527090072632, "logits_per_char": -0.7928263545036316, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 32, "native_id": "Mercury_7086765", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3956894874572754, "incorrect_loss_raw": 1.3916842142740886, "correct_loss_per_char": 0.6978447437286377, "incorrect_loss_per_char": 0.6958421071370443, "correct_loss_per_token": 1.3956894874572754, "incorrect_loss_per_token": 1.3916842142740886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.488397479057312, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.488397479057312, "logits_per_char": -0.744198739528656, "num_chars": 2}, {"sum_logits": -1.3956894874572754, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3956894874572754, "logits_per_char": -0.6978447437286377, "num_chars": 2}, {"sum_logits": -1.3944716453552246, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3944716453552246, "logits_per_char": -0.6972358226776123, "num_chars": 2}, {"sum_logits": -1.292183518409729, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.292183518409729, "logits_per_char": -0.6460917592048645, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 33, "native_id": "Mercury_414146", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6408812999725342, "incorrect_loss_raw": 2.0002284049987793, "correct_loss_per_char": 0.3204406499862671, "incorrect_loss_per_char": 1.0001142024993896, "correct_loss_per_token": 0.6408812999725342, "incorrect_loss_per_token": 2.0002284049987793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6408812999725342, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -0.6408812999725342, "logits_per_char": -0.3204406499862671, "num_chars": 2}, {"sum_logits": -1.5212388038635254, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.5212388038635254, "logits_per_char": -0.7606194019317627, "num_chars": 2}, {"sum_logits": -1.8989310264587402, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.8989310264587402, "logits_per_char": -0.9494655132293701, "num_chars": 2}, {"sum_logits": -2.5805153846740723, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -2.5805153846740723, "logits_per_char": -1.2902576923370361, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 34, "native_id": "Mercury_7163240", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3921879529953003, "incorrect_loss_raw": 1.3905850648880005, "correct_loss_per_char": 0.6960939764976501, "incorrect_loss_per_char": 0.6952925324440002, "correct_loss_per_token": 1.3921879529953003, "incorrect_loss_per_token": 1.3905850648880005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4627145528793335, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4627145528793335, "logits_per_char": -0.7313572764396667, "num_chars": 2}, {"sum_logits": -1.3733316659927368, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.3733316659927368, "logits_per_char": -0.6866658329963684, "num_chars": 2}, {"sum_logits": -1.3921879529953003, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.3921879529953003, "logits_per_char": -0.6960939764976501, "num_chars": 2}, {"sum_logits": -1.3357089757919312, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.3357089757919312, "logits_per_char": -0.6678544878959656, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 35, "native_id": "MCAS_2000_4_32", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3954358100891113, "incorrect_loss_raw": 1.3903167645136516, "correct_loss_per_char": 0.6977179050445557, "incorrect_loss_per_char": 0.6951583822568258, "correct_loss_per_token": 1.3954358100891113, "incorrect_loss_per_token": 1.3903167645136516, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3620479106903076, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3620479106903076, "logits_per_char": -0.6810239553451538, "num_chars": 2}, {"sum_logits": -1.3325592279434204, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.3325592279434204, "logits_per_char": -0.6662796139717102, "num_chars": 2}, {"sum_logits": -1.3954358100891113, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3954358100891113, "logits_per_char": -0.6977179050445557, "num_chars": 2}, {"sum_logits": -1.4763431549072266, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4763431549072266, "logits_per_char": -0.7381715774536133, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 36, "native_id": "Mercury_SC_406016", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2817710638046265, "incorrect_loss_raw": 1.429895321528117, "correct_loss_per_char": 0.6408855319023132, "incorrect_loss_per_char": 0.7149476607640585, "correct_loss_per_token": 1.2817710638046265, "incorrect_loss_per_token": 1.429895321528117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.489624261856079, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.489624261856079, "logits_per_char": -0.7448121309280396, "num_chars": 2}, {"sum_logits": -1.3557690382003784, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3557690382003784, "logits_per_char": -0.6778845191001892, "num_chars": 2}, {"sum_logits": -1.444292664527893, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.444292664527893, "logits_per_char": -0.7221463322639465, "num_chars": 2}, {"sum_logits": -1.2817710638046265, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.2817710638046265, "logits_per_char": -0.6408855319023132, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 37, "native_id": "Mercury_SC_402270", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4152294397354126, "incorrect_loss_raw": 1.383373498916626, "correct_loss_per_char": 0.7076147198677063, "incorrect_loss_per_char": 0.691686749458313, "correct_loss_per_token": 1.4152294397354126, "incorrect_loss_per_token": 1.383373498916626, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3633875846862793, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3633875846862793, "logits_per_char": -0.6816937923431396, "num_chars": 2}, {"sum_logits": -1.3180116415023804, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3180116415023804, "logits_per_char": -0.6590058207511902, "num_chars": 2}, {"sum_logits": -1.4687212705612183, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4687212705612183, "logits_per_char": -0.7343606352806091, "num_chars": 2}, {"sum_logits": -1.4152294397354126, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4152294397354126, "logits_per_char": -0.7076147198677063, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 38, "native_id": "TIMSS_2003_8_pg99", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.382049560546875, "incorrect_loss_raw": 1.3968338171641033, "correct_loss_per_char": 0.6910247802734375, "incorrect_loss_per_char": 0.6984169085820516, "correct_loss_per_token": 1.382049560546875, "incorrect_loss_per_token": 1.3968338171641033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4994347095489502, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4994347095489502, "logits_per_char": -0.7497173547744751, "num_chars": 2}, {"sum_logits": -1.4215971231460571, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4215971231460571, "logits_per_char": -0.7107985615730286, "num_chars": 2}, {"sum_logits": -1.382049560546875, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.382049560546875, "logits_per_char": -0.6910247802734375, "num_chars": 2}, {"sum_logits": -1.2694696187973022, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2694696187973022, "logits_per_char": -0.6347348093986511, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 39, "native_id": "Mercury_7092365", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3936402797698975, "incorrect_loss_raw": 1.3894671201705933, "correct_loss_per_char": 0.6968201398849487, "incorrect_loss_per_char": 0.6947335600852966, "correct_loss_per_token": 1.3936402797698975, "incorrect_loss_per_token": 1.3894671201705933, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3782422542572021, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3782422542572021, "logits_per_char": -0.6891211271286011, "num_chars": 2}, {"sum_logits": -1.3936402797698975, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3936402797698975, "logits_per_char": -0.6968201398849487, "num_chars": 2}, {"sum_logits": -1.3975894451141357, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3975894451141357, "logits_per_char": -0.6987947225570679, "num_chars": 2}, {"sum_logits": -1.392569661140442, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.392569661140442, "logits_per_char": -0.696284830570221, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 40, "native_id": "Mercury_179218", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3713802099227905, "incorrect_loss_raw": 1.3996115922927856, "correct_loss_per_char": 0.6856901049613953, "incorrect_loss_per_char": 0.6998057961463928, "correct_loss_per_token": 1.3713802099227905, "incorrect_loss_per_token": 1.3996115922927856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2978217601776123, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.2978217601776123, "logits_per_char": -0.6489108800888062, "num_chars": 2}, {"sum_logits": -1.4482415914535522, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4482415914535522, "logits_per_char": -0.7241207957267761, "num_chars": 2}, {"sum_logits": -1.4527714252471924, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4527714252471924, "logits_per_char": -0.7263857126235962, "num_chars": 2}, {"sum_logits": -1.3713802099227905, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3713802099227905, "logits_per_char": -0.6856901049613953, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 41, "native_id": "Mercury_SC_407370", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3268581628799438, "incorrect_loss_raw": 1.4145572582880657, "correct_loss_per_char": 0.6634290814399719, "incorrect_loss_per_char": 0.7072786291440328, "correct_loss_per_token": 1.3268581628799438, "incorrect_loss_per_token": 1.4145572582880657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3268581628799438, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3268581628799438, "logits_per_char": -0.6634290814399719, "num_chars": 2}, {"sum_logits": -1.33723783493042, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.33723783493042, "logits_per_char": -0.66861891746521, "num_chars": 2}, {"sum_logits": -1.4070308208465576, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4070308208465576, "logits_per_char": -0.7035154104232788, "num_chars": 2}, {"sum_logits": -1.4994031190872192, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4994031190872192, "logits_per_char": -0.7497015595436096, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 42, "native_id": "Mercury_7094605", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.494697093963623, "incorrect_loss_raw": 1.3610329627990723, "correct_loss_per_char": 0.7473485469818115, "incorrect_loss_per_char": 0.6805164813995361, "correct_loss_per_token": 1.494697093963623, "incorrect_loss_per_token": 1.3610329627990723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.494697093963623, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.494697093963623, "logits_per_char": -0.7473485469818115, "num_chars": 2}, {"sum_logits": -1.3722985982894897, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3722985982894897, "logits_per_char": -0.6861492991447449, "num_chars": 2}, {"sum_logits": -1.4287419319152832, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4287419319152832, "logits_per_char": -0.7143709659576416, "num_chars": 2}, {"sum_logits": -1.2820583581924438, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.2820583581924438, "logits_per_char": -0.6410291790962219, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 43, "native_id": "Mercury_7216720", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3758984804153442, "incorrect_loss_raw": 1.4000126123428345, "correct_loss_per_char": 0.6879492402076721, "incorrect_loss_per_char": 0.7000063061714172, "correct_loss_per_token": 1.3758984804153442, "incorrect_loss_per_token": 1.4000126123428345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.408646583557129, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.408646583557129, "logits_per_char": -0.7043232917785645, "num_chars": 2}, {"sum_logits": -1.3011616468429565, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3011616468429565, "logits_per_char": -0.6505808234214783, "num_chars": 2}, {"sum_logits": -1.3758984804153442, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3758984804153442, "logits_per_char": -0.6879492402076721, "num_chars": 2}, {"sum_logits": -1.490229606628418, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.490229606628418, "logits_per_char": -0.745114803314209, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 44, "native_id": "Mercury_7126840", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3767340183258057, "incorrect_loss_raw": 1.3999390602111816, "correct_loss_per_char": 0.6883670091629028, "incorrect_loss_per_char": 0.6999695301055908, "correct_loss_per_token": 1.3767340183258057, "incorrect_loss_per_token": 1.3999390602111816, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.533795952796936, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.533795952796936, "logits_per_char": -0.766897976398468, "num_chars": 2}, {"sum_logits": -1.2828937768936157, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.2828937768936157, "logits_per_char": -0.6414468884468079, "num_chars": 2}, {"sum_logits": -1.3767340183258057, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3767340183258057, "logits_per_char": -0.6883670091629028, "num_chars": 2}, {"sum_logits": -1.3831274509429932, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3831274509429932, "logits_per_char": -0.6915637254714966, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 45, "native_id": "NCEOGA_2013_5_16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4092235565185547, "incorrect_loss_raw": 1.3977895180384319, "correct_loss_per_char": 0.7046117782592773, "incorrect_loss_per_char": 0.6988947590192159, "correct_loss_per_token": 1.4092235565185547, "incorrect_loss_per_token": 1.3977895180384319, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2505022287368774, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": true, "logits_per_token": -1.2505022287368774, "logits_per_char": -0.6252511143684387, "num_chars": 2}, {"sum_logits": -1.3113739490509033, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.3113739490509033, "logits_per_char": -0.6556869745254517, "num_chars": 2}, {"sum_logits": -1.4092235565185547, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.4092235565185547, "logits_per_char": -0.7046117782592773, "num_chars": 2}, {"sum_logits": -1.6314923763275146, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.6314923763275146, "logits_per_char": -0.8157461881637573, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 46, "native_id": "Mercury_SC_LBS11008", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.339686393737793, "incorrect_loss_raw": 1.4071240027745564, "correct_loss_per_char": 0.6698431968688965, "incorrect_loss_per_char": 0.7035620013872782, "correct_loss_per_token": 1.339686393737793, "incorrect_loss_per_token": 1.4071240027745564, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4457789659500122, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4457789659500122, "logits_per_char": -0.7228894829750061, "num_chars": 2}, {"sum_logits": -1.422522783279419, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.422522783279419, "logits_per_char": -0.7112613916397095, "num_chars": 2}, {"sum_logits": -1.3530702590942383, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3530702590942383, "logits_per_char": -0.6765351295471191, "num_chars": 2}, {"sum_logits": -1.339686393737793, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.339686393737793, "logits_per_char": -0.6698431968688965, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 47, "native_id": "Mercury_7077648", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5491971969604492, "incorrect_loss_raw": 1.360235373179118, "correct_loss_per_char": 0.7745985984802246, "incorrect_loss_per_char": 0.680117686589559, "correct_loss_per_token": 1.5491971969604492, "incorrect_loss_per_token": 1.360235373179118, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5053311586380005, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.5053311586380005, "logits_per_char": -0.7526655793190002, "num_chars": 2}, {"sum_logits": -1.5491971969604492, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.5491971969604492, "logits_per_char": -0.7745985984802246, "num_chars": 2}, {"sum_logits": -1.1143169403076172, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.1143169403076172, "logits_per_char": -0.5571584701538086, "num_chars": 2}, {"sum_logits": -1.4610580205917358, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4610580205917358, "logits_per_char": -0.7305290102958679, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 48, "native_id": "Mercury_7027388", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5101646184921265, "incorrect_loss_raw": 1.3532451391220093, "correct_loss_per_char": 0.7550823092460632, "incorrect_loss_per_char": 0.6766225695610046, "correct_loss_per_token": 1.5101646184921265, "incorrect_loss_per_token": 1.3532451391220093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5101646184921265, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.5101646184921265, "logits_per_char": -0.7550823092460632, "num_chars": 2}, {"sum_logits": -1.3816745281219482, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3816745281219482, "logits_per_char": -0.6908372640609741, "num_chars": 2}, {"sum_logits": -1.3514673709869385, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3514673709869385, "logits_per_char": -0.6757336854934692, "num_chars": 2}, {"sum_logits": -1.3265935182571411, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.3265935182571411, "logits_per_char": -0.6632967591285706, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 49, "native_id": "Mercury_7168140", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4718011617660522, "incorrect_loss_raw": 1.3688329855600994, "correct_loss_per_char": 0.7359005808830261, "incorrect_loss_per_char": 0.6844164927800497, "correct_loss_per_token": 1.4718011617660522, "incorrect_loss_per_token": 1.3688329855600994, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4338222742080688, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4338222742080688, "logits_per_char": -0.7169111371040344, "num_chars": 2}, {"sum_logits": -1.4264312982559204, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4264312982559204, "logits_per_char": -0.7132156491279602, "num_chars": 2}, {"sum_logits": -1.4718011617660522, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4718011617660522, "logits_per_char": -0.7359005808830261, "num_chars": 2}, {"sum_logits": -1.2462453842163086, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.2462453842163086, "logits_per_char": -0.6231226921081543, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 50, "native_id": "Mercury_7024745", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3205838203430176, "incorrect_loss_raw": 1.4151513973871868, "correct_loss_per_char": 0.6602919101715088, "incorrect_loss_per_char": 0.7075756986935934, "correct_loss_per_token": 1.3205838203430176, "incorrect_loss_per_token": 1.4151513973871868, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.469511866569519, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.469511866569519, "logits_per_char": -0.7347559332847595, "num_chars": 2}, {"sum_logits": -1.3704829216003418, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.3704829216003418, "logits_per_char": -0.6852414608001709, "num_chars": 2}, {"sum_logits": -1.4054594039916992, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4054594039916992, "logits_per_char": -0.7027297019958496, "num_chars": 2}, {"sum_logits": -1.3205838203430176, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.3205838203430176, "logits_per_char": -0.6602919101715088, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 51, "native_id": "MCAS_2004_5_32", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4548426866531372, "incorrect_loss_raw": 1.3726565440495808, "correct_loss_per_char": 0.7274213433265686, "incorrect_loss_per_char": 0.6863282720247904, "correct_loss_per_token": 1.4548426866531372, "incorrect_loss_per_token": 1.3726565440495808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4548426866531372, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4548426866531372, "logits_per_char": -0.7274213433265686, "num_chars": 2}, {"sum_logits": -1.4594477415084839, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4594477415084839, "logits_per_char": -0.7297238707542419, "num_chars": 2}, {"sum_logits": -1.3897713422775269, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3897713422775269, "logits_per_char": -0.6948856711387634, "num_chars": 2}, {"sum_logits": -1.268750548362732, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.268750548362732, "logits_per_char": -0.634375274181366, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 52, "native_id": "LEAP_2002_8_10387", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4176936149597168, "incorrect_loss_raw": 1.3911261955897014, "correct_loss_per_char": 0.7088468074798584, "incorrect_loss_per_char": 0.6955630977948507, "correct_loss_per_token": 1.4176936149597168, "incorrect_loss_per_token": 1.3911261955897014, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5485485792160034, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5485485792160034, "logits_per_char": -0.7742742896080017, "num_chars": 2}, {"sum_logits": -1.212547779083252, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.212547779083252, "logits_per_char": -0.606273889541626, "num_chars": 2}, {"sum_logits": -1.4176936149597168, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4176936149597168, "logits_per_char": -0.7088468074798584, "num_chars": 2}, {"sum_logits": -1.4122822284698486, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4122822284698486, "logits_per_char": -0.7061411142349243, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 53, "native_id": "Mercury_7057330", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.347838282585144, "incorrect_loss_raw": 1.4045228958129883, "correct_loss_per_char": 0.673919141292572, "incorrect_loss_per_char": 0.7022614479064941, "correct_loss_per_token": 1.347838282585144, "incorrect_loss_per_token": 1.4045228958129883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.347838282585144, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.347838282585144, "logits_per_char": -0.673919141292572, "num_chars": 2}, {"sum_logits": -1.4340132474899292, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4340132474899292, "logits_per_char": -0.7170066237449646, "num_chars": 2}, {"sum_logits": -1.416036605834961, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.416036605834961, "logits_per_char": -0.7080183029174805, "num_chars": 2}, {"sum_logits": -1.3635188341140747, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3635188341140747, "logits_per_char": -0.6817594170570374, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 54, "native_id": "Mercury_SC_416166", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.420531153678894, "incorrect_loss_raw": 1.3826695283253987, "correct_loss_per_char": 0.710265576839447, "incorrect_loss_per_char": 0.6913347641626993, "correct_loss_per_token": 1.420531153678894, "incorrect_loss_per_token": 1.3826695283253987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.42239248752594, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.42239248752594, "logits_per_char": -0.71119624376297, "num_chars": 2}, {"sum_logits": -1.3052297830581665, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.3052297830581665, "logits_per_char": -0.6526148915290833, "num_chars": 2}, {"sum_logits": -1.420531153678894, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.420531153678894, "logits_per_char": -0.710265576839447, "num_chars": 2}, {"sum_logits": -1.4203863143920898, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4203863143920898, "logits_per_char": -0.7101931571960449, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 55, "native_id": "Mercury_7098543", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3557415008544922, "incorrect_loss_raw": 1.40813414255778, "correct_loss_per_char": 0.6778707504272461, "incorrect_loss_per_char": 0.70406707127889, "correct_loss_per_token": 1.3557415008544922, "incorrect_loss_per_token": 1.40813414255778, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4180790185928345, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4180790185928345, "logits_per_char": -0.7090395092964172, "num_chars": 2}, {"sum_logits": -1.3557415008544922, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3557415008544922, "logits_per_char": -0.6778707504272461, "num_chars": 2}, {"sum_logits": -1.4895310401916504, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4895310401916504, "logits_per_char": -0.7447655200958252, "num_chars": 2}, {"sum_logits": -1.316792368888855, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.316792368888855, "logits_per_char": -0.6583961844444275, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 56, "native_id": "Mercury_7194495", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1839630603790283, "incorrect_loss_raw": 1.4730867544809978, "correct_loss_per_char": 0.5919815301895142, "incorrect_loss_per_char": 0.7365433772404989, "correct_loss_per_token": 1.1839630603790283, "incorrect_loss_per_token": 1.4730867544809978, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.600257158279419, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.600257158279419, "logits_per_char": -0.8001285791397095, "num_chars": 2}, {"sum_logits": -1.4025688171386719, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4025688171386719, "logits_per_char": -0.7012844085693359, "num_chars": 2}, {"sum_logits": -1.4164342880249023, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4164342880249023, "logits_per_char": -0.7082171440124512, "num_chars": 2}, {"sum_logits": -1.1839630603790283, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.1839630603790283, "logits_per_char": -0.5919815301895142, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 57, "native_id": "MEA_2016_5_8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4178173542022705, "incorrect_loss_raw": 1.3834419250488281, "correct_loss_per_char": 0.7089086771011353, "incorrect_loss_per_char": 0.6917209625244141, "correct_loss_per_token": 1.4178173542022705, "incorrect_loss_per_token": 1.3834419250488281, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3023412227630615, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.3023412227630615, "logits_per_char": -0.6511706113815308, "num_chars": 2}, {"sum_logits": -1.413902997970581, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.413902997970581, "logits_per_char": -0.7069514989852905, "num_chars": 2}, {"sum_logits": -1.4178173542022705, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4178173542022705, "logits_per_char": -0.7089086771011353, "num_chars": 2}, {"sum_logits": -1.4340815544128418, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4340815544128418, "logits_per_char": -0.7170407772064209, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 58, "native_id": "Mercury_7081148", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4083316326141357, "incorrect_loss_raw": 1.3961787621180217, "correct_loss_per_char": 0.7041658163070679, "incorrect_loss_per_char": 0.6980893810590109, "correct_loss_per_token": 1.4083316326141357, "incorrect_loss_per_token": 1.3961787621180217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5744024515151978, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.5744024515151978, "logits_per_char": -0.7872012257575989, "num_chars": 2}, {"sum_logits": -1.4083316326141357, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4083316326141357, "logits_per_char": -0.7041658163070679, "num_chars": 2}, {"sum_logits": -1.406321406364441, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.406321406364441, "logits_per_char": -0.7031607031822205, "num_chars": 2}, {"sum_logits": -1.2078124284744263, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.2078124284744263, "logits_per_char": -0.6039062142372131, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 59, "native_id": "Mercury_7005128", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2743321657180786, "incorrect_loss_raw": 1.4378902117411296, "correct_loss_per_char": 0.6371660828590393, "incorrect_loss_per_char": 0.7189451058705648, "correct_loss_per_token": 1.2743321657180786, "incorrect_loss_per_token": 1.4378902117411296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5972968339920044, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5972968339920044, "logits_per_char": -0.7986484169960022, "num_chars": 2}, {"sum_logits": -1.328019142150879, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.328019142150879, "logits_per_char": -0.6640095710754395, "num_chars": 2}, {"sum_logits": -1.3883546590805054, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3883546590805054, "logits_per_char": -0.6941773295402527, "num_chars": 2}, {"sum_logits": -1.2743321657180786, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2743321657180786, "logits_per_char": -0.6371660828590393, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 60, "native_id": "Mercury_SC_408250", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.387715220451355, "incorrect_loss_raw": 1.3958384195963542, "correct_loss_per_char": 0.6938576102256775, "incorrect_loss_per_char": 0.6979192097981771, "correct_loss_per_token": 1.387715220451355, "incorrect_loss_per_token": 1.3958384195963542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4959864616394043, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4959864616394043, "logits_per_char": -0.7479932308197021, "num_chars": 2}, {"sum_logits": -1.387715220451355, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.387715220451355, "logits_per_char": -0.6938576102256775, "num_chars": 2}, {"sum_logits": -1.2487398386001587, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.2487398386001587, "logits_per_char": -0.6243699193000793, "num_chars": 2}, {"sum_logits": -1.4427889585494995, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4427889585494995, "logits_per_char": -0.7213944792747498, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 61, "native_id": "TIMSS_2003_8_pg18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.452903151512146, "incorrect_loss_raw": 1.3773372968037922, "correct_loss_per_char": 0.726451575756073, "incorrect_loss_per_char": 0.6886686484018961, "correct_loss_per_token": 1.452903151512146, "incorrect_loss_per_token": 1.3773372968037922, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4951426982879639, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4951426982879639, "logits_per_char": -0.7475713491439819, "num_chars": 2}, {"sum_logits": -1.4083940982818604, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4083940982818604, "logits_per_char": -0.7041970491409302, "num_chars": 2}, {"sum_logits": -1.452903151512146, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.452903151512146, "logits_per_char": -0.726451575756073, "num_chars": 2}, {"sum_logits": -1.2284750938415527, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.2284750938415527, "logits_per_char": -0.6142375469207764, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 62, "native_id": "Mercury_400837", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3137685060501099, "incorrect_loss_raw": 1.4173933664957683, "correct_loss_per_char": 0.6568842530250549, "incorrect_loss_per_char": 0.7086966832478842, "correct_loss_per_token": 1.3137685060501099, "incorrect_loss_per_token": 1.4173933664957683, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3137685060501099, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.3137685060501099, "logits_per_char": -0.6568842530250549, "num_chars": 2}, {"sum_logits": -1.3651034832000732, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3651034832000732, "logits_per_char": -0.6825517416000366, "num_chars": 2}, {"sum_logits": -1.4256924390792847, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4256924390792847, "logits_per_char": -0.7128462195396423, "num_chars": 2}, {"sum_logits": -1.4613841772079468, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4613841772079468, "logits_per_char": -0.7306920886039734, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 63, "native_id": "LEAP__4_10227", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2842841148376465, "incorrect_loss_raw": 1.427860140800476, "correct_loss_per_char": 0.6421420574188232, "incorrect_loss_per_char": 0.713930070400238, "correct_loss_per_token": 1.2842841148376465, "incorrect_loss_per_token": 1.427860140800476, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4171727895736694, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4171727895736694, "logits_per_char": -0.7085863947868347, "num_chars": 2}, {"sum_logits": -1.4177685976028442, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4177685976028442, "logits_per_char": -0.7088842988014221, "num_chars": 2}, {"sum_logits": -1.4486390352249146, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4486390352249146, "logits_per_char": -0.7243195176124573, "num_chars": 2}, {"sum_logits": -1.2842841148376465, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2842841148376465, "logits_per_char": -0.6421420574188232, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 64, "native_id": "Mercury_SC_415369", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5276082754135132, "incorrect_loss_raw": 1.349137266476949, "correct_loss_per_char": 0.7638041377067566, "incorrect_loss_per_char": 0.6745686332384745, "correct_loss_per_token": 1.5276082754135132, "incorrect_loss_per_token": 1.349137266476949, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5276082754135132, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5276082754135132, "logits_per_char": -0.7638041377067566, "num_chars": 2}, {"sum_logits": -1.421834111213684, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.421834111213684, "logits_per_char": -0.710917055606842, "num_chars": 2}, {"sum_logits": -1.3358525037765503, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3358525037765503, "logits_per_char": -0.6679262518882751, "num_chars": 2}, {"sum_logits": -1.2897251844406128, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.2897251844406128, "logits_per_char": -0.6448625922203064, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 65, "native_id": "Mercury_SC_400868", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3991825580596924, "incorrect_loss_raw": 1.3913260300954182, "correct_loss_per_char": 0.6995912790298462, "incorrect_loss_per_char": 0.6956630150477091, "correct_loss_per_token": 1.3991825580596924, "incorrect_loss_per_token": 1.3913260300954182, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4711261987686157, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4711261987686157, "logits_per_char": -0.7355630993843079, "num_chars": 2}, {"sum_logits": -1.438654899597168, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.438654899597168, "logits_per_char": -0.719327449798584, "num_chars": 2}, {"sum_logits": -1.3991825580596924, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3991825580596924, "logits_per_char": -0.6995912790298462, "num_chars": 2}, {"sum_logits": -1.2641969919204712, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.2641969919204712, "logits_per_char": -0.6320984959602356, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 66, "native_id": "Mercury_7042543", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3610328435897827, "incorrect_loss_raw": 1.4145965178807576, "correct_loss_per_char": 0.6805164217948914, "incorrect_loss_per_char": 0.7072982589403788, "correct_loss_per_token": 1.3610328435897827, "incorrect_loss_per_token": 1.4145965178807576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.615331768989563, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.615331768989563, "logits_per_char": -0.8076658844947815, "num_chars": 2}, {"sum_logits": -1.3610328435897827, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3610328435897827, "logits_per_char": -0.6805164217948914, "num_chars": 2}, {"sum_logits": -1.4183013439178467, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4183013439178467, "logits_per_char": -0.7091506719589233, "num_chars": 2}, {"sum_logits": -1.2101564407348633, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2101564407348633, "logits_per_char": -0.6050782203674316, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 67, "native_id": "Mercury_SC_405865", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4516551494598389, "incorrect_loss_raw": 1.372861385345459, "correct_loss_per_char": 0.7258275747299194, "incorrect_loss_per_char": 0.6864306926727295, "correct_loss_per_token": 1.4516551494598389, "incorrect_loss_per_token": 1.372861385345459, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.424508810043335, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.424508810043335, "logits_per_char": -0.7122544050216675, "num_chars": 2}, {"sum_logits": -1.4516551494598389, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4516551494598389, "logits_per_char": -0.7258275747299194, "num_chars": 2}, {"sum_logits": -1.3462918996810913, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.3462918996810913, "logits_per_char": -0.6731459498405457, "num_chars": 2}, {"sum_logits": -1.3477834463119507, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3477834463119507, "logits_per_char": -0.6738917231559753, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 68, "native_id": "Mercury_SC_408900", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3693329095840454, "incorrect_loss_raw": 1.397845188776652, "correct_loss_per_char": 0.6846664547920227, "incorrect_loss_per_char": 0.698922594388326, "correct_loss_per_token": 1.3693329095840454, "incorrect_loss_per_token": 1.397845188776652, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4395873546600342, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4395873546600342, "logits_per_char": -0.7197936773300171, "num_chars": 2}, {"sum_logits": -1.380998134613037, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.380998134613037, "logits_per_char": -0.6904990673065186, "num_chars": 2}, {"sum_logits": -1.3693329095840454, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.3693329095840454, "logits_per_char": -0.6846664547920227, "num_chars": 2}, {"sum_logits": -1.3729500770568848, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.3729500770568848, "logits_per_char": -0.6864750385284424, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 69, "native_id": "MEAP_2005_5_16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.501151204109192, "incorrect_loss_raw": 1.3554308414459229, "correct_loss_per_char": 0.750575602054596, "incorrect_loss_per_char": 0.6777154207229614, "correct_loss_per_token": 1.501151204109192, "incorrect_loss_per_token": 1.3554308414459229, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3476407527923584, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3476407527923584, "logits_per_char": -0.6738203763961792, "num_chars": 2}, {"sum_logits": -1.3686857223510742, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3686857223510742, "logits_per_char": -0.6843428611755371, "num_chars": 2}, {"sum_logits": -1.501151204109192, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.501151204109192, "logits_per_char": -0.750575602054596, "num_chars": 2}, {"sum_logits": -1.349966049194336, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.349966049194336, "logits_per_char": -0.674983024597168, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 70, "native_id": "MCAS_2013_8_29418", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5354396104812622, "incorrect_loss_raw": 1.3461201588312786, "correct_loss_per_char": 0.7677198052406311, "incorrect_loss_per_char": 0.6730600794156393, "correct_loss_per_token": 1.5354396104812622, "incorrect_loss_per_token": 1.3461201588312786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.312417984008789, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.312417984008789, "logits_per_char": -0.6562089920043945, "num_chars": 2}, {"sum_logits": -1.3517273664474487, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3517273664474487, "logits_per_char": -0.6758636832237244, "num_chars": 2}, {"sum_logits": -1.3742151260375977, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3742151260375977, "logits_per_char": -0.6871075630187988, "num_chars": 2}, {"sum_logits": -1.5354396104812622, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5354396104812622, "logits_per_char": -0.7677198052406311, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 71, "native_id": "Mercury_7013685", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.427952527999878, "incorrect_loss_raw": 1.3791674772898357, "correct_loss_per_char": 0.713976263999939, "incorrect_loss_per_char": 0.6895837386449178, "correct_loss_per_token": 1.427952527999878, "incorrect_loss_per_token": 1.3791674772898357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.427952527999878, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.427952527999878, "logits_per_char": -0.713976263999939, "num_chars": 2}, {"sum_logits": -1.3798472881317139, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3798472881317139, "logits_per_char": -0.6899236440658569, "num_chars": 2}, {"sum_logits": -1.413985013961792, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.413985013961792, "logits_per_char": -0.706992506980896, "num_chars": 2}, {"sum_logits": -1.343670129776001, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.343670129776001, "logits_per_char": -0.6718350648880005, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 72, "native_id": "Mercury_404898", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3323776721954346, "incorrect_loss_raw": 1.415019949277242, "correct_loss_per_char": 0.6661888360977173, "incorrect_loss_per_char": 0.707509974638621, "correct_loss_per_token": 1.3323776721954346, "incorrect_loss_per_token": 1.415019949277242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3323776721954346, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3323776721954346, "logits_per_char": -0.6661888360977173, "num_chars": 2}, {"sum_logits": -1.3982168436050415, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3982168436050415, "logits_per_char": -0.6991084218025208, "num_chars": 2}, {"sum_logits": -1.331331729888916, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.331331729888916, "logits_per_char": -0.665665864944458, "num_chars": 2}, {"sum_logits": -1.5155112743377686, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.5155112743377686, "logits_per_char": -0.7577556371688843, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 73, "native_id": "NYSEDREGENTS_2010_8_17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4676858186721802, "incorrect_loss_raw": 1.3678276538848877, "correct_loss_per_char": 0.7338429093360901, "incorrect_loss_per_char": 0.6839138269424438, "correct_loss_per_token": 1.4676858186721802, "incorrect_loss_per_token": 1.3678276538848877, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3867448568344116, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3867448568344116, "logits_per_char": -0.6933724284172058, "num_chars": 2}, {"sum_logits": -1.2886993885040283, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.2886993885040283, "logits_per_char": -0.6443496942520142, "num_chars": 2}, {"sum_logits": -1.4280387163162231, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4280387163162231, "logits_per_char": -0.7140193581581116, "num_chars": 2}, {"sum_logits": -1.4676858186721802, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4676858186721802, "logits_per_char": -0.7338429093360901, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 74, "native_id": "NAEP_2005_4_S12+7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3770015239715576, "incorrect_loss_raw": 1.3954348961512248, "correct_loss_per_char": 0.6885007619857788, "incorrect_loss_per_char": 0.6977174480756124, "correct_loss_per_token": 1.3770015239715576, "incorrect_loss_per_token": 1.3954348961512248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3770015239715576, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3770015239715576, "logits_per_char": -0.6885007619857788, "num_chars": 2}, {"sum_logits": -1.426146149635315, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.426146149635315, "logits_per_char": -0.7130730748176575, "num_chars": 2}, {"sum_logits": -1.4065576791763306, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4065576791763306, "logits_per_char": -0.7032788395881653, "num_chars": 2}, {"sum_logits": -1.3536008596420288, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.3536008596420288, "logits_per_char": -0.6768004298210144, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 75, "native_id": "Mercury_7008208", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3744304180145264, "incorrect_loss_raw": 1.4040696223576863, "correct_loss_per_char": 0.6872152090072632, "incorrect_loss_per_char": 0.7020348111788431, "correct_loss_per_token": 1.3744304180145264, "incorrect_loss_per_token": 1.4040696223576863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3744304180145264, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3744304180145264, "logits_per_char": -0.6872152090072632, "num_chars": 2}, {"sum_logits": -1.4209656715393066, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4209656715393066, "logits_per_char": -0.7104828357696533, "num_chars": 2}, {"sum_logits": -1.258649230003357, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.258649230003357, "logits_per_char": -0.6293246150016785, "num_chars": 2}, {"sum_logits": -1.5325939655303955, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.5325939655303955, "logits_per_char": -0.7662969827651978, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 76, "native_id": "Mercury_SC_401164", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4330918788909912, "incorrect_loss_raw": 1.3813668886820476, "correct_loss_per_char": 0.7165459394454956, "incorrect_loss_per_char": 0.6906834443410238, "correct_loss_per_token": 1.4330918788909912, "incorrect_loss_per_token": 1.3813668886820476, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3563228845596313, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.3563228845596313, "logits_per_char": -0.6781614422798157, "num_chars": 2}, {"sum_logits": -1.2898032665252686, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": true, "logits_per_token": -1.2898032665252686, "logits_per_char": -0.6449016332626343, "num_chars": 2}, {"sum_logits": -1.4330918788909912, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.4330918788909912, "logits_per_char": -0.7165459394454956, "num_chars": 2}, {"sum_logits": -1.4979745149612427, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.4979745149612427, "logits_per_char": -0.7489872574806213, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 77, "native_id": "Mercury_7126875", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3990412950515747, "incorrect_loss_raw": 1.3894343376159668, "correct_loss_per_char": 0.6995206475257874, "incorrect_loss_per_char": 0.6947171688079834, "correct_loss_per_token": 1.3990412950515747, "incorrect_loss_per_token": 1.3894343376159668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4464402198791504, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4464402198791504, "logits_per_char": -0.7232201099395752, "num_chars": 2}, {"sum_logits": -1.423709750175476, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.423709750175476, "logits_per_char": -0.711854875087738, "num_chars": 2}, {"sum_logits": -1.3990412950515747, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3990412950515747, "logits_per_char": -0.6995206475257874, "num_chars": 2}, {"sum_logits": -1.298153042793274, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.298153042793274, "logits_per_char": -0.649076521396637, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 78, "native_id": "Mercury_SC_LBS10591", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4203091859817505, "incorrect_loss_raw": 1.3830459117889404, "correct_loss_per_char": 0.7101545929908752, "incorrect_loss_per_char": 0.6915229558944702, "correct_loss_per_token": 1.4203091859817505, "incorrect_loss_per_token": 1.3830459117889404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4203091859817505, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4203091859817505, "logits_per_char": -0.7101545929908752, "num_chars": 2}, {"sum_logits": -1.338642954826355, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.338642954826355, "logits_per_char": -0.6693214774131775, "num_chars": 2}, {"sum_logits": -1.464382290840149, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.464382290840149, "logits_per_char": -0.7321911454200745, "num_chars": 2}, {"sum_logits": -1.3461124897003174, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3461124897003174, "logits_per_char": -0.6730562448501587, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 79, "native_id": "MCAS_2014_8_19", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3702720403671265, "incorrect_loss_raw": 1.3986746072769165, "correct_loss_per_char": 0.6851360201835632, "incorrect_loss_per_char": 0.6993373036384583, "correct_loss_per_token": 1.3702720403671265, "incorrect_loss_per_token": 1.3986746072769165, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4047800302505493, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4047800302505493, "logits_per_char": -0.7023900151252747, "num_chars": 2}, {"sum_logits": -1.3685771226882935, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.3685771226882935, "logits_per_char": -0.6842885613441467, "num_chars": 2}, {"sum_logits": -1.4226666688919067, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4226666688919067, "logits_per_char": -0.7113333344459534, "num_chars": 2}, {"sum_logits": -1.3702720403671265, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3702720403671265, "logits_per_char": -0.6851360201835632, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 80, "native_id": "MDSA_2013_8_7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3411612510681152, "incorrect_loss_raw": 1.4088356892267864, "correct_loss_per_char": 0.6705806255340576, "incorrect_loss_per_char": 0.7044178446133932, "correct_loss_per_token": 1.3411612510681152, "incorrect_loss_per_token": 1.4088356892267864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.51600182056427, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.51600182056427, "logits_per_char": -0.758000910282135, "num_chars": 2}, {"sum_logits": -1.3585231304168701, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3585231304168701, "logits_per_char": -0.6792615652084351, "num_chars": 2}, {"sum_logits": -1.3411612510681152, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.3411612510681152, "logits_per_char": -0.6705806255340576, "num_chars": 2}, {"sum_logits": -1.3519821166992188, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3519821166992188, "logits_per_char": -0.6759910583496094, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 81, "native_id": "Mercury_7077578", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.416748046875, "incorrect_loss_raw": 1.389337142308553, "correct_loss_per_char": 0.7083740234375, "incorrect_loss_per_char": 0.6946685711542765, "correct_loss_per_token": 1.416748046875, "incorrect_loss_per_token": 1.389337142308553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5654422044754028, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.5654422044754028, "logits_per_char": -0.7827211022377014, "num_chars": 2}, {"sum_logits": -1.416748046875, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.416748046875, "logits_per_char": -0.7083740234375, "num_chars": 2}, {"sum_logits": -1.3452903032302856, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3452903032302856, "logits_per_char": -0.6726451516151428, "num_chars": 2}, {"sum_logits": -1.2572789192199707, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.2572789192199707, "logits_per_char": -0.6286394596099854, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 82, "native_id": "Mercury_SC_404975", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3111958503723145, "incorrect_loss_raw": 1.4193230470021565, "correct_loss_per_char": 0.6555979251861572, "incorrect_loss_per_char": 0.7096615235010783, "correct_loss_per_token": 1.3111958503723145, "incorrect_loss_per_token": 1.4193230470021565, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4119724035263062, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4119724035263062, "logits_per_char": -0.7059862017631531, "num_chars": 2}, {"sum_logits": -1.437827706336975, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.437827706336975, "logits_per_char": -0.7189138531684875, "num_chars": 2}, {"sum_logits": -1.4081690311431885, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4081690311431885, "logits_per_char": -0.7040845155715942, "num_chars": 2}, {"sum_logits": -1.3111958503723145, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.3111958503723145, "logits_per_char": -0.6555979251861572, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 83, "native_id": "Mercury_7197890", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.298762321472168, "incorrect_loss_raw": 1.422124942143758, "correct_loss_per_char": 0.649381160736084, "incorrect_loss_per_char": 0.711062471071879, "correct_loss_per_token": 1.298762321472168, "incorrect_loss_per_token": 1.422124942143758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4130830764770508, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4130830764770508, "logits_per_char": -0.7065415382385254, "num_chars": 2}, {"sum_logits": -1.298762321472168, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.298762321472168, "logits_per_char": -0.649381160736084, "num_chars": 2}, {"sum_logits": -1.4385309219360352, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4385309219360352, "logits_per_char": -0.7192654609680176, "num_chars": 2}, {"sum_logits": -1.4147608280181885, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4147608280181885, "logits_per_char": -0.7073804140090942, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 84, "native_id": "Mercury_7072625", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4686375856399536, "incorrect_loss_raw": 1.367147405942281, "correct_loss_per_char": 0.7343187928199768, "incorrect_loss_per_char": 0.6835737029711405, "correct_loss_per_token": 1.4686375856399536, "incorrect_loss_per_token": 1.367147405942281, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.39108145236969, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.39108145236969, "logits_per_char": -0.695540726184845, "num_chars": 2}, {"sum_logits": -1.4686375856399536, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4686375856399536, "logits_per_char": -0.7343187928199768, "num_chars": 2}, {"sum_logits": -1.4265328645706177, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4265328645706177, "logits_per_char": -0.7132664322853088, "num_chars": 2}, {"sum_logits": -1.2838279008865356, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.2838279008865356, "logits_per_char": -0.6419139504432678, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 85, "native_id": "MCAS_2000_8_36", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.46879243850708, "incorrect_loss_raw": 1.3652458985646565, "correct_loss_per_char": 0.73439621925354, "incorrect_loss_per_char": 0.6826229492823283, "correct_loss_per_token": 1.46879243850708, "incorrect_loss_per_token": 1.3652458985646565, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.393968939781189, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.393968939781189, "logits_per_char": -0.6969844698905945, "num_chars": 2}, {"sum_logits": -1.46879243850708, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.46879243850708, "logits_per_char": -0.73439621925354, "num_chars": 2}, {"sum_logits": -1.3584282398223877, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.3584282398223877, "logits_per_char": -0.6792141199111938, "num_chars": 2}, {"sum_logits": -1.343340516090393, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.343340516090393, "logits_per_char": -0.6716702580451965, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 86, "native_id": "Mercury_7227903", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3234920501708984, "incorrect_loss_raw": 1.4150444269180298, "correct_loss_per_char": 0.6617460250854492, "incorrect_loss_per_char": 0.7075222134590149, "correct_loss_per_token": 1.3234920501708984, "incorrect_loss_per_token": 1.4150444269180298, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3618396520614624, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3618396520614624, "logits_per_char": -0.6809198260307312, "num_chars": 2}, {"sum_logits": -1.3234920501708984, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.3234920501708984, "logits_per_char": -0.6617460250854492, "num_chars": 2}, {"sum_logits": -1.4188505411148071, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4188505411148071, "logits_per_char": -0.7094252705574036, "num_chars": 2}, {"sum_logits": -1.4644430875778198, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4644430875778198, "logits_per_char": -0.7322215437889099, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 87, "native_id": "MCAS_8_2015_8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.401214838027954, "incorrect_loss_raw": 1.3892043828964233, "correct_loss_per_char": 0.700607419013977, "incorrect_loss_per_char": 0.6946021914482117, "correct_loss_per_token": 1.401214838027954, "incorrect_loss_per_token": 1.3892043828964233, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4694101810455322, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4694101810455322, "logits_per_char": -0.7347050905227661, "num_chars": 2}, {"sum_logits": -1.401214838027954, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.401214838027954, "logits_per_char": -0.700607419013977, "num_chars": 2}, {"sum_logits": -1.3702633380889893, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3702633380889893, "logits_per_char": -0.6851316690444946, "num_chars": 2}, {"sum_logits": -1.3279396295547485, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3279396295547485, "logits_per_char": -0.6639698147773743, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 88, "native_id": "Mercury_7015890", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.482582688331604, "incorrect_loss_raw": 1.3630178372065227, "correct_loss_per_char": 0.741291344165802, "incorrect_loss_per_char": 0.6815089186032613, "correct_loss_per_token": 1.482582688331604, "incorrect_loss_per_token": 1.3630178372065227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3833770751953125, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3833770751953125, "logits_per_char": -0.6916885375976562, "num_chars": 2}, {"sum_logits": -1.482582688331604, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.482582688331604, "logits_per_char": -0.741291344165802, "num_chars": 2}, {"sum_logits": -1.3572514057159424, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3572514057159424, "logits_per_char": -0.6786257028579712, "num_chars": 2}, {"sum_logits": -1.348425030708313, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.348425030708313, "logits_per_char": -0.6742125153541565, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 89, "native_id": "Mercury_7263095", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4956576824188232, "incorrect_loss_raw": 1.3592052459716797, "correct_loss_per_char": 0.7478288412094116, "incorrect_loss_per_char": 0.6796026229858398, "correct_loss_per_token": 1.4956576824188232, "incorrect_loss_per_token": 1.3592052459716797, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4956576824188232, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4956576824188232, "logits_per_char": -0.7478288412094116, "num_chars": 2}, {"sum_logits": -1.4489147663116455, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4489147663116455, "logits_per_char": -0.7244573831558228, "num_chars": 2}, {"sum_logits": -1.3385976552963257, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3385976552963257, "logits_per_char": -0.6692988276481628, "num_chars": 2}, {"sum_logits": -1.2901033163070679, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2901033163070679, "logits_per_char": -0.6450516581535339, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 90, "native_id": "Mercury_7248203", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4948371648788452, "incorrect_loss_raw": 1.3605996370315552, "correct_loss_per_char": 0.7474185824394226, "incorrect_loss_per_char": 0.6802998185157776, "correct_loss_per_token": 1.4948371648788452, "incorrect_loss_per_token": 1.3605996370315552, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.335817575454712, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.335817575454712, "logits_per_char": -0.667908787727356, "num_chars": 2}, {"sum_logits": -1.3639048337936401, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3639048337936401, "logits_per_char": -0.6819524168968201, "num_chars": 2}, {"sum_logits": -1.4948371648788452, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4948371648788452, "logits_per_char": -0.7474185824394226, "num_chars": 2}, {"sum_logits": -1.3820765018463135, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3820765018463135, "logits_per_char": -0.6910382509231567, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 91, "native_id": "MSA_2012_5_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.318650722503662, "incorrect_loss_raw": 1.4176421165466309, "correct_loss_per_char": 0.659325361251831, "incorrect_loss_per_char": 0.7088210582733154, "correct_loss_per_token": 1.318650722503662, "incorrect_loss_per_token": 1.4176421165466309, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4808884859085083, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4808884859085083, "logits_per_char": -0.7404442429542542, "num_chars": 2}, {"sum_logits": -1.3180032968521118, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.3180032968521118, "logits_per_char": -0.6590016484260559, "num_chars": 2}, {"sum_logits": -1.318650722503662, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.318650722503662, "logits_per_char": -0.659325361251831, "num_chars": 2}, {"sum_logits": -1.4540345668792725, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4540345668792725, "logits_per_char": -0.7270172834396362, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 92, "native_id": "Mercury_SC_400675", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3941807746887207, "incorrect_loss_raw": 1.3927802642186482, "correct_loss_per_char": 0.6970903873443604, "incorrect_loss_per_char": 0.6963901321093241, "correct_loss_per_token": 1.3941807746887207, "incorrect_loss_per_token": 1.3927802642186482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5087199211120605, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.5087199211120605, "logits_per_char": -0.7543599605560303, "num_chars": 2}, {"sum_logits": -1.3941807746887207, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3941807746887207, "logits_per_char": -0.6970903873443604, "num_chars": 2}, {"sum_logits": -1.2695913314819336, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.2695913314819336, "logits_per_char": -0.6347956657409668, "num_chars": 2}, {"sum_logits": -1.4000295400619507, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4000295400619507, "logits_per_char": -0.7000147700309753, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 93, "native_id": "ACTAAP_2010_7_8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4920563697814941, "incorrect_loss_raw": 1.3694926500320435, "correct_loss_per_char": 0.7460281848907471, "incorrect_loss_per_char": 0.6847463250160217, "correct_loss_per_token": 1.4920563697814941, "incorrect_loss_per_token": 1.3694926500320435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.230877161026001, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.230877161026001, "logits_per_char": -0.6154385805130005, "num_chars": 2}, {"sum_logits": -1.3310736417770386, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3310736417770386, "logits_per_char": -0.6655368208885193, "num_chars": 2}, {"sum_logits": -1.5465271472930908, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5465271472930908, "logits_per_char": -0.7732635736465454, "num_chars": 2}, {"sum_logits": -1.4920563697814941, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4920563697814941, "logits_per_char": -0.7460281848907471, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 94, "native_id": "Mercury_7242900", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3719532489776611, "incorrect_loss_raw": 1.396175503730774, "correct_loss_per_char": 0.6859766244888306, "incorrect_loss_per_char": 0.698087751865387, "correct_loss_per_token": 1.3719532489776611, "incorrect_loss_per_token": 1.396175503730774, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4165806770324707, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4165806770324707, "logits_per_char": -0.7082903385162354, "num_chars": 2}, {"sum_logits": -1.4227619171142578, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4227619171142578, "logits_per_char": -0.7113809585571289, "num_chars": 2}, {"sum_logits": -1.3719532489776611, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3719532489776611, "logits_per_char": -0.6859766244888306, "num_chars": 2}, {"sum_logits": -1.3491839170455933, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.3491839170455933, "logits_per_char": -0.6745919585227966, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 95, "native_id": "VASoL_2009_3_29", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3992245197296143, "incorrect_loss_raw": 1.390421708424886, "correct_loss_per_char": 0.6996122598648071, "incorrect_loss_per_char": 0.695210854212443, "correct_loss_per_token": 1.3992245197296143, "incorrect_loss_per_token": 1.390421708424886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4401850700378418, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4401850700378418, "logits_per_char": -0.7200925350189209, "num_chars": 2}, {"sum_logits": -1.3645942211151123, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.3645942211151123, "logits_per_char": -0.6822971105575562, "num_chars": 2}, {"sum_logits": -1.3992245197296143, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3992245197296143, "logits_per_char": -0.6996122598648071, "num_chars": 2}, {"sum_logits": -1.366485834121704, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.366485834121704, "logits_per_char": -0.683242917060852, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 96, "native_id": "Mercury_177485", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.516775131225586, "incorrect_loss_raw": 1.3520948092142742, "correct_loss_per_char": 0.758387565612793, "incorrect_loss_per_char": 0.6760474046071371, "correct_loss_per_token": 1.516775131225586, "incorrect_loss_per_token": 1.3520948092142742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2927693128585815, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.2927693128585815, "logits_per_char": -0.6463846564292908, "num_chars": 2}, {"sum_logits": -1.516775131225586, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.516775131225586, "logits_per_char": -0.758387565612793, "num_chars": 2}, {"sum_logits": -1.432002067565918, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.432002067565918, "logits_per_char": -0.716001033782959, "num_chars": 2}, {"sum_logits": -1.3315130472183228, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3315130472183228, "logits_per_char": -0.6657565236091614, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 97, "native_id": "Mercury_7219713", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.183846354484558, "incorrect_loss_raw": 1.475275715192159, "correct_loss_per_char": 0.591923177242279, "incorrect_loss_per_char": 0.7376378575960795, "correct_loss_per_token": 1.183846354484558, "incorrect_loss_per_token": 1.475275715192159, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5485942363739014, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5485942363739014, "logits_per_char": -0.7742971181869507, "num_chars": 2}, {"sum_logits": -1.4548040628433228, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4548040628433228, "logits_per_char": -0.7274020314216614, "num_chars": 2}, {"sum_logits": -1.422428846359253, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.422428846359253, "logits_per_char": -0.7112144231796265, "num_chars": 2}, {"sum_logits": -1.183846354484558, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.183846354484558, "logits_per_char": -0.591923177242279, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 98, "native_id": "Mercury_416411", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3671520948410034, "incorrect_loss_raw": 1.4037360350290935, "correct_loss_per_char": 0.6835760474205017, "incorrect_loss_per_char": 0.7018680175145467, "correct_loss_per_token": 1.3671520948410034, "incorrect_loss_per_token": 1.4037360350290935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3671520948410034, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3671520948410034, "logits_per_char": -0.6835760474205017, "num_chars": 2}, {"sum_logits": -1.3561633825302124, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3561633825302124, "logits_per_char": -0.6780816912651062, "num_chars": 2}, {"sum_logits": -1.3257489204406738, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.3257489204406738, "logits_per_char": -0.6628744602203369, "num_chars": 2}, {"sum_logits": -1.529295802116394, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.529295802116394, "logits_per_char": -0.764647901058197, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 99, "native_id": "Mercury_7251720", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2997338771820068, "incorrect_loss_raw": 1.424203634262085, "correct_loss_per_char": 0.6498669385910034, "incorrect_loss_per_char": 0.7121018171310425, "correct_loss_per_token": 1.2997338771820068, "incorrect_loss_per_token": 1.424203634262085, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4917501211166382, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4917501211166382, "logits_per_char": -0.7458750605583191, "num_chars": 2}, {"sum_logits": -1.37323796749115, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.37323796749115, "logits_per_char": -0.686618983745575, "num_chars": 2}, {"sum_logits": -1.4076228141784668, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4076228141784668, "logits_per_char": -0.7038114070892334, "num_chars": 2}, {"sum_logits": -1.2997338771820068, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2997338771820068, "logits_per_char": -0.6498669385910034, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 100, "native_id": "Mercury_7197960", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5533639192581177, "incorrect_loss_raw": 1.3448632558186848, "correct_loss_per_char": 0.7766819596290588, "incorrect_loss_per_char": 0.6724316279093424, "correct_loss_per_token": 1.5533639192581177, "incorrect_loss_per_token": 1.3448632558186848, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5533639192581177, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5533639192581177, "logits_per_char": -0.7766819596290588, "num_chars": 2}, {"sum_logits": -1.3051217794418335, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3051217794418335, "logits_per_char": -0.6525608897209167, "num_chars": 2}, {"sum_logits": -1.4597829580307007, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4597829580307007, "logits_per_char": -0.7298914790153503, "num_chars": 2}, {"sum_logits": -1.2696850299835205, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2696850299835205, "logits_per_char": -0.6348425149917603, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 101, "native_id": "Mercury_SC_413242", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5043436288833618, "incorrect_loss_raw": 1.362618128458659, "correct_loss_per_char": 0.7521718144416809, "incorrect_loss_per_char": 0.6813090642293295, "correct_loss_per_token": 1.5043436288833618, "incorrect_loss_per_token": 1.362618128458659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5043436288833618, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5043436288833618, "logits_per_char": -0.7521718144416809, "num_chars": 2}, {"sum_logits": -1.5348883867263794, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5348883867263794, "logits_per_char": -0.7674441933631897, "num_chars": 2}, {"sum_logits": -1.3337719440460205, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3337719440460205, "logits_per_char": -0.6668859720230103, "num_chars": 2}, {"sum_logits": -1.2191940546035767, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2191940546035767, "logits_per_char": -0.6095970273017883, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 102, "native_id": "MCAS_2012_8_23649", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.236923336982727, "incorrect_loss_raw": 1.451186219851176, "correct_loss_per_char": 0.6184616684913635, "incorrect_loss_per_char": 0.725593109925588, "correct_loss_per_token": 1.236923336982727, "incorrect_loss_per_token": 1.451186219851176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5605833530426025, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5605833530426025, "logits_per_char": -0.7802916765213013, "num_chars": 2}, {"sum_logits": -1.3519957065582275, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3519957065582275, "logits_per_char": -0.6759978532791138, "num_chars": 2}, {"sum_logits": -1.4409795999526978, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4409795999526978, "logits_per_char": -0.7204897999763489, "num_chars": 2}, {"sum_logits": -1.236923336982727, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.236923336982727, "logits_per_char": -0.6184616684913635, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 103, "native_id": "ACTAAP_2013_5_5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4467965364456177, "incorrect_loss_raw": 1.3803245623906453, "correct_loss_per_char": 0.7233982682228088, "incorrect_loss_per_char": 0.6901622811953226, "correct_loss_per_token": 1.4467965364456177, "incorrect_loss_per_token": 1.3803245623906453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5814144611358643, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5814144611358643, "logits_per_char": -0.7907072305679321, "num_chars": 2}, {"sum_logits": -1.4467965364456177, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4467965364456177, "logits_per_char": -0.7233982682228088, "num_chars": 2}, {"sum_logits": -1.2621550559997559, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.2621550559997559, "logits_per_char": -0.6310775279998779, "num_chars": 2}, {"sum_logits": -1.297404170036316, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.297404170036316, "logits_per_char": -0.648702085018158, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 104, "native_id": "Mercury_7200585", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4273606538772583, "incorrect_loss_raw": 1.3858451843261719, "correct_loss_per_char": 0.7136803269386292, "incorrect_loss_per_char": 0.6929225921630859, "correct_loss_per_token": 1.4273606538772583, "incorrect_loss_per_token": 1.3858451843261719, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5079870223999023, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5079870223999023, "logits_per_char": -0.7539935111999512, "num_chars": 2}, {"sum_logits": -1.4273606538772583, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4273606538772583, "logits_per_char": -0.7136803269386292, "num_chars": 2}, {"sum_logits": -1.4345606565475464, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4345606565475464, "logits_per_char": -0.7172803282737732, "num_chars": 2}, {"sum_logits": -1.214987874031067, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.214987874031067, "logits_per_char": -0.6074939370155334, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 105, "native_id": "Mercury_SC_401119", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3545926809310913, "incorrect_loss_raw": 1.4052542448043823, "correct_loss_per_char": 0.6772963404655457, "incorrect_loss_per_char": 0.7026271224021912, "correct_loss_per_token": 1.3545926809310913, "incorrect_loss_per_token": 1.4052542448043823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4118355512619019, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4118355512619019, "logits_per_char": -0.7059177756309509, "num_chars": 2}, {"sum_logits": -1.3490784168243408, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3490784168243408, "logits_per_char": -0.6745392084121704, "num_chars": 2}, {"sum_logits": -1.4548487663269043, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4548487663269043, "logits_per_char": -0.7274243831634521, "num_chars": 2}, {"sum_logits": -1.3545926809310913, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3545926809310913, "logits_per_char": -0.6772963404655457, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 106, "native_id": "AIMS_2009_4_20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4020788669586182, "incorrect_loss_raw": 1.387221058209737, "correct_loss_per_char": 0.7010394334793091, "incorrect_loss_per_char": 0.6936105291048685, "correct_loss_per_token": 1.4020788669586182, "incorrect_loss_per_token": 1.387221058209737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4020788669586182, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4020788669586182, "logits_per_char": -0.7010394334793091, "num_chars": 2}, {"sum_logits": -1.429855227470398, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.429855227470398, "logits_per_char": -0.714927613735199, "num_chars": 2}, {"sum_logits": -1.4063658714294434, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4063658714294434, "logits_per_char": -0.7031829357147217, "num_chars": 2}, {"sum_logits": -1.3254420757293701, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.3254420757293701, "logits_per_char": -0.6627210378646851, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 107, "native_id": "Mercury_7186130", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3915001153945923, "incorrect_loss_raw": 1.393786112467448, "correct_loss_per_char": 0.6957500576972961, "incorrect_loss_per_char": 0.696893056233724, "correct_loss_per_token": 1.3915001153945923, "incorrect_loss_per_token": 1.393786112467448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5146082639694214, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.5146082639694214, "logits_per_char": -0.7573041319847107, "num_chars": 2}, {"sum_logits": -1.35379958152771, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.35379958152771, "logits_per_char": -0.676899790763855, "num_chars": 2}, {"sum_logits": -1.3915001153945923, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3915001153945923, "logits_per_char": -0.6957500576972961, "num_chars": 2}, {"sum_logits": -1.3129504919052124, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3129504919052124, "logits_per_char": -0.6564752459526062, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 108, "native_id": "NYSEDREGENTS_2010_4_17", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9932479858398438, "incorrect_loss_raw": 2.079358458518982, "correct_loss_per_char": 0.4966239929199219, "incorrect_loss_per_char": 1.039679229259491, "correct_loss_per_token": 0.9932479858398438, "incorrect_loss_per_token": 2.079358458518982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9932479858398438, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -0.9932479858398438, "logits_per_char": -0.4966239929199219, "num_chars": 2}, {"sum_logits": -1.3878976106643677, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3878976106643677, "logits_per_char": -0.6939488053321838, "num_chars": 2}, {"sum_logits": -2.0577280521392822, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -2.0577280521392822, "logits_per_char": -1.0288640260696411, "num_chars": 2}, {"sum_logits": -2.792449712753296, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -2.792449712753296, "logits_per_char": -1.396224856376648, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 109, "native_id": "Mercury_SC_407706", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.305493712425232, "incorrect_loss_raw": 1.4197859366734822, "correct_loss_per_char": 0.652746856212616, "incorrect_loss_per_char": 0.7098929683367411, "correct_loss_per_token": 1.305493712425232, "incorrect_loss_per_token": 1.4197859366734822, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3941222429275513, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3941222429275513, "logits_per_char": -0.6970611214637756, "num_chars": 2}, {"sum_logits": -1.305493712425232, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.305493712425232, "logits_per_char": -0.652746856212616, "num_chars": 2}, {"sum_logits": -1.4400988817214966, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4400988817214966, "logits_per_char": -0.7200494408607483, "num_chars": 2}, {"sum_logits": -1.425136685371399, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.425136685371399, "logits_per_char": -0.7125683426856995, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 110, "native_id": "Mercury_180390", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.521289587020874, "incorrect_loss_raw": 1.3526345491409302, "correct_loss_per_char": 0.760644793510437, "incorrect_loss_per_char": 0.6763172745704651, "correct_loss_per_token": 1.521289587020874, "incorrect_loss_per_token": 1.3526345491409302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3073153495788574, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3073153495788574, "logits_per_char": -0.6536576747894287, "num_chars": 2}, {"sum_logits": -1.3329216241836548, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3329216241836548, "logits_per_char": -0.6664608120918274, "num_chars": 2}, {"sum_logits": -1.4176666736602783, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4176666736602783, "logits_per_char": -0.7088333368301392, "num_chars": 2}, {"sum_logits": -1.521289587020874, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.521289587020874, "logits_per_char": -0.760644793510437, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 111, "native_id": "Mercury_7137480", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.381164789199829, "incorrect_loss_raw": 1.394509236017863, "correct_loss_per_char": 0.6905823945999146, "incorrect_loss_per_char": 0.6972546180089315, "correct_loss_per_token": 1.381164789199829, "incorrect_loss_per_token": 1.394509236017863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4298175573349, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4298175573349, "logits_per_char": -0.71490877866745, "num_chars": 2}, {"sum_logits": -1.3461533784866333, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.3461533784866333, "logits_per_char": -0.6730766892433167, "num_chars": 2}, {"sum_logits": -1.381164789199829, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.381164789199829, "logits_per_char": -0.6905823945999146, "num_chars": 2}, {"sum_logits": -1.4075567722320557, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4075567722320557, "logits_per_char": -0.7037783861160278, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 112, "native_id": "Mercury_7044520", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3936407566070557, "incorrect_loss_raw": 1.3912298281987507, "correct_loss_per_char": 0.6968203783035278, "incorrect_loss_per_char": 0.6956149140993754, "correct_loss_per_token": 1.3936407566070557, "incorrect_loss_per_token": 1.3912298281987507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.385500431060791, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.385500431060791, "logits_per_char": -0.6927502155303955, "num_chars": 2}, {"sum_logits": -1.3936407566070557, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3936407566070557, "logits_per_char": -0.6968203783035278, "num_chars": 2}, {"sum_logits": -1.3786200284957886, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3786200284957886, "logits_per_char": -0.6893100142478943, "num_chars": 2}, {"sum_logits": -1.4095690250396729, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4095690250396729, "logits_per_char": -0.7047845125198364, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 113, "native_id": "Mercury_7080973", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4634877443313599, "incorrect_loss_raw": 1.3693181276321411, "correct_loss_per_char": 0.7317438721656799, "incorrect_loss_per_char": 0.6846590638160706, "correct_loss_per_token": 1.4634877443313599, "incorrect_loss_per_token": 1.3693181276321411, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4147764444351196, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4147764444351196, "logits_per_char": -0.7073882222175598, "num_chars": 2}, {"sum_logits": -1.4634877443313599, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4634877443313599, "logits_per_char": -0.7317438721656799, "num_chars": 2}, {"sum_logits": -1.3731943368911743, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3731943368911743, "logits_per_char": -0.6865971684455872, "num_chars": 2}, {"sum_logits": -1.3199836015701294, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3199836015701294, "logits_per_char": -0.6599918007850647, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 114, "native_id": "ACTAAP_2007_7_18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3683592081069946, "incorrect_loss_raw": 1.404056151707967, "correct_loss_per_char": 0.6841796040534973, "incorrect_loss_per_char": 0.7020280758539835, "correct_loss_per_token": 1.3683592081069946, "incorrect_loss_per_token": 1.404056151707967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4687011241912842, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4687011241912842, "logits_per_char": -0.7343505620956421, "num_chars": 2}, {"sum_logits": -1.3119914531707764, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3119914531707764, "logits_per_char": -0.6559957265853882, "num_chars": 2}, {"sum_logits": -1.4314758777618408, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4314758777618408, "logits_per_char": -0.7157379388809204, "num_chars": 2}, {"sum_logits": -1.3683592081069946, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3683592081069946, "logits_per_char": -0.6841796040534973, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 115, "native_id": "OHAT_2007_8_44", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3420078754425049, "incorrect_loss_raw": 1.408238689104716, "correct_loss_per_char": 0.6710039377212524, "incorrect_loss_per_char": 0.704119344552358, "correct_loss_per_token": 1.3420078754425049, "incorrect_loss_per_token": 1.408238689104716, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4412990808486938, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4412990808486938, "logits_per_char": -0.7206495404243469, "num_chars": 2}, {"sum_logits": -1.441318154335022, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.441318154335022, "logits_per_char": -0.720659077167511, "num_chars": 2}, {"sum_logits": -1.3420078754425049, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3420078754425049, "logits_per_char": -0.6710039377212524, "num_chars": 2}, {"sum_logits": -1.3420988321304321, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3420988321304321, "logits_per_char": -0.6710494160652161, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 116, "native_id": "NAEP_2005_8_S11+3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4032388925552368, "incorrect_loss_raw": 1.390537699063619, "correct_loss_per_char": 0.7016194462776184, "incorrect_loss_per_char": 0.6952688495318095, "correct_loss_per_token": 1.4032388925552368, "incorrect_loss_per_token": 1.390537699063619, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4374725818634033, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4374725818634033, "logits_per_char": -0.7187362909317017, "num_chars": 2}, {"sum_logits": -1.2591934204101562, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.2591934204101562, "logits_per_char": -0.6295967102050781, "num_chars": 2}, {"sum_logits": -1.4749470949172974, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4749470949172974, "logits_per_char": -0.7374735474586487, "num_chars": 2}, {"sum_logits": -1.4032388925552368, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4032388925552368, "logits_per_char": -0.7016194462776184, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 117, "native_id": "Mercury_SC_401403", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5965940952301025, "incorrect_loss_raw": 1.329918622970581, "correct_loss_per_char": 0.7982970476150513, "incorrect_loss_per_char": 0.6649593114852905, "correct_loss_per_token": 1.5965940952301025, "incorrect_loss_per_token": 1.329918622970581, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5965940952301025, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.5965940952301025, "logits_per_char": -0.7982970476150513, "num_chars": 2}, {"sum_logits": -1.3469579219818115, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3469579219818115, "logits_per_char": -0.6734789609909058, "num_chars": 2}, {"sum_logits": -1.366719365119934, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.366719365119934, "logits_per_char": -0.683359682559967, "num_chars": 2}, {"sum_logits": -1.2760785818099976, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.2760785818099976, "logits_per_char": -0.6380392909049988, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 118, "native_id": "Mercury_7027108", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.353968858718872, "incorrect_loss_raw": 1.4071199496587117, "correct_loss_per_char": 0.676984429359436, "incorrect_loss_per_char": 0.7035599748293558, "correct_loss_per_token": 1.353968858718872, "incorrect_loss_per_token": 1.4071199496587117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.353968858718872, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.353968858718872, "logits_per_char": -0.676984429359436, "num_chars": 2}, {"sum_logits": -1.4709962606430054, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4709962606430054, "logits_per_char": -0.7354981303215027, "num_chars": 2}, {"sum_logits": -1.433497428894043, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.433497428894043, "logits_per_char": -0.7167487144470215, "num_chars": 2}, {"sum_logits": -1.316866159439087, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.316866159439087, "logits_per_char": -0.6584330797195435, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 119, "native_id": "Mercury_7195125", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1761713027954102, "incorrect_loss_raw": 1.4783602952957153, "correct_loss_per_char": 0.5880856513977051, "incorrect_loss_per_char": 0.7391801476478577, "correct_loss_per_token": 1.1761713027954102, "incorrect_loss_per_token": 1.4783602952957153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6200207471847534, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.6200207471847534, "logits_per_char": -0.8100103735923767, "num_chars": 2}, {"sum_logits": -1.357620120048523, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.357620120048523, "logits_per_char": -0.6788100600242615, "num_chars": 2}, {"sum_logits": -1.4574400186538696, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4574400186538696, "logits_per_char": -0.7287200093269348, "num_chars": 2}, {"sum_logits": -1.1761713027954102, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.1761713027954102, "logits_per_char": -0.5880856513977051, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 120, "native_id": "Mercury_7043680", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.412109375, "incorrect_loss_raw": 1.3827319145202637, "correct_loss_per_char": 0.7060546875, "incorrect_loss_per_char": 0.6913659572601318, "correct_loss_per_token": 1.412109375, "incorrect_loss_per_token": 1.3827319145202637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3907266855239868, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3907266855239868, "logits_per_char": -0.6953633427619934, "num_chars": 2}, {"sum_logits": -1.412109375, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.412109375, "logits_per_char": -0.7060546875, "num_chars": 2}, {"sum_logits": -1.390381097793579, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.390381097793579, "logits_per_char": -0.6951905488967896, "num_chars": 2}, {"sum_logits": -1.367087960243225, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.367087960243225, "logits_per_char": -0.6835439801216125, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 121, "native_id": "VASoL_2011_5_36", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3611198663711548, "incorrect_loss_raw": 1.4038794040679932, "correct_loss_per_char": 0.6805599331855774, "incorrect_loss_per_char": 0.7019397020339966, "correct_loss_per_token": 1.3611198663711548, "incorrect_loss_per_token": 1.4038794040679932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.514404296875, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.514404296875, "logits_per_char": -0.7572021484375, "num_chars": 2}, {"sum_logits": -1.337788462638855, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.337788462638855, "logits_per_char": -0.6688942313194275, "num_chars": 2}, {"sum_logits": -1.3594454526901245, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3594454526901245, "logits_per_char": -0.6797227263450623, "num_chars": 2}, {"sum_logits": -1.3611198663711548, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3611198663711548, "logits_per_char": -0.6805599331855774, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 122, "native_id": "Mercury_7166950", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.402321457862854, "incorrect_loss_raw": 1.3886704047520955, "correct_loss_per_char": 0.701160728931427, "incorrect_loss_per_char": 0.6943352023760477, "correct_loss_per_token": 1.402321457862854, "incorrect_loss_per_token": 1.3886704047520955, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.402321457862854, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.402321457862854, "logits_per_char": -0.701160728931427, "num_chars": 2}, {"sum_logits": -1.4605423212051392, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4605423212051392, "logits_per_char": -0.7302711606025696, "num_chars": 2}, {"sum_logits": -1.384523868560791, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.384523868560791, "logits_per_char": -0.6922619342803955, "num_chars": 2}, {"sum_logits": -1.3209450244903564, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.3209450244903564, "logits_per_char": -0.6604725122451782, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 123, "native_id": "MDSA_2013_8_2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3393354415893555, "incorrect_loss_raw": 1.4094359079996746, "correct_loss_per_char": 0.6696677207946777, "incorrect_loss_per_char": 0.7047179539998373, "correct_loss_per_token": 1.3393354415893555, "incorrect_loss_per_token": 1.4094359079996746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4608181715011597, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4608181715011597, "logits_per_char": -0.7304090857505798, "num_chars": 2}, {"sum_logits": -1.3781882524490356, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3781882524490356, "logits_per_char": -0.6890941262245178, "num_chars": 2}, {"sum_logits": -1.3393354415893555, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.3393354415893555, "logits_per_char": -0.6696677207946777, "num_chars": 2}, {"sum_logits": -1.3893013000488281, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3893013000488281, "logits_per_char": -0.6946506500244141, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 124, "native_id": "Mercury_7085313", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3309617042541504, "incorrect_loss_raw": 1.4123693307240803, "correct_loss_per_char": 0.6654808521270752, "incorrect_loss_per_char": 0.7061846653620402, "correct_loss_per_token": 1.3309617042541504, "incorrect_loss_per_token": 1.4123693307240803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3309617042541504, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.3309617042541504, "logits_per_char": -0.6654808521270752, "num_chars": 2}, {"sum_logits": -1.3338649272918701, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3338649272918701, "logits_per_char": -0.6669324636459351, "num_chars": 2}, {"sum_logits": -1.4362893104553223, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4362893104553223, "logits_per_char": -0.7181446552276611, "num_chars": 2}, {"sum_logits": -1.4669537544250488, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4669537544250488, "logits_per_char": -0.7334768772125244, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 125, "native_id": "Mercury_7018095", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5618860721588135, "incorrect_loss_raw": 1.3419965505599976, "correct_loss_per_char": 0.7809430360794067, "incorrect_loss_per_char": 0.6709982752799988, "correct_loss_per_token": 1.5618860721588135, "incorrect_loss_per_token": 1.3419965505599976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.251930832862854, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.251930832862854, "logits_per_char": -0.625965416431427, "num_chars": 2}, {"sum_logits": -1.4378799200057983, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4378799200057983, "logits_per_char": -0.7189399600028992, "num_chars": 2}, {"sum_logits": -1.3361788988113403, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3361788988113403, "logits_per_char": -0.6680894494056702, "num_chars": 2}, {"sum_logits": -1.5618860721588135, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.5618860721588135, "logits_per_char": -0.7809430360794067, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 126, "native_id": "Mercury_7099348", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3573849201202393, "incorrect_loss_raw": 1.4114326635996501, "correct_loss_per_char": 0.6786924600601196, "incorrect_loss_per_char": 0.7057163317998251, "correct_loss_per_token": 1.3573849201202393, "incorrect_loss_per_token": 1.4114326635996501, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.587229609489441, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.587229609489441, "logits_per_char": -0.7936148047447205, "num_chars": 2}, {"sum_logits": -1.4003181457519531, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4003181457519531, "logits_per_char": -0.7001590728759766, "num_chars": 2}, {"sum_logits": -1.3573849201202393, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3573849201202393, "logits_per_char": -0.6786924600601196, "num_chars": 2}, {"sum_logits": -1.2467502355575562, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2467502355575562, "logits_per_char": -0.6233751177787781, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 127, "native_id": "NCEOGA_2013_5_6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4401743412017822, "incorrect_loss_raw": 1.3785232702891033, "correct_loss_per_char": 0.7200871706008911, "incorrect_loss_per_char": 0.6892616351445516, "correct_loss_per_token": 1.4401743412017822, "incorrect_loss_per_token": 1.3785232702891033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3795452117919922, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3795452117919922, "logits_per_char": -0.6897726058959961, "num_chars": 2}, {"sum_logits": -1.4401743412017822, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4401743412017822, "logits_per_char": -0.7200871706008911, "num_chars": 2}, {"sum_logits": -1.483567714691162, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.483567714691162, "logits_per_char": -0.741783857345581, "num_chars": 2}, {"sum_logits": -1.2724568843841553, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.2724568843841553, "logits_per_char": -0.6362284421920776, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 128, "native_id": "Mercury_7084018", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.461960792541504, "incorrect_loss_raw": 1.3700975179672241, "correct_loss_per_char": 0.730980396270752, "incorrect_loss_per_char": 0.6850487589836121, "correct_loss_per_token": 1.461960792541504, "incorrect_loss_per_token": 1.3700975179672241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4031819105148315, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4031819105148315, "logits_per_char": -0.7015909552574158, "num_chars": 2}, {"sum_logits": -1.2887130975723267, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2887130975723267, "logits_per_char": -0.6443565487861633, "num_chars": 2}, {"sum_logits": -1.461960792541504, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.461960792541504, "logits_per_char": -0.730980396270752, "num_chars": 2}, {"sum_logits": -1.4183975458145142, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4183975458145142, "logits_per_char": -0.7091987729072571, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 129, "native_id": "LEAP__7_10346", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3829548358917236, "incorrect_loss_raw": 1.4289084672927856, "correct_loss_per_char": 0.6914774179458618, "incorrect_loss_per_char": 0.7144542336463928, "correct_loss_per_token": 1.3829548358917236, "incorrect_loss_per_token": 1.4289084672927856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.774475336074829, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.774475336074829, "logits_per_char": -0.8872376680374146, "num_chars": 2}, {"sum_logits": -1.3829548358917236, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3829548358917236, "logits_per_char": -0.6914774179458618, "num_chars": 2}, {"sum_logits": -1.3936039209365845, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3936039209365845, "logits_per_char": -0.6968019604682922, "num_chars": 2}, {"sum_logits": -1.1186461448669434, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.1186461448669434, "logits_per_char": -0.5593230724334717, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 130, "native_id": "Mercury_7008680", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0107696056365967, "incorrect_loss_raw": 1.5632498661677043, "correct_loss_per_char": 0.5053848028182983, "incorrect_loss_per_char": 0.7816249330838522, "correct_loss_per_token": 1.0107696056365967, "incorrect_loss_per_token": 1.5632498661677043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5692485570907593, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.5692485570907593, "logits_per_char": -0.7846242785453796, "num_chars": 2}, {"sum_logits": -1.638358473777771, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.638358473777771, "logits_per_char": -0.8191792368888855, "num_chars": 2}, {"sum_logits": -1.0107696056365967, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.0107696056365967, "logits_per_char": -0.5053848028182983, "num_chars": 2}, {"sum_logits": -1.4821425676345825, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4821425676345825, "logits_per_char": -0.7410712838172913, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 131, "native_id": "NYSEDREGENTS_2015_4_16", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2644236087799072, "incorrect_loss_raw": 1.4376233418782551, "correct_loss_per_char": 0.6322118043899536, "incorrect_loss_per_char": 0.7188116709391276, "correct_loss_per_token": 1.2644236087799072, "incorrect_loss_per_token": 1.4376233418782551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4763015508651733, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4763015508651733, "logits_per_char": -0.7381507754325867, "num_chars": 2}, {"sum_logits": -1.2644236087799072, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.2644236087799072, "logits_per_char": -0.6322118043899536, "num_chars": 2}, {"sum_logits": -1.3670012950897217, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3670012950897217, "logits_per_char": -0.6835006475448608, "num_chars": 2}, {"sum_logits": -1.4695671796798706, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4695671796798706, "logits_per_char": -0.7347835898399353, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 132, "native_id": "Mercury_SC_415071", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3869718313217163, "incorrect_loss_raw": 1.390789786974589, "correct_loss_per_char": 0.6934859156608582, "incorrect_loss_per_char": 0.6953948934872946, "correct_loss_per_token": 1.3869718313217163, "incorrect_loss_per_token": 1.390789786974589, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3589648008346558, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.3589648008346558, "logits_per_char": -0.6794824004173279, "num_chars": 2}, {"sum_logits": -1.3869718313217163, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3869718313217163, "logits_per_char": -0.6934859156608582, "num_chars": 2}, {"sum_logits": -1.4215210676193237, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4215210676193237, "logits_per_char": -0.7107605338096619, "num_chars": 2}, {"sum_logits": -1.3918834924697876, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3918834924697876, "logits_per_char": -0.6959417462348938, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 133, "native_id": "Mercury_7188860", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.492663025856018, "incorrect_loss_raw": 1.35894779364268, "correct_loss_per_char": 0.746331512928009, "incorrect_loss_per_char": 0.67947389682134, "correct_loss_per_token": 1.492663025856018, "incorrect_loss_per_token": 1.35894779364268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.38180410861969, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.38180410861969, "logits_per_char": -0.690902054309845, "num_chars": 2}, {"sum_logits": -1.3381407260894775, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3381407260894775, "logits_per_char": -0.6690703630447388, "num_chars": 2}, {"sum_logits": -1.356898546218872, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.356898546218872, "logits_per_char": -0.678449273109436, "num_chars": 2}, {"sum_logits": -1.492663025856018, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.492663025856018, "logits_per_char": -0.746331512928009, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 134, "native_id": "Mercury_402560", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3897488117218018, "incorrect_loss_raw": 1.390877366065979, "correct_loss_per_char": 0.6948744058609009, "incorrect_loss_per_char": 0.6954386830329895, "correct_loss_per_token": 1.3897488117218018, "incorrect_loss_per_token": 1.390877366065979, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3897488117218018, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3897488117218018, "logits_per_char": -0.6948744058609009, "num_chars": 2}, {"sum_logits": -1.4065972566604614, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4065972566604614, "logits_per_char": -0.7032986283302307, "num_chars": 2}, {"sum_logits": -1.3686798810958862, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.3686798810958862, "logits_per_char": -0.6843399405479431, "num_chars": 2}, {"sum_logits": -1.3973549604415894, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3973549604415894, "logits_per_char": -0.6986774802207947, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 135, "native_id": "Mercury_178815", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3070300817489624, "incorrect_loss_raw": 1.4288233915964763, "correct_loss_per_char": 0.6535150408744812, "incorrect_loss_per_char": 0.7144116957982382, "correct_loss_per_token": 1.3070300817489624, "incorrect_loss_per_token": 1.4288233915964763, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5674831867218018, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5674831867218018, "logits_per_char": -0.7837415933609009, "num_chars": 2}, {"sum_logits": -1.4771052598953247, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4771052598953247, "logits_per_char": -0.7385526299476624, "num_chars": 2}, {"sum_logits": -1.3070300817489624, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3070300817489624, "logits_per_char": -0.6535150408744812, "num_chars": 2}, {"sum_logits": -1.2418817281723022, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.2418817281723022, "logits_per_char": -0.6209408640861511, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 136, "native_id": "MCAS_2003_5_10", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.370953917503357, "incorrect_loss_raw": 1.3969885905583699, "correct_loss_per_char": 0.6854769587516785, "incorrect_loss_per_char": 0.6984942952791849, "correct_loss_per_token": 1.370953917503357, "incorrect_loss_per_token": 1.3969885905583699, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4247225522994995, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4247225522994995, "logits_per_char": -0.7123612761497498, "num_chars": 2}, {"sum_logits": -1.3465137481689453, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.3465137481689453, "logits_per_char": -0.6732568740844727, "num_chars": 2}, {"sum_logits": -1.419729471206665, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.419729471206665, "logits_per_char": -0.7098647356033325, "num_chars": 2}, {"sum_logits": -1.370953917503357, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.370953917503357, "logits_per_char": -0.6854769587516785, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 137, "native_id": "Mercury_7029785", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4736958742141724, "incorrect_loss_raw": 1.3643264770507812, "correct_loss_per_char": 0.7368479371070862, "incorrect_loss_per_char": 0.6821632385253906, "correct_loss_per_token": 1.4736958742141724, "incorrect_loss_per_token": 1.3643264770507812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4736958742141724, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4736958742141724, "logits_per_char": -0.7368479371070862, "num_chars": 2}, {"sum_logits": -1.3930712938308716, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3930712938308716, "logits_per_char": -0.6965356469154358, "num_chars": 2}, {"sum_logits": -1.3323003053665161, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.3323003053665161, "logits_per_char": -0.6661501526832581, "num_chars": 2}, {"sum_logits": -1.367607831954956, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.367607831954956, "logits_per_char": -0.683803915977478, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 138, "native_id": "MDSA_2009_8_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5730503797531128, "incorrect_loss_raw": 1.3394819100697835, "correct_loss_per_char": 0.7865251898765564, "incorrect_loss_per_char": 0.6697409550348917, "correct_loss_per_token": 1.5730503797531128, "incorrect_loss_per_token": 1.3394819100697835, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5730503797531128, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5730503797531128, "logits_per_char": -0.7865251898765564, "num_chars": 2}, {"sum_logits": -1.3944004774093628, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3944004774093628, "logits_per_char": -0.6972002387046814, "num_chars": 2}, {"sum_logits": -1.408839225769043, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.408839225769043, "logits_per_char": -0.7044196128845215, "num_chars": 2}, {"sum_logits": -1.2152060270309448, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2152060270309448, "logits_per_char": -0.6076030135154724, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 139, "native_id": "Mercury_7109690", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3962585926055908, "incorrect_loss_raw": 1.388987382253011, "correct_loss_per_char": 0.6981292963027954, "incorrect_loss_per_char": 0.6944936911265055, "correct_loss_per_token": 1.3962585926055908, "incorrect_loss_per_token": 1.388987382253011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.440243124961853, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.440243124961853, "logits_per_char": -0.7201215624809265, "num_chars": 2}, {"sum_logits": -1.3962585926055908, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3962585926055908, "logits_per_char": -0.6981292963027954, "num_chars": 2}, {"sum_logits": -1.3803552389144897, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3803552389144897, "logits_per_char": -0.6901776194572449, "num_chars": 2}, {"sum_logits": -1.3463637828826904, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3463637828826904, "logits_per_char": -0.6731818914413452, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 140, "native_id": "CSZ30179", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3749819993972778, "incorrect_loss_raw": 1.3978126843770344, "correct_loss_per_char": 0.6874909996986389, "incorrect_loss_per_char": 0.6989063421885172, "correct_loss_per_token": 1.3749819993972778, "incorrect_loss_per_token": 1.3978126843770344, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2858420610427856, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.2858420610427856, "logits_per_char": -0.6429210305213928, "num_chars": 2}, {"sum_logits": -1.4615137577056885, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4615137577056885, "logits_per_char": -0.7307568788528442, "num_chars": 2}, {"sum_logits": -1.4460822343826294, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4460822343826294, "logits_per_char": -0.7230411171913147, "num_chars": 2}, {"sum_logits": -1.3749819993972778, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3749819993972778, "logits_per_char": -0.6874909996986389, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 141, "native_id": "MCAS_2006_8_10", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.445662021636963, "incorrect_loss_raw": 1.3731261491775513, "correct_loss_per_char": 0.7228310108184814, "incorrect_loss_per_char": 0.6865630745887756, "correct_loss_per_token": 1.445662021636963, "incorrect_loss_per_token": 1.3731261491775513, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3541561365127563, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3541561365127563, "logits_per_char": -0.6770780682563782, "num_chars": 2}, {"sum_logits": -1.361778974533081, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.361778974533081, "logits_per_char": -0.6808894872665405, "num_chars": 2}, {"sum_logits": -1.4034433364868164, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4034433364868164, "logits_per_char": -0.7017216682434082, "num_chars": 2}, {"sum_logits": -1.445662021636963, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.445662021636963, "logits_per_char": -0.7228310108184814, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 142, "native_id": "Mercury_7217280", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4316781759262085, "incorrect_loss_raw": 1.3800099690755208, "correct_loss_per_char": 0.7158390879631042, "incorrect_loss_per_char": 0.6900049845377604, "correct_loss_per_token": 1.4316781759262085, "incorrect_loss_per_token": 1.3800099690755208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2792270183563232, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.2792270183563232, "logits_per_char": -0.6396135091781616, "num_chars": 2}, {"sum_logits": -1.3852847814559937, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3852847814559937, "logits_per_char": -0.6926423907279968, "num_chars": 2}, {"sum_logits": -1.4755181074142456, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4755181074142456, "logits_per_char": -0.7377590537071228, "num_chars": 2}, {"sum_logits": -1.4316781759262085, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4316781759262085, "logits_per_char": -0.7158390879631042, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 143, "native_id": "Mercury_SC_401128", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3913094997406006, "incorrect_loss_raw": 1.39143701394399, "correct_loss_per_char": 0.6956547498703003, "incorrect_loss_per_char": 0.695718506971995, "correct_loss_per_token": 1.3913094997406006, "incorrect_loss_per_token": 1.39143701394399, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4553935527801514, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4553935527801514, "logits_per_char": -0.7276967763900757, "num_chars": 2}, {"sum_logits": -1.4099094867706299, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4099094867706299, "logits_per_char": -0.7049547433853149, "num_chars": 2}, {"sum_logits": -1.3913094997406006, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3913094997406006, "logits_per_char": -0.6956547498703003, "num_chars": 2}, {"sum_logits": -1.309008002281189, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.309008002281189, "logits_per_char": -0.6545040011405945, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 144, "native_id": "Mercury_406785", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2389867305755615, "incorrect_loss_raw": 1.4494221607844036, "correct_loss_per_char": 0.6194933652877808, "incorrect_loss_per_char": 0.7247110803922018, "correct_loss_per_token": 1.2389867305755615, "incorrect_loss_per_token": 1.4494221607844036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2389867305755615, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.2389867305755615, "logits_per_char": -0.6194933652877808, "num_chars": 2}, {"sum_logits": -1.5121830701828003, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.5121830701828003, "logits_per_char": -0.7560915350914001, "num_chars": 2}, {"sum_logits": -1.3937908411026, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3937908411026, "logits_per_char": -0.6968954205513, "num_chars": 2}, {"sum_logits": -1.44229257106781, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.44229257106781, "logits_per_char": -0.721146285533905, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 145, "native_id": "Mercury_7093100", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.372283935546875, "incorrect_loss_raw": 1.3987845182418823, "correct_loss_per_char": 0.6861419677734375, "incorrect_loss_per_char": 0.6993922591209412, "correct_loss_per_token": 1.372283935546875, "incorrect_loss_per_token": 1.3987845182418823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4653791189193726, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4653791189193726, "logits_per_char": -0.7326895594596863, "num_chars": 2}, {"sum_logits": -1.372283935546875, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.372283935546875, "logits_per_char": -0.6861419677734375, "num_chars": 2}, {"sum_logits": -1.4311702251434326, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4311702251434326, "logits_per_char": -0.7155851125717163, "num_chars": 2}, {"sum_logits": -1.2998042106628418, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.2998042106628418, "logits_per_char": -0.6499021053314209, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 146, "native_id": "MCAS_2011_5_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3751481771469116, "incorrect_loss_raw": 1.3982587258021038, "correct_loss_per_char": 0.6875740885734558, "incorrect_loss_per_char": 0.6991293629010519, "correct_loss_per_token": 1.3751481771469116, "incorrect_loss_per_token": 1.3982587258021038, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4095191955566406, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4095191955566406, "logits_per_char": -0.7047595977783203, "num_chars": 2}, {"sum_logits": -1.2998647689819336, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2998647689819336, "logits_per_char": -0.6499323844909668, "num_chars": 2}, {"sum_logits": -1.3751481771469116, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3751481771469116, "logits_per_char": -0.6875740885734558, "num_chars": 2}, {"sum_logits": -1.4853922128677368, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4853922128677368, "logits_per_char": -0.7426961064338684, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 147, "native_id": "Mercury_SC_402122", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4012582302093506, "incorrect_loss_raw": 1.3913284142812092, "correct_loss_per_char": 0.7006291151046753, "incorrect_loss_per_char": 0.6956642071406046, "correct_loss_per_token": 1.4012582302093506, "incorrect_loss_per_token": 1.3913284142812092, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5045264959335327, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.5045264959335327, "logits_per_char": -0.7522632479667664, "num_chars": 2}, {"sum_logits": -1.3960009813308716, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3960009813308716, "logits_per_char": -0.6980004906654358, "num_chars": 2}, {"sum_logits": -1.4012582302093506, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4012582302093506, "logits_per_char": -0.7006291151046753, "num_chars": 2}, {"sum_logits": -1.2734577655792236, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.2734577655792236, "logits_per_char": -0.6367288827896118, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 148, "native_id": "Mercury_SC_400518", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.385547399520874, "incorrect_loss_raw": 1.3964602947235107, "correct_loss_per_char": 0.692773699760437, "incorrect_loss_per_char": 0.6982301473617554, "correct_loss_per_token": 1.385547399520874, "incorrect_loss_per_token": 1.3964602947235107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5218888521194458, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5218888521194458, "logits_per_char": -0.7609444260597229, "num_chars": 2}, {"sum_logits": -1.3946932554244995, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3946932554244995, "logits_per_char": -0.6973466277122498, "num_chars": 2}, {"sum_logits": -1.385547399520874, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.385547399520874, "logits_per_char": -0.692773699760437, "num_chars": 2}, {"sum_logits": -1.272798776626587, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.272798776626587, "logits_per_char": -0.6363993883132935, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 149, "native_id": "Mercury_SC_409595", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3663164377212524, "incorrect_loss_raw": 1.4014182885487874, "correct_loss_per_char": 0.6831582188606262, "incorrect_loss_per_char": 0.7007091442743937, "correct_loss_per_token": 1.3663164377212524, "incorrect_loss_per_token": 1.4014182885487874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5182852745056152, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.5182852745056152, "logits_per_char": -0.7591426372528076, "num_chars": 2}, {"sum_logits": -1.3309142589569092, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.3309142589569092, "logits_per_char": -0.6654571294784546, "num_chars": 2}, {"sum_logits": -1.3663164377212524, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3663164377212524, "logits_per_char": -0.6831582188606262, "num_chars": 2}, {"sum_logits": -1.355055332183838, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.355055332183838, "logits_per_char": -0.677527666091919, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 150, "native_id": "Mercury_SC_401125", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.290063738822937, "incorrect_loss_raw": 1.426676829655965, "correct_loss_per_char": 0.6450318694114685, "incorrect_loss_per_char": 0.7133384148279825, "correct_loss_per_token": 1.290063738822937, "incorrect_loss_per_token": 1.426676829655965, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4813988208770752, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4813988208770752, "logits_per_char": -0.7406994104385376, "num_chars": 2}, {"sum_logits": -1.3522676229476929, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3522676229476929, "logits_per_char": -0.6761338114738464, "num_chars": 2}, {"sum_logits": -1.4463640451431274, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4463640451431274, "logits_per_char": -0.7231820225715637, "num_chars": 2}, {"sum_logits": -1.290063738822937, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.290063738822937, "logits_per_char": -0.6450318694114685, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 151, "native_id": "Mercury_7267540", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3906927108764648, "incorrect_loss_raw": 1.3917307058970134, "correct_loss_per_char": 0.6953463554382324, "incorrect_loss_per_char": 0.6958653529485067, "correct_loss_per_token": 1.3906927108764648, "incorrect_loss_per_token": 1.3917307058970134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3804478645324707, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3804478645324707, "logits_per_char": -0.6902239322662354, "num_chars": 2}, {"sum_logits": -1.3656871318817139, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3656871318817139, "logits_per_char": -0.6828435659408569, "num_chars": 2}, {"sum_logits": -1.4290571212768555, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4290571212768555, "logits_per_char": -0.7145285606384277, "num_chars": 2}, {"sum_logits": -1.3906927108764648, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3906927108764648, "logits_per_char": -0.6953463554382324, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 152, "native_id": "NYSEDREGENTS_2008_4_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4531162977218628, "incorrect_loss_raw": 1.3744200468063354, "correct_loss_per_char": 0.7265581488609314, "incorrect_loss_per_char": 0.6872100234031677, "correct_loss_per_token": 1.4531162977218628, "incorrect_loss_per_token": 1.3744200468063354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3557496070861816, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3557496070861816, "logits_per_char": -0.6778748035430908, "num_chars": 2}, {"sum_logits": -1.2860430479049683, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.2860430479049683, "logits_per_char": -0.6430215239524841, "num_chars": 2}, {"sum_logits": -1.4531162977218628, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4531162977218628, "logits_per_char": -0.7265581488609314, "num_chars": 2}, {"sum_logits": -1.4814674854278564, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4814674854278564, "logits_per_char": -0.7407337427139282, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 153, "native_id": "TIMSS_2007_4_pg34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3536946773529053, "incorrect_loss_raw": 1.4065372149149578, "correct_loss_per_char": 0.6768473386764526, "incorrect_loss_per_char": 0.7032686074574789, "correct_loss_per_token": 1.3536946773529053, "incorrect_loss_per_token": 1.4065372149149578, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4873721599578857, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4873721599578857, "logits_per_char": -0.7436860799789429, "num_chars": 2}, {"sum_logits": -1.2859631776809692, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2859631776809692, "logits_per_char": -0.6429815888404846, "num_chars": 2}, {"sum_logits": -1.446276307106018, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.446276307106018, "logits_per_char": -0.723138153553009, "num_chars": 2}, {"sum_logits": -1.3536946773529053, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3536946773529053, "logits_per_char": -0.6768473386764526, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 154, "native_id": "Mercury_182158", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.465376377105713, "incorrect_loss_raw": 1.3704807360967, "correct_loss_per_char": 0.7326881885528564, "incorrect_loss_per_char": 0.68524036804835, "correct_loss_per_token": 1.465376377105713, "incorrect_loss_per_token": 1.3704807360967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.465376377105713, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.465376377105713, "logits_per_char": -0.7326881885528564, "num_chars": 2}, {"sum_logits": -1.4232560396194458, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4232560396194458, "logits_per_char": -0.7116280198097229, "num_chars": 2}, {"sum_logits": -1.4287238121032715, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4287238121032715, "logits_per_char": -0.7143619060516357, "num_chars": 2}, {"sum_logits": -1.2594623565673828, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.2594623565673828, "logits_per_char": -0.6297311782836914, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 155, "native_id": "Mercury_SC_LBS10616", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2801287174224854, "incorrect_loss_raw": 1.4324628114700317, "correct_loss_per_char": 0.6400643587112427, "incorrect_loss_per_char": 0.7162314057350159, "correct_loss_per_token": 1.2801287174224854, "incorrect_loss_per_token": 1.4324628114700317, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2801287174224854, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.2801287174224854, "logits_per_char": -0.6400643587112427, "num_chars": 2}, {"sum_logits": -1.3745801448822021, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3745801448822021, "logits_per_char": -0.6872900724411011, "num_chars": 2}, {"sum_logits": -1.4371217489242554, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4371217489242554, "logits_per_char": -0.7185608744621277, "num_chars": 2}, {"sum_logits": -1.4856865406036377, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4856865406036377, "logits_per_char": -0.7428432703018188, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 156, "native_id": "Mercury_SC_401827", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.418358325958252, "incorrect_loss_raw": 1.3820422093073528, "correct_loss_per_char": 0.709179162979126, "incorrect_loss_per_char": 0.6910211046536764, "correct_loss_per_token": 1.418358325958252, "incorrect_loss_per_token": 1.3820422093073528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3176425695419312, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.3176425695419312, "logits_per_char": -0.6588212847709656, "num_chars": 2}, {"sum_logits": -1.456330418586731, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.456330418586731, "logits_per_char": -0.7281652092933655, "num_chars": 2}, {"sum_logits": -1.372153639793396, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.372153639793396, "logits_per_char": -0.686076819896698, "num_chars": 2}, {"sum_logits": -1.418358325958252, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.418358325958252, "logits_per_char": -0.709179162979126, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 157, "native_id": "NYSEDREGENTS_2012_4_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4524811506271362, "incorrect_loss_raw": 1.3731393416722615, "correct_loss_per_char": 0.7262405753135681, "incorrect_loss_per_char": 0.6865696708361307, "correct_loss_per_token": 1.4524811506271362, "incorrect_loss_per_token": 1.3731393416722615, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4524811506271362, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4524811506271362, "logits_per_char": -0.7262405753135681, "num_chars": 2}, {"sum_logits": -1.3387006521224976, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.3387006521224976, "logits_per_char": -0.6693503260612488, "num_chars": 2}, {"sum_logits": -1.347051739692688, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.347051739692688, "logits_per_char": -0.673525869846344, "num_chars": 2}, {"sum_logits": -1.4336656332015991, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4336656332015991, "logits_per_char": -0.7168328166007996, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 158, "native_id": "Mercury_7263655", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6637089252471924, "incorrect_loss_raw": 1.3227184216181438, "correct_loss_per_char": 0.8318544626235962, "incorrect_loss_per_char": 0.6613592108090719, "correct_loss_per_token": 1.6637089252471924, "incorrect_loss_per_token": 1.3227184216181438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6637089252471924, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.6637089252471924, "logits_per_char": -0.8318544626235962, "num_chars": 2}, {"sum_logits": -1.286719799041748, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.286719799041748, "logits_per_char": -0.643359899520874, "num_chars": 2}, {"sum_logits": -1.5082119703292847, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5082119703292847, "logits_per_char": -0.7541059851646423, "num_chars": 2}, {"sum_logits": -1.1732234954833984, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.1732234954833984, "logits_per_char": -0.5866117477416992, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 159, "native_id": "Mercury_SC_409142", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2741713523864746, "incorrect_loss_raw": 1.4333454370498657, "correct_loss_per_char": 0.6370856761932373, "incorrect_loss_per_char": 0.7166727185249329, "correct_loss_per_token": 1.2741713523864746, "incorrect_loss_per_token": 1.4333454370498657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4299129247665405, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4299129247665405, "logits_per_char": -0.7149564623832703, "num_chars": 2}, {"sum_logits": -1.4897572994232178, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4897572994232178, "logits_per_char": -0.7448786497116089, "num_chars": 2}, {"sum_logits": -1.3803660869598389, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.3803660869598389, "logits_per_char": -0.6901830434799194, "num_chars": 2}, {"sum_logits": -1.2741713523864746, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.2741713523864746, "logits_per_char": -0.6370856761932373, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 160, "native_id": "Mercury_403681", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3134552240371704, "incorrect_loss_raw": 1.4171379804611206, "correct_loss_per_char": 0.6567276120185852, "incorrect_loss_per_char": 0.7085689902305603, "correct_loss_per_token": 1.3134552240371704, "incorrect_loss_per_token": 1.4171379804611206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3649890422821045, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3649890422821045, "logits_per_char": -0.6824945211410522, "num_chars": 2}, {"sum_logits": -1.4201483726501465, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4201483726501465, "logits_per_char": -0.7100741863250732, "num_chars": 2}, {"sum_logits": -1.3134552240371704, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.3134552240371704, "logits_per_char": -0.6567276120185852, "num_chars": 2}, {"sum_logits": -1.4662765264511108, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4662765264511108, "logits_per_char": -0.7331382632255554, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 161, "native_id": "Mercury_410702", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.451114535331726, "incorrect_loss_raw": 1.3714648882548015, "correct_loss_per_char": 0.725557267665863, "incorrect_loss_per_char": 0.6857324441274008, "correct_loss_per_token": 1.451114535331726, "incorrect_loss_per_token": 1.3714648882548015, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.451114535331726, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.451114535331726, "logits_per_char": -0.725557267665863, "num_chars": 2}, {"sum_logits": -1.39454185962677, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.39454185962677, "logits_per_char": -0.697270929813385, "num_chars": 2}, {"sum_logits": -1.3902567625045776, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3902567625045776, "logits_per_char": -0.6951283812522888, "num_chars": 2}, {"sum_logits": -1.3295960426330566, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3295960426330566, "logits_per_char": -0.6647980213165283, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 162, "native_id": "Mercury_SC_415417", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3992480039596558, "incorrect_loss_raw": 1.391815423965454, "correct_loss_per_char": 0.6996240019798279, "incorrect_loss_per_char": 0.695907711982727, "correct_loss_per_token": 1.3992480039596558, "incorrect_loss_per_token": 1.391815423965454, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3992480039596558, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3992480039596558, "logits_per_char": -0.6996240019798279, "num_chars": 2}, {"sum_logits": -1.5165480375289917, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.5165480375289917, "logits_per_char": -0.7582740187644958, "num_chars": 2}, {"sum_logits": -1.32289719581604, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.32289719581604, "logits_per_char": -0.66144859790802, "num_chars": 2}, {"sum_logits": -1.3360010385513306, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3360010385513306, "logits_per_char": -0.6680005192756653, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 163, "native_id": "Mercury_SC_414155", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.350975751876831, "incorrect_loss_raw": 1.4077910979588826, "correct_loss_per_char": 0.6754878759384155, "incorrect_loss_per_char": 0.7038955489794413, "correct_loss_per_token": 1.350975751876831, "incorrect_loss_per_token": 1.4077910979588826, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5070157051086426, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.5070157051086426, "logits_per_char": -0.7535078525543213, "num_chars": 2}, {"sum_logits": -1.2922271490097046, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.2922271490097046, "logits_per_char": -0.6461135745048523, "num_chars": 2}, {"sum_logits": -1.4241304397583008, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4241304397583008, "logits_per_char": -0.7120652198791504, "num_chars": 2}, {"sum_logits": -1.350975751876831, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.350975751876831, "logits_per_char": -0.6754878759384155, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 164, "native_id": "NYSEDREGENTS_2013_8_14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3459155559539795, "incorrect_loss_raw": 1.4070805708567302, "correct_loss_per_char": 0.6729577779769897, "incorrect_loss_per_char": 0.7035402854283651, "correct_loss_per_token": 1.3459155559539795, "incorrect_loss_per_token": 1.4070805708567302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4284569025039673, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4284569025039673, "logits_per_char": -0.7142284512519836, "num_chars": 2}, {"sum_logits": -1.352964162826538, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.352964162826538, "logits_per_char": -0.676482081413269, "num_chars": 2}, {"sum_logits": -1.3459155559539795, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3459155559539795, "logits_per_char": -0.6729577779769897, "num_chars": 2}, {"sum_logits": -1.439820647239685, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.439820647239685, "logits_per_char": -0.7199103236198425, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 165, "native_id": "Mercury_7043943", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5027629137039185, "incorrect_loss_raw": 1.3572577238082886, "correct_loss_per_char": 0.7513814568519592, "incorrect_loss_per_char": 0.6786288619041443, "correct_loss_per_token": 1.5027629137039185, "incorrect_loss_per_token": 1.3572577238082886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5027629137039185, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.5027629137039185, "logits_per_char": -0.7513814568519592, "num_chars": 2}, {"sum_logits": -1.4306890964508057, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4306890964508057, "logits_per_char": -0.7153445482254028, "num_chars": 2}, {"sum_logits": -1.383175253868103, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.383175253868103, "logits_per_char": -0.6915876269340515, "num_chars": 2}, {"sum_logits": -1.257908821105957, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.257908821105957, "logits_per_char": -0.6289544105529785, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 166, "native_id": "Mercury_406955", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5377992391586304, "incorrect_loss_raw": 1.3450901508331299, "correct_loss_per_char": 0.7688996195793152, "incorrect_loss_per_char": 0.6725450754165649, "correct_loss_per_token": 1.5377992391586304, "incorrect_loss_per_token": 1.3450901508331299, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5377992391586304, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5377992391586304, "logits_per_char": -0.7688996195793152, "num_chars": 2}, {"sum_logits": -1.400255560874939, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.400255560874939, "logits_per_char": -0.7001277804374695, "num_chars": 2}, {"sum_logits": -1.3439016342163086, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3439016342163086, "logits_per_char": -0.6719508171081543, "num_chars": 2}, {"sum_logits": -1.291113257408142, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.291113257408142, "logits_per_char": -0.645556628704071, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 167, "native_id": "Mercury_SC_LBS10041", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.582406997680664, "incorrect_loss_raw": 1.3333272139231365, "correct_loss_per_char": 0.791203498840332, "incorrect_loss_per_char": 0.6666636069615682, "correct_loss_per_token": 1.582406997680664, "incorrect_loss_per_token": 1.3333272139231365, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3690654039382935, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3690654039382935, "logits_per_char": -0.6845327019691467, "num_chars": 2}, {"sum_logits": -1.3117825984954834, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.3117825984954834, "logits_per_char": -0.6558912992477417, "num_chars": 2}, {"sum_logits": -1.3191336393356323, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3191336393356323, "logits_per_char": -0.6595668196678162, "num_chars": 2}, {"sum_logits": -1.582406997680664, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.582406997680664, "logits_per_char": -0.791203498840332, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 168, "native_id": "Mercury_7005093", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.313340663909912, "incorrect_loss_raw": 1.4176666339238484, "correct_loss_per_char": 0.656670331954956, "incorrect_loss_per_char": 0.7088333169619242, "correct_loss_per_token": 1.313340663909912, "incorrect_loss_per_token": 1.4176666339238484, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4096039533615112, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4096039533615112, "logits_per_char": -0.7048019766807556, "num_chars": 2}, {"sum_logits": -1.313340663909912, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.313340663909912, "logits_per_char": -0.656670331954956, "num_chars": 2}, {"sum_logits": -1.46973717212677, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.46973717212677, "logits_per_char": -0.734868586063385, "num_chars": 2}, {"sum_logits": -1.3736587762832642, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3736587762832642, "logits_per_char": -0.6868293881416321, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 169, "native_id": "MDSA_2007_8_6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.37071692943573, "incorrect_loss_raw": 1.3997718890508015, "correct_loss_per_char": 0.685358464717865, "incorrect_loss_per_char": 0.6998859445254008, "correct_loss_per_token": 1.37071692943573, "incorrect_loss_per_token": 1.3997718890508015, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5222481489181519, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.5222481489181519, "logits_per_char": -0.7611240744590759, "num_chars": 2}, {"sum_logits": -1.37071692943573, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.37071692943573, "logits_per_char": -0.685358464717865, "num_chars": 2}, {"sum_logits": -1.3527626991271973, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.3527626991271973, "logits_per_char": -0.6763813495635986, "num_chars": 2}, {"sum_logits": -1.3243048191070557, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": true, "logits_per_token": -1.3243048191070557, "logits_per_char": -0.6621524095535278, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 170, "native_id": "Mercury_7170905", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4403506517410278, "incorrect_loss_raw": 1.375621755917867, "correct_loss_per_char": 0.7201753258705139, "incorrect_loss_per_char": 0.6878108779589335, "correct_loss_per_token": 1.4403506517410278, "incorrect_loss_per_token": 1.375621755917867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4160881042480469, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4160881042480469, "logits_per_char": -0.7080440521240234, "num_chars": 2}, {"sum_logits": -1.4403506517410278, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4403506517410278, "logits_per_char": -0.7201753258705139, "num_chars": 2}, {"sum_logits": -1.3850226402282715, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3850226402282715, "logits_per_char": -0.6925113201141357, "num_chars": 2}, {"sum_logits": -1.3257545232772827, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3257545232772827, "logits_per_char": -0.6628772616386414, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 171, "native_id": "TIMSS_2007_4_pg82", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3081339597702026, "incorrect_loss_raw": 1.4217617511749268, "correct_loss_per_char": 0.6540669798851013, "incorrect_loss_per_char": 0.7108808755874634, "correct_loss_per_token": 1.3081339597702026, "incorrect_loss_per_token": 1.4217617511749268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5129926204681396, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.5129926204681396, "logits_per_char": -0.7564963102340698, "num_chars": 2}, {"sum_logits": -1.3081339597702026, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3081339597702026, "logits_per_char": -0.6540669798851013, "num_chars": 2}, {"sum_logits": -1.3460301160812378, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3460301160812378, "logits_per_char": -0.6730150580406189, "num_chars": 2}, {"sum_logits": -1.4062625169754028, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4062625169754028, "logits_per_char": -0.7031312584877014, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 172, "native_id": "TIMSS_2003_4_pg12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.346810221672058, "incorrect_loss_raw": 1.4040151834487915, "correct_loss_per_char": 0.673405110836029, "incorrect_loss_per_char": 0.7020075917243958, "correct_loss_per_token": 1.346810221672058, "incorrect_loss_per_token": 1.4040151834487915, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.399475336074829, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.399475336074829, "logits_per_char": -0.6997376680374146, "num_chars": 2}, {"sum_logits": -1.39982008934021, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.39982008934021, "logits_per_char": -0.699910044670105, "num_chars": 2}, {"sum_logits": -1.4127501249313354, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4127501249313354, "logits_per_char": -0.7063750624656677, "num_chars": 2}, {"sum_logits": -1.346810221672058, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.346810221672058, "logits_per_char": -0.673405110836029, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 173, "native_id": "Mercury_7037345", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.437117338180542, "incorrect_loss_raw": 1.374771277109782, "correct_loss_per_char": 0.718558669090271, "incorrect_loss_per_char": 0.687385638554891, "correct_loss_per_token": 1.437117338180542, "incorrect_loss_per_token": 1.374771277109782, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.341540813446045, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.341540813446045, "logits_per_char": -0.6707704067230225, "num_chars": 2}, {"sum_logits": -1.437117338180542, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.437117338180542, "logits_per_char": -0.718558669090271, "num_chars": 2}, {"sum_logits": -1.4242138862609863, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4242138862609863, "logits_per_char": -0.7121069431304932, "num_chars": 2}, {"sum_logits": -1.3585591316223145, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3585591316223145, "logits_per_char": -0.6792795658111572, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 174, "native_id": "Mercury_7008260", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3668022155761719, "incorrect_loss_raw": 1.4084161520004272, "correct_loss_per_char": 0.6834011077880859, "incorrect_loss_per_char": 0.7042080760002136, "correct_loss_per_token": 1.3668022155761719, "incorrect_loss_per_token": 1.4084161520004272, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5614784955978394, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5614784955978394, "logits_per_char": -0.7807392477989197, "num_chars": 2}, {"sum_logits": -1.4393407106399536, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4393407106399536, "logits_per_char": -0.7196703553199768, "num_chars": 2}, {"sum_logits": -1.3668022155761719, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3668022155761719, "logits_per_char": -0.6834011077880859, "num_chars": 2}, {"sum_logits": -1.2244292497634888, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2244292497634888, "logits_per_char": -0.6122146248817444, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 175, "native_id": "Mercury_7003990", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2655036449432373, "incorrect_loss_raw": 1.4350857734680176, "correct_loss_per_char": 0.6327518224716187, "incorrect_loss_per_char": 0.7175428867340088, "correct_loss_per_token": 1.2655036449432373, "incorrect_loss_per_token": 1.4350857734680176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4478191137313843, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4478191137313843, "logits_per_char": -0.7239095568656921, "num_chars": 2}, {"sum_logits": -1.4313431978225708, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4313431978225708, "logits_per_char": -0.7156715989112854, "num_chars": 2}, {"sum_logits": -1.4260950088500977, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4260950088500977, "logits_per_char": -0.7130475044250488, "num_chars": 2}, {"sum_logits": -1.2655036449432373, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.2655036449432373, "logits_per_char": -0.6327518224716187, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 176, "native_id": "LEAP_2000_8_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.467555284500122, "incorrect_loss_raw": 1.369350830713908, "correct_loss_per_char": 0.733777642250061, "incorrect_loss_per_char": 0.684675415356954, "correct_loss_per_token": 1.467555284500122, "incorrect_loss_per_token": 1.369350830713908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3490787744522095, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.3490787744522095, "logits_per_char": -0.6745393872261047, "num_chars": 2}, {"sum_logits": -1.317409634590149, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.317409634590149, "logits_per_char": -0.6587048172950745, "num_chars": 2}, {"sum_logits": -1.467555284500122, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.467555284500122, "logits_per_char": -0.733777642250061, "num_chars": 2}, {"sum_logits": -1.4415640830993652, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4415640830993652, "logits_per_char": -0.7207820415496826, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 177, "native_id": "Mercury_7163328", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5282000303268433, "incorrect_loss_raw": 1.3549180030822754, "correct_loss_per_char": 0.7641000151634216, "incorrect_loss_per_char": 0.6774590015411377, "correct_loss_per_token": 1.5282000303268433, "incorrect_loss_per_token": 1.3549180030822754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5282000303268433, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5282000303268433, "logits_per_char": -0.7641000151634216, "num_chars": 2}, {"sum_logits": -1.409397006034851, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.409397006034851, "logits_per_char": -0.7046985030174255, "num_chars": 2}, {"sum_logits": -1.468139886856079, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.468139886856079, "logits_per_char": -0.7340699434280396, "num_chars": 2}, {"sum_logits": -1.187217116355896, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.187217116355896, "logits_per_char": -0.593608558177948, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 178, "native_id": "NYSEDREGENTS_2008_8_7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3687143325805664, "incorrect_loss_raw": 1.398475170135498, "correct_loss_per_char": 0.6843571662902832, "incorrect_loss_per_char": 0.699237585067749, "correct_loss_per_token": 1.3687143325805664, "incorrect_loss_per_token": 1.398475170135498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3687143325805664, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.3687143325805664, "logits_per_char": -0.6843571662902832, "num_chars": 2}, {"sum_logits": -1.3833694458007812, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3833694458007812, "logits_per_char": -0.6916847229003906, "num_chars": 2}, {"sum_logits": -1.410542368888855, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.410542368888855, "logits_per_char": -0.7052711844444275, "num_chars": 2}, {"sum_logits": -1.401513695716858, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.401513695716858, "logits_per_char": -0.700756847858429, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 179, "native_id": "Mercury_7007928", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4204386472702026, "incorrect_loss_raw": 1.380388617515564, "correct_loss_per_char": 0.7102193236351013, "incorrect_loss_per_char": 0.690194308757782, "correct_loss_per_token": 1.4204386472702026, "incorrect_loss_per_token": 1.380388617515564, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3262146711349487, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.3262146711349487, "logits_per_char": -0.6631073355674744, "num_chars": 2}, {"sum_logits": -1.4038935899734497, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4038935899734497, "logits_per_char": -0.7019467949867249, "num_chars": 2}, {"sum_logits": -1.4110575914382935, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4110575914382935, "logits_per_char": -0.7055287957191467, "num_chars": 2}, {"sum_logits": -1.4204386472702026, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4204386472702026, "logits_per_char": -0.7102193236351013, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 180, "native_id": "Mercury_7015575", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3957234621047974, "incorrect_loss_raw": 1.3888065814971924, "correct_loss_per_char": 0.6978617310523987, "incorrect_loss_per_char": 0.6944032907485962, "correct_loss_per_token": 1.3957234621047974, "incorrect_loss_per_token": 1.3888065814971924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.401282548904419, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.401282548904419, "logits_per_char": -0.7006412744522095, "num_chars": 2}, {"sum_logits": -1.3957234621047974, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3957234621047974, "logits_per_char": -0.6978617310523987, "num_chars": 2}, {"sum_logits": -1.438629150390625, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.438629150390625, "logits_per_char": -0.7193145751953125, "num_chars": 2}, {"sum_logits": -1.3265080451965332, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.3265080451965332, "logits_per_char": -0.6632540225982666, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 181, "native_id": "Mercury_416636", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.373124599456787, "incorrect_loss_raw": 1.3996904691060383, "correct_loss_per_char": 0.6865622997283936, "incorrect_loss_per_char": 0.6998452345530192, "correct_loss_per_token": 1.373124599456787, "incorrect_loss_per_token": 1.3996904691060383, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4144628047943115, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4144628047943115, "logits_per_char": -0.7072314023971558, "num_chars": 2}, {"sum_logits": -1.3487207889556885, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.3487207889556885, "logits_per_char": -0.6743603944778442, "num_chars": 2}, {"sum_logits": -1.4358878135681152, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4358878135681152, "logits_per_char": -0.7179439067840576, "num_chars": 2}, {"sum_logits": -1.373124599456787, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.373124599456787, "logits_per_char": -0.6865622997283936, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 182, "native_id": "TAKS_2009_8_29", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4872853755950928, "incorrect_loss_raw": 1.363796631495158, "correct_loss_per_char": 0.7436426877975464, "incorrect_loss_per_char": 0.681898315747579, "correct_loss_per_token": 1.4872853755950928, "incorrect_loss_per_token": 1.363796631495158, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4872853755950928, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4872853755950928, "logits_per_char": -0.7436426877975464, "num_chars": 2}, {"sum_logits": -1.4378727674484253, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4378727674484253, "logits_per_char": -0.7189363837242126, "num_chars": 2}, {"sum_logits": -1.3920198678970337, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3920198678970337, "logits_per_char": -0.6960099339485168, "num_chars": 2}, {"sum_logits": -1.2614972591400146, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.2614972591400146, "logits_per_char": -0.6307486295700073, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 183, "native_id": "MEA_2016_8_20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4930009841918945, "incorrect_loss_raw": 1.3619695504506428, "correct_loss_per_char": 0.7465004920959473, "incorrect_loss_per_char": 0.6809847752253214, "correct_loss_per_token": 1.4930009841918945, "incorrect_loss_per_token": 1.3619695504506428, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4930009841918945, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4930009841918945, "logits_per_char": -0.7465004920959473, "num_chars": 2}, {"sum_logits": -1.4504019021987915, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4504019021987915, "logits_per_char": -0.7252009510993958, "num_chars": 2}, {"sum_logits": -1.3965458869934082, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3965458869934082, "logits_per_char": -0.6982729434967041, "num_chars": 2}, {"sum_logits": -1.238960862159729, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.238960862159729, "logits_per_char": -0.6194804310798645, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 184, "native_id": "MDSA_2012_8_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.493823528289795, "incorrect_loss_raw": 1.3648933569590251, "correct_loss_per_char": 0.7469117641448975, "incorrect_loss_per_char": 0.6824466784795126, "correct_loss_per_token": 1.493823528289795, "incorrect_loss_per_token": 1.3648933569590251, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.493823528289795, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.493823528289795, "logits_per_char": -0.7469117641448975, "num_chars": 2}, {"sum_logits": -1.4959139823913574, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4959139823913574, "logits_per_char": -0.7479569911956787, "num_chars": 2}, {"sum_logits": -1.37542724609375, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.37542724609375, "logits_per_char": -0.687713623046875, "num_chars": 2}, {"sum_logits": -1.2233388423919678, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.2233388423919678, "logits_per_char": -0.6116694211959839, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 185, "native_id": "Mercury_402332", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3897664546966553, "incorrect_loss_raw": 1.3977641661961873, "correct_loss_per_char": 0.6948832273483276, "incorrect_loss_per_char": 0.6988820830980936, "correct_loss_per_token": 1.3897664546966553, "incorrect_loss_per_token": 1.3977641661961873, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2940454483032227, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.2940454483032227, "logits_per_char": -0.6470227241516113, "num_chars": 2}, {"sum_logits": -1.3663629293441772, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3663629293441772, "logits_per_char": -0.6831814646720886, "num_chars": 2}, {"sum_logits": -1.3897664546966553, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3897664546966553, "logits_per_char": -0.6948832273483276, "num_chars": 2}, {"sum_logits": -1.532884120941162, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.532884120941162, "logits_per_char": -0.766442060470581, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 186, "native_id": "Mercury_7080605", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4061588048934937, "incorrect_loss_raw": 1.389865557352702, "correct_loss_per_char": 0.7030794024467468, "incorrect_loss_per_char": 0.694932778676351, "correct_loss_per_token": 1.4061588048934937, "incorrect_loss_per_token": 1.389865557352702, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3343349695205688, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.3343349695205688, "logits_per_char": -0.6671674847602844, "num_chars": 2}, {"sum_logits": -1.449209213256836, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.449209213256836, "logits_per_char": -0.724604606628418, "num_chars": 2}, {"sum_logits": -1.4061588048934937, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4061588048934937, "logits_per_char": -0.7030794024467468, "num_chars": 2}, {"sum_logits": -1.3860524892807007, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3860524892807007, "logits_per_char": -0.6930262446403503, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 187, "native_id": "Mercury_7134803", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3757445812225342, "incorrect_loss_raw": 1.3996909856796265, "correct_loss_per_char": 0.6878722906112671, "incorrect_loss_per_char": 0.6998454928398132, "correct_loss_per_token": 1.3757445812225342, "incorrect_loss_per_token": 1.3996909856796265, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4948616027832031, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4948616027832031, "logits_per_char": -0.7474308013916016, "num_chars": 2}, {"sum_logits": -1.4269448518753052, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4269448518753052, "logits_per_char": -0.7134724259376526, "num_chars": 2}, {"sum_logits": -1.3757445812225342, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.3757445812225342, "logits_per_char": -0.6878722906112671, "num_chars": 2}, {"sum_logits": -1.277266502380371, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.277266502380371, "logits_per_char": -0.6386332511901855, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 188, "native_id": "TIMSS_2007_8_pg29", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4328781366348267, "incorrect_loss_raw": 1.3786083062489827, "correct_loss_per_char": 0.7164390683174133, "incorrect_loss_per_char": 0.6893041531244913, "correct_loss_per_token": 1.4328781366348267, "incorrect_loss_per_token": 1.3786083062489827, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3751977682113647, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.3751977682113647, "logits_per_char": -0.6875988841056824, "num_chars": 2}, {"sum_logits": -1.333140254020691, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.333140254020691, "logits_per_char": -0.6665701270103455, "num_chars": 2}, {"sum_logits": -1.4274868965148926, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4274868965148926, "logits_per_char": -0.7137434482574463, "num_chars": 2}, {"sum_logits": -1.4328781366348267, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4328781366348267, "logits_per_char": -0.7164390683174133, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 189, "native_id": "MCAS_2007_5_4785", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5326305627822876, "incorrect_loss_raw": 1.3461469411849976, "correct_loss_per_char": 0.7663152813911438, "incorrect_loss_per_char": 0.6730734705924988, "correct_loss_per_token": 1.5326305627822876, "incorrect_loss_per_token": 1.3461469411849976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5326305627822876, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.5326305627822876, "logits_per_char": -0.7663152813911438, "num_chars": 2}, {"sum_logits": -1.3854433298110962, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3854433298110962, "logits_per_char": -0.6927216649055481, "num_chars": 2}, {"sum_logits": -1.3321510553359985, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3321510553359985, "logits_per_char": -0.6660755276679993, "num_chars": 2}, {"sum_logits": -1.320846438407898, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.320846438407898, "logits_per_char": -0.660423219203949, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 190, "native_id": "NYSEDREGENTS_2012_8_3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5183024406433105, "incorrect_loss_raw": 1.3627349932988484, "correct_loss_per_char": 0.7591512203216553, "incorrect_loss_per_char": 0.6813674966494242, "correct_loss_per_token": 1.5183024406433105, "incorrect_loss_per_token": 1.3627349932988484, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2579313516616821, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.2579313516616821, "logits_per_char": -0.6289656758308411, "num_chars": 2}, {"sum_logits": -1.2831244468688965, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.2831244468688965, "logits_per_char": -0.6415622234344482, "num_chars": 2}, {"sum_logits": -1.5183024406433105, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.5183024406433105, "logits_per_char": -0.7591512203216553, "num_chars": 2}, {"sum_logits": -1.5471491813659668, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.5471491813659668, "logits_per_char": -0.7735745906829834, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 191, "native_id": "Mercury_404987", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4043858051300049, "incorrect_loss_raw": 1.3881521622339885, "correct_loss_per_char": 0.7021929025650024, "incorrect_loss_per_char": 0.6940760811169943, "correct_loss_per_token": 1.4043858051300049, "incorrect_loss_per_token": 1.3881521622339885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3982545137405396, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3982545137405396, "logits_per_char": -0.6991272568702698, "num_chars": 2}, {"sum_logits": -1.4043858051300049, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4043858051300049, "logits_per_char": -0.7021929025650024, "num_chars": 2}, {"sum_logits": -1.4595905542373657, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4595905542373657, "logits_per_char": -0.7297952771186829, "num_chars": 2}, {"sum_logits": -1.30661141872406, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.30661141872406, "logits_per_char": -0.65330570936203, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 192, "native_id": "MCAS_2012_8_23648", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3860036134719849, "incorrect_loss_raw": 1.3976196845372517, "correct_loss_per_char": 0.6930018067359924, "incorrect_loss_per_char": 0.6988098422686259, "correct_loss_per_token": 1.3860036134719849, "incorrect_loss_per_token": 1.3976196845372517, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4287104606628418, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4287104606628418, "logits_per_char": -0.7143552303314209, "num_chars": 2}, {"sum_logits": -1.308777928352356, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.308777928352356, "logits_per_char": -0.654388964176178, "num_chars": 2}, {"sum_logits": -1.4553706645965576, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4553706645965576, "logits_per_char": -0.7276853322982788, "num_chars": 2}, {"sum_logits": -1.3860036134719849, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3860036134719849, "logits_per_char": -0.6930018067359924, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 193, "native_id": "MCAS_2005_5_10", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3827725648880005, "incorrect_loss_raw": 1.3943487405776978, "correct_loss_per_char": 0.6913862824440002, "incorrect_loss_per_char": 0.6971743702888489, "correct_loss_per_token": 1.3827725648880005, "incorrect_loss_per_token": 1.3943487405776978, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3451653718948364, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.3451653718948364, "logits_per_char": -0.6725826859474182, "num_chars": 2}, {"sum_logits": -1.3827725648880005, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3827725648880005, "logits_per_char": -0.6913862824440002, "num_chars": 2}, {"sum_logits": -1.4133524894714355, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4133524894714355, "logits_per_char": -0.7066762447357178, "num_chars": 2}, {"sum_logits": -1.4245283603668213, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4245283603668213, "logits_per_char": -0.7122641801834106, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 194, "native_id": "Mercury_7213868", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3804618120193481, "incorrect_loss_raw": 1.3980626265207927, "correct_loss_per_char": 0.6902309060096741, "incorrect_loss_per_char": 0.6990313132603964, "correct_loss_per_token": 1.3804618120193481, "incorrect_loss_per_token": 1.3980626265207927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.457542061805725, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.457542061805725, "logits_per_char": -0.7287710309028625, "num_chars": 2}, {"sum_logits": -1.303971767425537, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.303971767425537, "logits_per_char": -0.6519858837127686, "num_chars": 2}, {"sum_logits": -1.3804618120193481, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3804618120193481, "logits_per_char": -0.6902309060096741, "num_chars": 2}, {"sum_logits": -1.4326740503311157, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4326740503311157, "logits_per_char": -0.7163370251655579, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 195, "native_id": "NYSEDREGENTS_2012_8_16", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2884562015533447, "incorrect_loss_raw": 1.428759773572286, "correct_loss_per_char": 0.6442281007766724, "incorrect_loss_per_char": 0.714379886786143, "correct_loss_per_token": 1.2884562015533447, "incorrect_loss_per_token": 1.428759773572286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4428623914718628, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4428623914718628, "logits_per_char": -0.7214311957359314, "num_chars": 2}, {"sum_logits": -1.2884562015533447, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.2884562015533447, "logits_per_char": -0.6442281007766724, "num_chars": 2}, {"sum_logits": -1.3535690307617188, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3535690307617188, "logits_per_char": -0.6767845153808594, "num_chars": 2}, {"sum_logits": -1.4898478984832764, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4898478984832764, "logits_per_char": -0.7449239492416382, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 196, "native_id": "Mercury_7239453", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4878909587860107, "incorrect_loss_raw": 1.3607451518376668, "correct_loss_per_char": 0.7439454793930054, "incorrect_loss_per_char": 0.6803725759188334, "correct_loss_per_token": 1.4878909587860107, "incorrect_loss_per_token": 1.3607451518376668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4320472478866577, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4320472478866577, "logits_per_char": -0.7160236239433289, "num_chars": 2}, {"sum_logits": -1.328320860862732, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.328320860862732, "logits_per_char": -0.664160430431366, "num_chars": 2}, {"sum_logits": -1.4878909587860107, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4878909587860107, "logits_per_char": -0.7439454793930054, "num_chars": 2}, {"sum_logits": -1.3218673467636108, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3218673467636108, "logits_per_char": -0.6609336733818054, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 197, "native_id": "Mercury_7008033", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5271573066711426, "incorrect_loss_raw": 1.349725604057312, "correct_loss_per_char": 0.7635786533355713, "incorrect_loss_per_char": 0.674862802028656, "correct_loss_per_token": 1.5271573066711426, "incorrect_loss_per_token": 1.349725604057312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2910351753234863, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.2910351753234863, "logits_per_char": -0.6455175876617432, "num_chars": 2}, {"sum_logits": -1.3957459926605225, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3957459926605225, "logits_per_char": -0.6978729963302612, "num_chars": 2}, {"sum_logits": -1.3623956441879272, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3623956441879272, "logits_per_char": -0.6811978220939636, "num_chars": 2}, {"sum_logits": -1.5271573066711426, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.5271573066711426, "logits_per_char": -0.7635786533355713, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 198, "native_id": "Mercury_SC_400125", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.460831880569458, "incorrect_loss_raw": 1.3690004746119182, "correct_loss_per_char": 0.730415940284729, "incorrect_loss_per_char": 0.6845002373059591, "correct_loss_per_token": 1.460831880569458, "incorrect_loss_per_token": 1.3690004746119182, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.460831880569458, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.460831880569458, "logits_per_char": -0.730415940284729, "num_chars": 2}, {"sum_logits": -1.2894960641860962, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.2894960641860962, "logits_per_char": -0.6447480320930481, "num_chars": 2}, {"sum_logits": -1.426323652267456, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.426323652267456, "logits_per_char": -0.713161826133728, "num_chars": 2}, {"sum_logits": -1.3911817073822021, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3911817073822021, "logits_per_char": -0.6955908536911011, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 199, "native_id": "VASoL_2008_5_40", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.345823049545288, "incorrect_loss_raw": 1.414179523785909, "correct_loss_per_char": 0.672911524772644, "incorrect_loss_per_char": 0.7070897618929545, "correct_loss_per_token": 1.345823049545288, "incorrect_loss_per_token": 1.414179523785909, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.345823049545288, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.345823049545288, "logits_per_char": -0.672911524772644, "num_chars": 2}, {"sum_logits": -1.2737562656402588, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": true, "logits_per_token": -1.2737562656402588, "logits_per_char": -0.6368781328201294, "num_chars": 2}, {"sum_logits": -1.374334692955017, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.374334692955017, "logits_per_char": -0.6871673464775085, "num_chars": 2}, {"sum_logits": -1.5944476127624512, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.5944476127624512, "logits_per_char": -0.7972238063812256, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 200, "native_id": "MCAS_2004_5_21", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4811207056045532, "incorrect_loss_raw": 1.364230473836263, "correct_loss_per_char": 0.7405603528022766, "incorrect_loss_per_char": 0.6821152369181315, "correct_loss_per_token": 1.4811207056045532, "incorrect_loss_per_token": 1.364230473836263, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4811207056045532, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4811207056045532, "logits_per_char": -0.7405603528022766, "num_chars": 2}, {"sum_logits": -1.4300776720046997, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4300776720046997, "logits_per_char": -0.7150388360023499, "num_chars": 2}, {"sum_logits": -1.3606839179992676, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3606839179992676, "logits_per_char": -0.6803419589996338, "num_chars": 2}, {"sum_logits": -1.3019298315048218, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.3019298315048218, "logits_per_char": -0.6509649157524109, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 201, "native_id": "OHAT_2010_8_24", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3050323724746704, "incorrect_loss_raw": 1.430393934249878, "correct_loss_per_char": 0.6525161862373352, "incorrect_loss_per_char": 0.715196967124939, "correct_loss_per_token": 1.3050323724746704, "incorrect_loss_per_token": 1.430393934249878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.629744291305542, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.629744291305542, "logits_per_char": -0.814872145652771, "num_chars": 2}, {"sum_logits": -1.3050323724746704, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3050323724746704, "logits_per_char": -0.6525161862373352, "num_chars": 2}, {"sum_logits": -1.3588922023773193, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3588922023773193, "logits_per_char": -0.6794461011886597, "num_chars": 2}, {"sum_logits": -1.3025453090667725, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.3025453090667725, "logits_per_char": -0.6512726545333862, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 202, "native_id": "Mercury_7126613", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3348467350006104, "incorrect_loss_raw": 1.4111388127009075, "correct_loss_per_char": 0.6674233675003052, "incorrect_loss_per_char": 0.7055694063504537, "correct_loss_per_token": 1.3348467350006104, "incorrect_loss_per_token": 1.4111388127009075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4915159940719604, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4915159940719604, "logits_per_char": -0.7457579970359802, "num_chars": 2}, {"sum_logits": -1.3258475065231323, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.3258475065231323, "logits_per_char": -0.6629237532615662, "num_chars": 2}, {"sum_logits": -1.4160529375076294, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4160529375076294, "logits_per_char": -0.7080264687538147, "num_chars": 2}, {"sum_logits": -1.3348467350006104, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.3348467350006104, "logits_per_char": -0.6674233675003052, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 203, "native_id": "Mercury_400396", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3631627559661865, "incorrect_loss_raw": 1.399886965751648, "correct_loss_per_char": 0.6815813779830933, "incorrect_loss_per_char": 0.699943482875824, "correct_loss_per_token": 1.3631627559661865, "incorrect_loss_per_token": 1.399886965751648, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3631627559661865, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.3631627559661865, "logits_per_char": -0.6815813779830933, "num_chars": 2}, {"sum_logits": -1.3807010650634766, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3807010650634766, "logits_per_char": -0.6903505325317383, "num_chars": 2}, {"sum_logits": -1.4176677465438843, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4176677465438843, "logits_per_char": -0.7088338732719421, "num_chars": 2}, {"sum_logits": -1.401292085647583, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.401292085647583, "logits_per_char": -0.7006460428237915, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 204, "native_id": "ACTAAP_2010_5_8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5689769983291626, "incorrect_loss_raw": 1.3422294855117798, "correct_loss_per_char": 0.7844884991645813, "incorrect_loss_per_char": 0.6711147427558899, "correct_loss_per_token": 1.5689769983291626, "incorrect_loss_per_token": 1.3422294855117798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5689769983291626, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5689769983291626, "logits_per_char": -0.7844884991645813, "num_chars": 2}, {"sum_logits": -1.4471759796142578, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4471759796142578, "logits_per_char": -0.7235879898071289, "num_chars": 2}, {"sum_logits": -1.3772779703140259, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3772779703140259, "logits_per_char": -0.6886389851570129, "num_chars": 2}, {"sum_logits": -1.2022345066070557, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2022345066070557, "logits_per_char": -0.6011172533035278, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 205, "native_id": "Mercury_7092278", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4400357007980347, "incorrect_loss_raw": 1.376119335492452, "correct_loss_per_char": 0.7200178503990173, "incorrect_loss_per_char": 0.688059667746226, "correct_loss_per_token": 1.4400357007980347, "incorrect_loss_per_token": 1.376119335492452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3951704502105713, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3951704502105713, "logits_per_char": -0.6975852251052856, "num_chars": 2}, {"sum_logits": -1.3144679069519043, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3144679069519043, "logits_per_char": -0.6572339534759521, "num_chars": 2}, {"sum_logits": -1.4400357007980347, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4400357007980347, "logits_per_char": -0.7200178503990173, "num_chars": 2}, {"sum_logits": -1.4187196493148804, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4187196493148804, "logits_per_char": -0.7093598246574402, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 206, "native_id": "NYSEDREGENTS_2008_8_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.398827314376831, "incorrect_loss_raw": 1.3902231454849243, "correct_loss_per_char": 0.6994136571884155, "incorrect_loss_per_char": 0.6951115727424622, "correct_loss_per_token": 1.398827314376831, "incorrect_loss_per_token": 1.3902231454849243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.352117657661438, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.352117657661438, "logits_per_char": -0.676058828830719, "num_chars": 2}, {"sum_logits": -1.398827314376831, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.398827314376831, "logits_per_char": -0.6994136571884155, "num_chars": 2}, {"sum_logits": -1.495330572128296, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.495330572128296, "logits_per_char": -0.747665286064148, "num_chars": 2}, {"sum_logits": -1.323221206665039, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.323221206665039, "logits_per_char": -0.6616106033325195, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 207, "native_id": "LEAP__7_10342", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4402116537094116, "incorrect_loss_raw": 1.380348841349284, "correct_loss_per_char": 0.7201058268547058, "incorrect_loss_per_char": 0.690174420674642, "correct_loss_per_token": 1.4402116537094116, "incorrect_loss_per_token": 1.380348841349284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5337729454040527, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.5337729454040527, "logits_per_char": -0.7668864727020264, "num_chars": 2}, {"sum_logits": -1.4402116537094116, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4402116537094116, "logits_per_char": -0.7201058268547058, "num_chars": 2}, {"sum_logits": -1.2559123039245605, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.2559123039245605, "logits_per_char": -0.6279561519622803, "num_chars": 2}, {"sum_logits": -1.3513612747192383, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3513612747192383, "logits_per_char": -0.6756806373596191, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 208, "native_id": "Mercury_7176208", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3571876287460327, "incorrect_loss_raw": 1.4090190728505452, "correct_loss_per_char": 0.6785938143730164, "incorrect_loss_per_char": 0.7045095364252726, "correct_loss_per_token": 1.3571876287460327, "incorrect_loss_per_token": 1.4090190728505452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5129034519195557, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5129034519195557, "logits_per_char": -0.7564517259597778, "num_chars": 2}, {"sum_logits": -1.3571876287460327, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3571876287460327, "logits_per_char": -0.6785938143730164, "num_chars": 2}, {"sum_logits": -1.4651379585266113, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4651379585266113, "logits_per_char": -0.7325689792633057, "num_chars": 2}, {"sum_logits": -1.2490158081054688, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2490158081054688, "logits_per_char": -0.6245079040527344, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 209, "native_id": "Mercury_7057768", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3976126909255981, "incorrect_loss_raw": 1.387855331103007, "correct_loss_per_char": 0.6988063454627991, "incorrect_loss_per_char": 0.6939276655515035, "correct_loss_per_token": 1.3976126909255981, "incorrect_loss_per_token": 1.387855331103007, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3976126909255981, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3976126909255981, "logits_per_char": -0.6988063454627991, "num_chars": 2}, {"sum_logits": -1.342556118965149, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.342556118965149, "logits_per_char": -0.6712780594825745, "num_chars": 2}, {"sum_logits": -1.3939207792282104, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3939207792282104, "logits_per_char": -0.6969603896141052, "num_chars": 2}, {"sum_logits": -1.4270890951156616, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4270890951156616, "logits_per_char": -0.7135445475578308, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 210, "native_id": "Mercury_406776", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4520312547683716, "incorrect_loss_raw": 1.3716692129770915, "correct_loss_per_char": 0.7260156273841858, "incorrect_loss_per_char": 0.6858346064885458, "correct_loss_per_token": 1.4520312547683716, "incorrect_loss_per_token": 1.3716692129770915, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3831520080566406, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3831520080566406, "logits_per_char": -0.6915760040283203, "num_chars": 2}, {"sum_logits": -1.4496687650680542, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4496687650680542, "logits_per_char": -0.7248343825340271, "num_chars": 2}, {"sum_logits": -1.4520312547683716, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4520312547683716, "logits_per_char": -0.7260156273841858, "num_chars": 2}, {"sum_logits": -1.2821868658065796, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.2821868658065796, "logits_per_char": -0.6410934329032898, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 211, "native_id": "MCAS_2012_5_3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3606672286987305, "incorrect_loss_raw": 1.4025240341822307, "correct_loss_per_char": 0.6803336143493652, "incorrect_loss_per_char": 0.7012620170911154, "correct_loss_per_token": 1.3606672286987305, "incorrect_loss_per_token": 1.4025240341822307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.412309169769287, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.412309169769287, "logits_per_char": -0.7061545848846436, "num_chars": 2}, {"sum_logits": -1.4287031888961792, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4287031888961792, "logits_per_char": -0.7143515944480896, "num_chars": 2}, {"sum_logits": -1.3606672286987305, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.3606672286987305, "logits_per_char": -0.6803336143493652, "num_chars": 2}, {"sum_logits": -1.3665597438812256, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3665597438812256, "logits_per_char": -0.6832798719406128, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 212, "native_id": "Mercury_SC_405444", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3394697904586792, "incorrect_loss_raw": 1.407573898633321, "correct_loss_per_char": 0.6697348952293396, "incorrect_loss_per_char": 0.7037869493166605, "correct_loss_per_token": 1.3394697904586792, "incorrect_loss_per_token": 1.407573898633321, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4206396341323853, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4206396341323853, "logits_per_char": -0.7103198170661926, "num_chars": 2}, {"sum_logits": -1.3394697904586792, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3394697904586792, "logits_per_char": -0.6697348952293396, "num_chars": 2}, {"sum_logits": -1.4190324544906616, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4190324544906616, "logits_per_char": -0.7095162272453308, "num_chars": 2}, {"sum_logits": -1.3830496072769165, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3830496072769165, "logits_per_char": -0.6915248036384583, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 213, "native_id": "Mercury_7160545", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4698975086212158, "incorrect_loss_raw": 1.3764188687006633, "correct_loss_per_char": 0.7349487543106079, "incorrect_loss_per_char": 0.6882094343503317, "correct_loss_per_token": 1.4698975086212158, "incorrect_loss_per_token": 1.3764188687006633, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5254371166229248, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.5254371166229248, "logits_per_char": -0.7627185583114624, "num_chars": 2}, {"sum_logits": -1.4698975086212158, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4698975086212158, "logits_per_char": -0.7349487543106079, "num_chars": 2}, {"sum_logits": -1.4140676259994507, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4140676259994507, "logits_per_char": -0.7070338129997253, "num_chars": 2}, {"sum_logits": -1.1897518634796143, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.1897518634796143, "logits_per_char": -0.5948759317398071, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 214, "native_id": "MDSA_2009_8_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4157496690750122, "incorrect_loss_raw": 1.3826111952463787, "correct_loss_per_char": 0.7078748345375061, "incorrect_loss_per_char": 0.6913055976231893, "correct_loss_per_token": 1.4157496690750122, "incorrect_loss_per_token": 1.3826111952463787, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3533154726028442, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3533154726028442, "logits_per_char": -0.6766577363014221, "num_chars": 2}, {"sum_logits": -1.3519223928451538, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.3519223928451538, "logits_per_char": -0.6759611964225769, "num_chars": 2}, {"sum_logits": -1.4425957202911377, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4425957202911377, "logits_per_char": -0.7212978601455688, "num_chars": 2}, {"sum_logits": -1.4157496690750122, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4157496690750122, "logits_per_char": -0.7078748345375061, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 215, "native_id": "TIMSS_2011_4_pg51", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4806387424468994, "incorrect_loss_raw": 1.3630783160527546, "correct_loss_per_char": 0.7403193712234497, "incorrect_loss_per_char": 0.6815391580263773, "correct_loss_per_token": 1.4806387424468994, "incorrect_loss_per_token": 1.3630783160527546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3760840892791748, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3760840892791748, "logits_per_char": -0.6880420446395874, "num_chars": 2}, {"sum_logits": -1.4806387424468994, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4806387424468994, "logits_per_char": -0.7403193712234497, "num_chars": 2}, {"sum_logits": -1.3744697570800781, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3744697570800781, "logits_per_char": -0.6872348785400391, "num_chars": 2}, {"sum_logits": -1.3386811017990112, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.3386811017990112, "logits_per_char": -0.6693405508995056, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 216, "native_id": "NYSEDREGENTS_2013_4_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4615143537521362, "incorrect_loss_raw": 1.3821637630462646, "correct_loss_per_char": 0.7307571768760681, "incorrect_loss_per_char": 0.6910818815231323, "correct_loss_per_token": 1.4615143537521362, "incorrect_loss_per_token": 1.3821637630462646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3893858194351196, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3893858194351196, "logits_per_char": -0.6946929097175598, "num_chars": 2}, {"sum_logits": -1.1678688526153564, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.1678688526153564, "logits_per_char": -0.5839344263076782, "num_chars": 2}, {"sum_logits": -1.4615143537521362, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4615143537521362, "logits_per_char": -0.7307571768760681, "num_chars": 2}, {"sum_logits": -1.5892366170883179, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.5892366170883179, "logits_per_char": -0.7946183085441589, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 217, "native_id": "MSA_2015_5_7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4476854801177979, "incorrect_loss_raw": 1.3765672047932942, "correct_loss_per_char": 0.7238427400588989, "incorrect_loss_per_char": 0.6882836023966471, "correct_loss_per_token": 1.4476854801177979, "incorrect_loss_per_token": 1.3765672047932942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4994505643844604, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4994505643844604, "logits_per_char": -0.7497252821922302, "num_chars": 2}, {"sum_logits": -1.4476854801177979, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4476854801177979, "logits_per_char": -0.7238427400588989, "num_chars": 2}, {"sum_logits": -1.3673208951950073, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3673208951950073, "logits_per_char": -0.6836604475975037, "num_chars": 2}, {"sum_logits": -1.262930154800415, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.262930154800415, "logits_per_char": -0.6314650774002075, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 218, "native_id": "Mercury_SC_400662", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4235026836395264, "incorrect_loss_raw": 1.3881741762161255, "correct_loss_per_char": 0.7117513418197632, "incorrect_loss_per_char": 0.6940870881080627, "correct_loss_per_token": 1.4235026836395264, "incorrect_loss_per_token": 1.3881741762161255, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5564295053482056, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5564295053482056, "logits_per_char": -0.7782147526741028, "num_chars": 2}, {"sum_logits": -1.368798017501831, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.368798017501831, "logits_per_char": -0.6843990087509155, "num_chars": 2}, {"sum_logits": -1.4235026836395264, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4235026836395264, "logits_per_char": -0.7117513418197632, "num_chars": 2}, {"sum_logits": -1.2392950057983398, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2392950057983398, "logits_per_char": -0.6196475028991699, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 219, "native_id": "Mercury_SC_401833", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3478771448135376, "incorrect_loss_raw": 1.40972634156545, "correct_loss_per_char": 0.6739385724067688, "incorrect_loss_per_char": 0.704863170782725, "correct_loss_per_token": 1.3478771448135376, "incorrect_loss_per_token": 1.40972634156545, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5560497045516968, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.5560497045516968, "logits_per_char": -0.7780248522758484, "num_chars": 2}, {"sum_logits": -1.3463835716247559, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3463835716247559, "logits_per_char": -0.6731917858123779, "num_chars": 2}, {"sum_logits": -1.3267457485198975, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.3267457485198975, "logits_per_char": -0.6633728742599487, "num_chars": 2}, {"sum_logits": -1.3478771448135376, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3478771448135376, "logits_per_char": -0.6739385724067688, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 220, "native_id": "Mercury_7071750", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3644765615463257, "incorrect_loss_raw": 1.40013587474823, "correct_loss_per_char": 0.6822382807731628, "incorrect_loss_per_char": 0.700067937374115, "correct_loss_per_token": 1.3644765615463257, "incorrect_loss_per_token": 1.40013587474823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3356297016143799, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3356297016143799, "logits_per_char": -0.6678148508071899, "num_chars": 2}, {"sum_logits": -1.4902136325836182, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4902136325836182, "logits_per_char": -0.7451068162918091, "num_chars": 2}, {"sum_logits": -1.374564290046692, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.374564290046692, "logits_per_char": -0.687282145023346, "num_chars": 2}, {"sum_logits": -1.3644765615463257, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3644765615463257, "logits_per_char": -0.6822382807731628, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 221, "native_id": "Mercury_404991", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.41793954372406, "incorrect_loss_raw": 1.3827176094055176, "correct_loss_per_char": 0.70896977186203, "incorrect_loss_per_char": 0.6913588047027588, "correct_loss_per_token": 1.41793954372406, "incorrect_loss_per_token": 1.3827176094055176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.41793954372406, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.41793954372406, "logits_per_char": -0.70896977186203, "num_chars": 2}, {"sum_logits": -1.3642966747283936, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.3642966747283936, "logits_per_char": -0.6821483373641968, "num_chars": 2}, {"sum_logits": -1.368943214416504, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.368943214416504, "logits_per_char": -0.684471607208252, "num_chars": 2}, {"sum_logits": -1.4149129390716553, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4149129390716553, "logits_per_char": -0.7074564695358276, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 222, "native_id": "Mercury_7246278", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3982795476913452, "incorrect_loss_raw": 1.3889052470525105, "correct_loss_per_char": 0.6991397738456726, "incorrect_loss_per_char": 0.6944526235262553, "correct_loss_per_token": 1.3982795476913452, "incorrect_loss_per_token": 1.3889052470525105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3718421459197998, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3718421459197998, "logits_per_char": -0.6859210729598999, "num_chars": 2}, {"sum_logits": -1.4234986305236816, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4234986305236816, "logits_per_char": -0.7117493152618408, "num_chars": 2}, {"sum_logits": -1.3713749647140503, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.3713749647140503, "logits_per_char": -0.6856874823570251, "num_chars": 2}, {"sum_logits": -1.3982795476913452, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3982795476913452, "logits_per_char": -0.6991397738456726, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 223, "native_id": "Mercury_SC_400987", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4577865600585938, "incorrect_loss_raw": 1.3693591753641765, "correct_loss_per_char": 0.7288932800292969, "incorrect_loss_per_char": 0.6846795876820883, "correct_loss_per_token": 1.4577865600585938, "incorrect_loss_per_token": 1.3693591753641765, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.381914496421814, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.381914496421814, "logits_per_char": -0.690957248210907, "num_chars": 2}, {"sum_logits": -1.4577865600585938, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4577865600585938, "logits_per_char": -0.7288932800292969, "num_chars": 2}, {"sum_logits": -1.3997318744659424, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3997318744659424, "logits_per_char": -0.6998659372329712, "num_chars": 2}, {"sum_logits": -1.326431155204773, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.326431155204773, "logits_per_char": -0.6632155776023865, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 224, "native_id": "ACTAAP_2010_5_15", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2880195379257202, "incorrect_loss_raw": 1.4341009855270386, "correct_loss_per_char": 0.6440097689628601, "incorrect_loss_per_char": 0.7170504927635193, "correct_loss_per_token": 1.2880195379257202, "incorrect_loss_per_token": 1.4341009855270386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5924590826034546, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5924590826034546, "logits_per_char": -0.7962295413017273, "num_chars": 2}, {"sum_logits": -1.3994694948196411, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3994694948196411, "logits_per_char": -0.6997347474098206, "num_chars": 2}, {"sum_logits": -1.31037437915802, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.31037437915802, "logits_per_char": -0.65518718957901, "num_chars": 2}, {"sum_logits": -1.2880195379257202, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2880195379257202, "logits_per_char": -0.6440097689628601, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 225, "native_id": "ACTAAP_2014_5_4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4411401748657227, "incorrect_loss_raw": 1.3815455834070842, "correct_loss_per_char": 0.7205700874328613, "incorrect_loss_per_char": 0.6907727917035421, "correct_loss_per_token": 1.4411401748657227, "incorrect_loss_per_token": 1.3815455834070842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2311387062072754, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2311387062072754, "logits_per_char": -0.6155693531036377, "num_chars": 2}, {"sum_logits": -1.4060492515563965, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4060492515563965, "logits_per_char": -0.7030246257781982, "num_chars": 2}, {"sum_logits": -1.4411401748657227, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4411401748657227, "logits_per_char": -0.7205700874328613, "num_chars": 2}, {"sum_logits": -1.5074487924575806, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.5074487924575806, "logits_per_char": -0.7537243962287903, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 226, "native_id": "Mercury_LBS10993", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4378530979156494, "incorrect_loss_raw": 1.3812175194422405, "correct_loss_per_char": 0.7189265489578247, "incorrect_loss_per_char": 0.6906087597211202, "correct_loss_per_token": 1.4378530979156494, "incorrect_loss_per_token": 1.3812175194422405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3102900981903076, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3102900981903076, "logits_per_char": -0.6551450490951538, "num_chars": 2}, {"sum_logits": -1.2856566905975342, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.2856566905975342, "logits_per_char": -0.6428283452987671, "num_chars": 2}, {"sum_logits": -1.4378530979156494, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4378530979156494, "logits_per_char": -0.7189265489578247, "num_chars": 2}, {"sum_logits": -1.5477057695388794, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.5477057695388794, "logits_per_char": -0.7738528847694397, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 227, "native_id": "Mercury_7216580", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4900176525115967, "incorrect_loss_raw": 1.3601107597351074, "correct_loss_per_char": 0.7450088262557983, "incorrect_loss_per_char": 0.6800553798675537, "correct_loss_per_token": 1.4900176525115967, "incorrect_loss_per_token": 1.3601107597351074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3811919689178467, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3811919689178467, "logits_per_char": -0.6905959844589233, "num_chars": 2}, {"sum_logits": -1.369494915008545, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.369494915008545, "logits_per_char": -0.6847474575042725, "num_chars": 2}, {"sum_logits": -1.4900176525115967, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4900176525115967, "logits_per_char": -0.7450088262557983, "num_chars": 2}, {"sum_logits": -1.3296453952789307, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.3296453952789307, "logits_per_char": -0.6648226976394653, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 228, "native_id": "Mercury_SC_405340", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3369956016540527, "incorrect_loss_raw": 1.409594217936198, "correct_loss_per_char": 0.6684978008270264, "incorrect_loss_per_char": 0.704797108968099, "correct_loss_per_token": 1.3369956016540527, "incorrect_loss_per_token": 1.409594217936198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3553842306137085, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3553842306137085, "logits_per_char": -0.6776921153068542, "num_chars": 2}, {"sum_logits": -1.3369956016540527, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.3369956016540527, "logits_per_char": -0.6684978008270264, "num_chars": 2}, {"sum_logits": -1.3651974201202393, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3651974201202393, "logits_per_char": -0.6825987100601196, "num_chars": 2}, {"sum_logits": -1.508201003074646, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.508201003074646, "logits_per_char": -0.754100501537323, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 229, "native_id": "MCAS_2006_9_13-v1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3414562940597534, "incorrect_loss_raw": 1.4163965781529744, "correct_loss_per_char": 0.6707281470298767, "incorrect_loss_per_char": 0.7081982890764872, "correct_loss_per_token": 1.3414562940597534, "incorrect_loss_per_token": 1.4163965781529744, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3414562940597534, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3414562940597534, "logits_per_char": -0.6707281470298767, "num_chars": 2}, {"sum_logits": -1.5730865001678467, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.5730865001678467, "logits_per_char": -0.7865432500839233, "num_chars": 2}, {"sum_logits": -1.3264038562774658, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.3264038562774658, "logits_per_char": -0.6632019281387329, "num_chars": 2}, {"sum_logits": -1.3496993780136108, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3496993780136108, "logits_per_char": -0.6748496890068054, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 230, "native_id": "Mercury_401313", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4758890867233276, "incorrect_loss_raw": 1.365465482076009, "correct_loss_per_char": 0.7379445433616638, "incorrect_loss_per_char": 0.6827327410380045, "correct_loss_per_token": 1.4758890867233276, "incorrect_loss_per_token": 1.365465482076009, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3404556512832642, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3404556512832642, "logits_per_char": -0.6702278256416321, "num_chars": 2}, {"sum_logits": -1.4307278394699097, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4307278394699097, "logits_per_char": -0.7153639197349548, "num_chars": 2}, {"sum_logits": -1.3252129554748535, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.3252129554748535, "logits_per_char": -0.6626064777374268, "num_chars": 2}, {"sum_logits": -1.4758890867233276, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4758890867233276, "logits_per_char": -0.7379445433616638, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 231, "native_id": "Mercury_7137008", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4453188180923462, "incorrect_loss_raw": 1.3781133890151978, "correct_loss_per_char": 0.7226594090461731, "incorrect_loss_per_char": 0.6890566945075989, "correct_loss_per_token": 1.4453188180923462, "incorrect_loss_per_token": 1.3781133890151978, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4809412956237793, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4809412956237793, "logits_per_char": -0.7404706478118896, "num_chars": 2}, {"sum_logits": -1.3938599824905396, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3938599824905396, "logits_per_char": -0.6969299912452698, "num_chars": 2}, {"sum_logits": -1.4453188180923462, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4453188180923462, "logits_per_char": -0.7226594090461731, "num_chars": 2}, {"sum_logits": -1.2595388889312744, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.2595388889312744, "logits_per_char": -0.6297694444656372, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 232, "native_id": "Mercury_7234273", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3589892387390137, "incorrect_loss_raw": 1.4098783731460571, "correct_loss_per_char": 0.6794946193695068, "incorrect_loss_per_char": 0.7049391865730286, "correct_loss_per_token": 1.3589892387390137, "incorrect_loss_per_token": 1.4098783731460571, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.54969322681427, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.54969322681427, "logits_per_char": -0.774846613407135, "num_chars": 2}, {"sum_logits": -1.4499236345291138, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4499236345291138, "logits_per_char": -0.7249618172645569, "num_chars": 2}, {"sum_logits": -1.3589892387390137, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3589892387390137, "logits_per_char": -0.6794946193695068, "num_chars": 2}, {"sum_logits": -1.2300182580947876, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.2300182580947876, "logits_per_char": -0.6150091290473938, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 233, "native_id": "ACTAAP_2013_7_10", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4852583408355713, "incorrect_loss_raw": 1.3679787715276082, "correct_loss_per_char": 0.7426291704177856, "incorrect_loss_per_char": 0.6839893857638041, "correct_loss_per_token": 1.4852583408355713, "incorrect_loss_per_token": 1.3679787715276082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.446088194847107, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.446088194847107, "logits_per_char": -0.7230440974235535, "num_chars": 2}, {"sum_logits": -1.230682373046875, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.230682373046875, "logits_per_char": -0.6153411865234375, "num_chars": 2}, {"sum_logits": -1.4852583408355713, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4852583408355713, "logits_per_char": -0.7426291704177856, "num_chars": 2}, {"sum_logits": -1.4271657466888428, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4271657466888428, "logits_per_char": -0.7135828733444214, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 234, "native_id": "Mercury_7085383", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5044825077056885, "incorrect_loss_raw": 1.3584925333658855, "correct_loss_per_char": 0.7522412538528442, "incorrect_loss_per_char": 0.6792462666829427, "correct_loss_per_token": 1.5044825077056885, "incorrect_loss_per_token": 1.3584925333658855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3991175889968872, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3991175889968872, "logits_per_char": -0.6995587944984436, "num_chars": 2}, {"sum_logits": -1.2705475091934204, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.2705475091934204, "logits_per_char": -0.6352737545967102, "num_chars": 2}, {"sum_logits": -1.4058125019073486, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4058125019073486, "logits_per_char": -0.7029062509536743, "num_chars": 2}, {"sum_logits": -1.5044825077056885, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.5044825077056885, "logits_per_char": -0.7522412538528442, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 235, "native_id": "MEA_2013_5_9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.350701093673706, "incorrect_loss_raw": 1.4048434098561604, "correct_loss_per_char": 0.675350546836853, "incorrect_loss_per_char": 0.7024217049280802, "correct_loss_per_token": 1.350701093673706, "incorrect_loss_per_token": 1.4048434098561604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.350701093673706, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.350701093673706, "logits_per_char": -0.675350546836853, "num_chars": 2}, {"sum_logits": -1.3730621337890625, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3730621337890625, "logits_per_char": -0.6865310668945312, "num_chars": 2}, {"sum_logits": -1.3704172372817993, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3704172372817993, "logits_per_char": -0.6852086186408997, "num_chars": 2}, {"sum_logits": -1.4710508584976196, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4710508584976196, "logits_per_char": -0.7355254292488098, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 236, "native_id": "MEA_2013_8_19", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4208580255508423, "incorrect_loss_raw": 1.3903103272120159, "correct_loss_per_char": 0.7104290127754211, "incorrect_loss_per_char": 0.6951551636060079, "correct_loss_per_token": 1.4208580255508423, "incorrect_loss_per_token": 1.3903103272120159, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5546361207962036, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5546361207962036, "logits_per_char": -0.7773180603981018, "num_chars": 2}, {"sum_logits": -1.4223005771636963, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4223005771636963, "logits_per_char": -0.7111502885818481, "num_chars": 2}, {"sum_logits": -1.4208580255508423, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4208580255508423, "logits_per_char": -0.7104290127754211, "num_chars": 2}, {"sum_logits": -1.1939942836761475, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.1939942836761475, "logits_per_char": -0.5969971418380737, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 237, "native_id": "TIMSS_1995_8_K16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.361360788345337, "incorrect_loss_raw": 1.4010450442632039, "correct_loss_per_char": 0.6806803941726685, "incorrect_loss_per_char": 0.7005225221316019, "correct_loss_per_token": 1.361360788345337, "incorrect_loss_per_token": 1.4010450442632039, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.361360788345337, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.361360788345337, "logits_per_char": -0.6806803941726685, "num_chars": 2}, {"sum_logits": -1.381319522857666, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.381319522857666, "logits_per_char": -0.690659761428833, "num_chars": 2}, {"sum_logits": -1.4391002655029297, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4391002655029297, "logits_per_char": -0.7195501327514648, "num_chars": 2}, {"sum_logits": -1.3827153444290161, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3827153444290161, "logits_per_char": -0.6913576722145081, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 238, "native_id": "Mercury_7274313", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3946388959884644, "incorrect_loss_raw": 1.3934133847554524, "correct_loss_per_char": 0.6973194479942322, "incorrect_loss_per_char": 0.6967066923777262, "correct_loss_per_token": 1.3946388959884644, "incorrect_loss_per_token": 1.3934133847554524, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4610440731048584, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4610440731048584, "logits_per_char": -0.7305220365524292, "num_chars": 2}, {"sum_logits": -1.4127222299575806, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4127222299575806, "logits_per_char": -0.7063611149787903, "num_chars": 2}, {"sum_logits": -1.3946388959884644, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3946388959884644, "logits_per_char": -0.6973194479942322, "num_chars": 2}, {"sum_logits": -1.3064738512039185, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.3064738512039185, "logits_per_char": -0.6532369256019592, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 239, "native_id": "MEAP_2005_8_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4309033155441284, "incorrect_loss_raw": 1.3782750765482585, "correct_loss_per_char": 0.7154516577720642, "incorrect_loss_per_char": 0.6891375382741293, "correct_loss_per_token": 1.4309033155441284, "incorrect_loss_per_token": 1.3782750765482585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4309033155441284, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4309033155441284, "logits_per_char": -0.7154516577720642, "num_chars": 2}, {"sum_logits": -1.354108214378357, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.354108214378357, "logits_per_char": -0.6770541071891785, "num_chars": 2}, {"sum_logits": -1.4353338479995728, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4353338479995728, "logits_per_char": -0.7176669239997864, "num_chars": 2}, {"sum_logits": -1.3453831672668457, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.3453831672668457, "logits_per_char": -0.6726915836334229, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 240, "native_id": "NYSEDREGENTS_2012_4_18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4840987920761108, "incorrect_loss_raw": 1.363225261370341, "correct_loss_per_char": 0.7420493960380554, "incorrect_loss_per_char": 0.6816126306851705, "correct_loss_per_token": 1.4840987920761108, "incorrect_loss_per_token": 1.363225261370341, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4059807062149048, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4059807062149048, "logits_per_char": -0.7029903531074524, "num_chars": 2}, {"sum_logits": -1.2564750909805298, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.2564750909805298, "logits_per_char": -0.6282375454902649, "num_chars": 2}, {"sum_logits": -1.4272199869155884, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4272199869155884, "logits_per_char": -0.7136099934577942, "num_chars": 2}, {"sum_logits": -1.4840987920761108, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4840987920761108, "logits_per_char": -0.7420493960380554, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 241, "native_id": "Mercury_7040950", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2732025384902954, "incorrect_loss_raw": 1.4326555728912354, "correct_loss_per_char": 0.6366012692451477, "incorrect_loss_per_char": 0.7163277864456177, "correct_loss_per_token": 1.2732025384902954, "incorrect_loss_per_token": 1.4326555728912354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.443615198135376, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.443615198135376, "logits_per_char": -0.721807599067688, "num_chars": 2}, {"sum_logits": -1.3950777053833008, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3950777053833008, "logits_per_char": -0.6975388526916504, "num_chars": 2}, {"sum_logits": -1.4592738151550293, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4592738151550293, "logits_per_char": -0.7296369075775146, "num_chars": 2}, {"sum_logits": -1.2732025384902954, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.2732025384902954, "logits_per_char": -0.6366012692451477, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 242, "native_id": "OHAT_2008_8_29", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4800217151641846, "incorrect_loss_raw": 1.3692731459935505, "correct_loss_per_char": 0.7400108575820923, "incorrect_loss_per_char": 0.6846365729967753, "correct_loss_per_token": 1.4800217151641846, "incorrect_loss_per_token": 1.3692731459935505, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5203405618667603, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5203405618667603, "logits_per_char": -0.7601702809333801, "num_chars": 2}, {"sum_logits": -1.4800217151641846, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4800217151641846, "logits_per_char": -0.7400108575820923, "num_chars": 2}, {"sum_logits": -1.3622090816497803, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3622090816497803, "logits_per_char": -0.6811045408248901, "num_chars": 2}, {"sum_logits": -1.2252697944641113, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2252697944641113, "logits_per_char": -0.6126348972320557, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 243, "native_id": "Mercury_7213675", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.424477219581604, "incorrect_loss_raw": 1.3885103861490886, "correct_loss_per_char": 0.712238609790802, "incorrect_loss_per_char": 0.6942551930745443, "correct_loss_per_token": 1.424477219581604, "incorrect_loss_per_token": 1.3885103861490886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3419320583343506, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3419320583343506, "logits_per_char": -0.6709660291671753, "num_chars": 2}, {"sum_logits": -1.424477219581604, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.424477219581604, "logits_per_char": -0.712238609790802, "num_chars": 2}, {"sum_logits": -1.557794451713562, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.557794451713562, "logits_per_char": -0.778897225856781, "num_chars": 2}, {"sum_logits": -1.265804648399353, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.265804648399353, "logits_per_char": -0.6329023241996765, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 244, "native_id": "MCAS_2003_5_8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3091435432434082, "incorrect_loss_raw": 1.4212984641393025, "correct_loss_per_char": 0.6545717716217041, "incorrect_loss_per_char": 0.7106492320696512, "correct_loss_per_token": 1.3091435432434082, "incorrect_loss_per_token": 1.4212984641393025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5189802646636963, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.5189802646636963, "logits_per_char": -0.7594901323318481, "num_chars": 2}, {"sum_logits": -1.3746471405029297, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3746471405029297, "logits_per_char": -0.6873235702514648, "num_chars": 2}, {"sum_logits": -1.3091435432434082, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3091435432434082, "logits_per_char": -0.6545717716217041, "num_chars": 2}, {"sum_logits": -1.3702679872512817, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3702679872512817, "logits_per_char": -0.6851339936256409, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 245, "native_id": "Mercury_SC_401166", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.312466025352478, "incorrect_loss_raw": 1.4192183017730713, "correct_loss_per_char": 0.656233012676239, "incorrect_loss_per_char": 0.7096091508865356, "correct_loss_per_token": 1.312466025352478, "incorrect_loss_per_token": 1.4192183017730713, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.312466025352478, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.312466025352478, "logits_per_char": -0.656233012676239, "num_chars": 2}, {"sum_logits": -1.3905606269836426, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3905606269836426, "logits_per_char": -0.6952803134918213, "num_chars": 2}, {"sum_logits": -1.4527515172958374, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4527515172958374, "logits_per_char": -0.7263757586479187, "num_chars": 2}, {"sum_logits": -1.4143427610397339, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4143427610397339, "logits_per_char": -0.7071713805198669, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 246, "native_id": "Mercury_185238", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.309109091758728, "incorrect_loss_raw": 1.4206943114598591, "correct_loss_per_char": 0.654554545879364, "incorrect_loss_per_char": 0.7103471557299296, "correct_loss_per_token": 1.309109091758728, "incorrect_loss_per_token": 1.4206943114598591, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3616009950637817, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3616009950637817, "logits_per_char": -0.6808004975318909, "num_chars": 2}, {"sum_logits": -1.309109091758728, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.309109091758728, "logits_per_char": -0.654554545879364, "num_chars": 2}, {"sum_logits": -1.4948612451553345, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4948612451553345, "logits_per_char": -0.7474306225776672, "num_chars": 2}, {"sum_logits": -1.4056206941604614, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4056206941604614, "logits_per_char": -0.7028103470802307, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 247, "native_id": "Mercury_7007473", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4221925735473633, "incorrect_loss_raw": 1.3813635110855103, "correct_loss_per_char": 0.7110962867736816, "incorrect_loss_per_char": 0.6906817555427551, "correct_loss_per_token": 1.4221925735473633, "incorrect_loss_per_token": 1.3813635110855103, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4221925735473633, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4221925735473633, "logits_per_char": -0.7110962867736816, "num_chars": 2}, {"sum_logits": -1.4230387210845947, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4230387210845947, "logits_per_char": -0.7115193605422974, "num_chars": 2}, {"sum_logits": -1.2799780368804932, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.2799780368804932, "logits_per_char": -0.6399890184402466, "num_chars": 2}, {"sum_logits": -1.4410737752914429, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4410737752914429, "logits_per_char": -0.7205368876457214, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 248, "native_id": "Mercury_7223265", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4469159841537476, "incorrect_loss_raw": 1.3744893074035645, "correct_loss_per_char": 0.7234579920768738, "incorrect_loss_per_char": 0.6872446537017822, "correct_loss_per_token": 1.4469159841537476, "incorrect_loss_per_token": 1.3744893074035645, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4466965198516846, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4466965198516846, "logits_per_char": -0.7233482599258423, "num_chars": 2}, {"sum_logits": -1.2892719507217407, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.2892719507217407, "logits_per_char": -0.6446359753608704, "num_chars": 2}, {"sum_logits": -1.387499451637268, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.387499451637268, "logits_per_char": -0.693749725818634, "num_chars": 2}, {"sum_logits": -1.4469159841537476, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4469159841537476, "logits_per_char": -0.7234579920768738, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 249, "native_id": "MCAS_1999_4_16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3116164207458496, "incorrect_loss_raw": 1.4212518533070881, "correct_loss_per_char": 0.6558082103729248, "incorrect_loss_per_char": 0.7106259266535441, "correct_loss_per_token": 1.3116164207458496, "incorrect_loss_per_token": 1.4212518533070881, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3116164207458496, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3116164207458496, "logits_per_char": -0.6558082103729248, "num_chars": 2}, {"sum_logits": -1.5217933654785156, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.5217933654785156, "logits_per_char": -0.7608966827392578, "num_chars": 2}, {"sum_logits": -1.4000799655914307, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4000799655914307, "logits_per_char": -0.7000399827957153, "num_chars": 2}, {"sum_logits": -1.3418822288513184, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3418822288513184, "logits_per_char": -0.6709411144256592, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 250, "native_id": "Mercury_400806", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3554168939590454, "incorrect_loss_raw": 1.4036751985549927, "correct_loss_per_char": 0.6777084469795227, "incorrect_loss_per_char": 0.7018375992774963, "correct_loss_per_token": 1.3554168939590454, "incorrect_loss_per_token": 1.4036751985549927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4660664796829224, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4660664796829224, "logits_per_char": -0.7330332398414612, "num_chars": 2}, {"sum_logits": -1.3554168939590454, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3554168939590454, "logits_per_char": -0.6777084469795227, "num_chars": 2}, {"sum_logits": -1.3274372816085815, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3274372816085815, "logits_per_char": -0.6637186408042908, "num_chars": 2}, {"sum_logits": -1.4175218343734741, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4175218343734741, "logits_per_char": -0.7087609171867371, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 251, "native_id": "Mercury_SC_401787", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4628791809082031, "incorrect_loss_raw": 1.3689420223236084, "correct_loss_per_char": 0.7314395904541016, "incorrect_loss_per_char": 0.6844710111618042, "correct_loss_per_token": 1.4628791809082031, "incorrect_loss_per_token": 1.3689420223236084, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3832323551177979, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3832323551177979, "logits_per_char": -0.6916161775588989, "num_chars": 2}, {"sum_logits": -1.4628791809082031, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4628791809082031, "logits_per_char": -0.7314395904541016, "num_chars": 2}, {"sum_logits": -1.3056517839431763, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3056517839431763, "logits_per_char": -0.6528258919715881, "num_chars": 2}, {"sum_logits": -1.417941927909851, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.417941927909851, "logits_per_char": -0.7089709639549255, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 252, "native_id": "Mercury_SC_408857", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4665675163269043, "incorrect_loss_raw": 1.3711127042770386, "correct_loss_per_char": 0.7332837581634521, "incorrect_loss_per_char": 0.6855563521385193, "correct_loss_per_token": 1.4665675163269043, "incorrect_loss_per_token": 1.3711127042770386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4656857252120972, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4656857252120972, "logits_per_char": -0.7328428626060486, "num_chars": 2}, {"sum_logits": -1.4665675163269043, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4665675163269043, "logits_per_char": -0.7332837581634521, "num_chars": 2}, {"sum_logits": -1.4129149913787842, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4129149913787842, "logits_per_char": -0.7064574956893921, "num_chars": 2}, {"sum_logits": -1.2347373962402344, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.2347373962402344, "logits_per_char": -0.6173686981201172, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 253, "native_id": "Mercury_405771", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.372849464416504, "incorrect_loss_raw": 1.4001473983128865, "correct_loss_per_char": 0.686424732208252, "incorrect_loss_per_char": 0.7000736991564432, "correct_loss_per_token": 1.372849464416504, "incorrect_loss_per_token": 1.4001473983128865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4663535356521606, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4663535356521606, "logits_per_char": -0.7331767678260803, "num_chars": 2}, {"sum_logits": -1.3047940731048584, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3047940731048584, "logits_per_char": -0.6523970365524292, "num_chars": 2}, {"sum_logits": -1.372849464416504, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.372849464416504, "logits_per_char": -0.686424732208252, "num_chars": 2}, {"sum_logits": -1.4292945861816406, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4292945861816406, "logits_per_char": -0.7146472930908203, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 254, "native_id": "Mercury_SC_401122", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2968699932098389, "incorrect_loss_raw": 1.4229139884312947, "correct_loss_per_char": 0.6484349966049194, "incorrect_loss_per_char": 0.7114569942156473, "correct_loss_per_token": 1.2968699932098389, "incorrect_loss_per_token": 1.4229139884312947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.432226538658142, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.432226538658142, "logits_per_char": -0.716113269329071, "num_chars": 2}, {"sum_logits": -1.4294975996017456, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4294975996017456, "logits_per_char": -0.7147487998008728, "num_chars": 2}, {"sum_logits": -1.2968699932098389, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.2968699932098389, "logits_per_char": -0.6484349966049194, "num_chars": 2}, {"sum_logits": -1.4070178270339966, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4070178270339966, "logits_per_char": -0.7035089135169983, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 255, "native_id": "ACTAAP_2011_5_13", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5108404159545898, "incorrect_loss_raw": 1.3549110094706218, "correct_loss_per_char": 0.7554202079772949, "incorrect_loss_per_char": 0.6774555047353109, "correct_loss_per_token": 1.5108404159545898, "incorrect_loss_per_token": 1.3549110094706218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5108404159545898, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5108404159545898, "logits_per_char": -0.7554202079772949, "num_chars": 2}, {"sum_logits": -1.3962750434875488, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3962750434875488, "logits_per_char": -0.6981375217437744, "num_chars": 2}, {"sum_logits": -1.341873049736023, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.341873049736023, "logits_per_char": -0.6709365248680115, "num_chars": 2}, {"sum_logits": -1.3265849351882935, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3265849351882935, "logits_per_char": -0.6632924675941467, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 256, "native_id": "TIMSS_2003_4_pg81", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.442115306854248, "incorrect_loss_raw": 1.3760961294174194, "correct_loss_per_char": 0.721057653427124, "incorrect_loss_per_char": 0.6880480647087097, "correct_loss_per_token": 1.442115306854248, "incorrect_loss_per_token": 1.3760961294174194, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4579890966415405, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4579890966415405, "logits_per_char": -0.7289945483207703, "num_chars": 2}, {"sum_logits": -1.3071017265319824, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.3071017265319824, "logits_per_char": -0.6535508632659912, "num_chars": 2}, {"sum_logits": -1.3631975650787354, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3631975650787354, "logits_per_char": -0.6815987825393677, "num_chars": 2}, {"sum_logits": -1.442115306854248, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.442115306854248, "logits_per_char": -0.721057653427124, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 257, "native_id": "Mercury_401659", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.449313998222351, "incorrect_loss_raw": 1.3730142911275227, "correct_loss_per_char": 0.7246569991111755, "incorrect_loss_per_char": 0.6865071455637614, "correct_loss_per_token": 1.449313998222351, "incorrect_loss_per_token": 1.3730142911275227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.449313998222351, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.449313998222351, "logits_per_char": -0.7246569991111755, "num_chars": 2}, {"sum_logits": -1.3940603733062744, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3940603733062744, "logits_per_char": -0.6970301866531372, "num_chars": 2}, {"sum_logits": -1.4031217098236084, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4031217098236084, "logits_per_char": -0.7015608549118042, "num_chars": 2}, {"sum_logits": -1.3218607902526855, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.3218607902526855, "logits_per_char": -0.6609303951263428, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 258, "native_id": "Mercury_7099225", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3612642288208008, "incorrect_loss_raw": 1.4010250568389893, "correct_loss_per_char": 0.6806321144104004, "incorrect_loss_per_char": 0.7005125284194946, "correct_loss_per_token": 1.3612642288208008, "incorrect_loss_per_token": 1.4010250568389893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4311074018478394, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4311074018478394, "logits_per_char": -0.7155537009239197, "num_chars": 2}, {"sum_logits": -1.3669456243515015, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3669456243515015, "logits_per_char": -0.6834728121757507, "num_chars": 2}, {"sum_logits": -1.405022144317627, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.405022144317627, "logits_per_char": -0.7025110721588135, "num_chars": 2}, {"sum_logits": -1.3612642288208008, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.3612642288208008, "logits_per_char": -0.6806321144104004, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 259, "native_id": "Mercury_7110215", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2588436603546143, "incorrect_loss_raw": 1.4386546611785889, "correct_loss_per_char": 0.6294218301773071, "incorrect_loss_per_char": 0.7193273305892944, "correct_loss_per_token": 1.2588436603546143, "incorrect_loss_per_token": 1.4386546611785889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4157912731170654, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4157912731170654, "logits_per_char": -0.7078956365585327, "num_chars": 2}, {"sum_logits": -1.4510411024093628, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4510411024093628, "logits_per_char": -0.7255205512046814, "num_chars": 2}, {"sum_logits": -1.4491316080093384, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4491316080093384, "logits_per_char": -0.7245658040046692, "num_chars": 2}, {"sum_logits": -1.2588436603546143, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2588436603546143, "logits_per_char": -0.6294218301773071, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 260, "native_id": "Mercury_7246313", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3721994161605835, "incorrect_loss_raw": 1.398802359898885, "correct_loss_per_char": 0.6860997080802917, "incorrect_loss_per_char": 0.6994011799494425, "correct_loss_per_token": 1.3721994161605835, "incorrect_loss_per_token": 1.398802359898885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3177168369293213, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3177168369293213, "logits_per_char": -0.6588584184646606, "num_chars": 2}, {"sum_logits": -1.4084364175796509, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4084364175796509, "logits_per_char": -0.7042182087898254, "num_chars": 2}, {"sum_logits": -1.3721994161605835, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3721994161605835, "logits_per_char": -0.6860997080802917, "num_chars": 2}, {"sum_logits": -1.470253825187683, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.470253825187683, "logits_per_char": -0.7351269125938416, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 261, "native_id": "MCAS_2005_8_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3537237644195557, "incorrect_loss_raw": 1.4031543334325154, "correct_loss_per_char": 0.6768618822097778, "incorrect_loss_per_char": 0.7015771667162577, "correct_loss_per_token": 1.3537237644195557, "incorrect_loss_per_token": 1.4031543334325154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.423709750175476, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.423709750175476, "logits_per_char": -0.711854875087738, "num_chars": 2}, {"sum_logits": -1.406551718711853, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.406551718711853, "logits_per_char": -0.7032758593559265, "num_chars": 2}, {"sum_logits": -1.3792015314102173, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3792015314102173, "logits_per_char": -0.6896007657051086, "num_chars": 2}, {"sum_logits": -1.3537237644195557, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.3537237644195557, "logits_per_char": -0.6768618822097778, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 262, "native_id": "Mercury_SC_401143", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4728668928146362, "incorrect_loss_raw": 1.370149811108907, "correct_loss_per_char": 0.7364334464073181, "incorrect_loss_per_char": 0.6850749055544535, "correct_loss_per_token": 1.4728668928146362, "incorrect_loss_per_token": 1.370149811108907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3247947692871094, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3247947692871094, "logits_per_char": -0.6623973846435547, "num_chars": 2}, {"sum_logits": -1.2529406547546387, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.2529406547546387, "logits_per_char": -0.6264703273773193, "num_chars": 2}, {"sum_logits": -1.4728668928146362, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4728668928146362, "logits_per_char": -0.7364334464073181, "num_chars": 2}, {"sum_logits": -1.5327140092849731, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.5327140092849731, "logits_per_char": -0.7663570046424866, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 263, "native_id": "MCAS_2011_8_17685", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2849948406219482, "incorrect_loss_raw": 1.4303211371103923, "correct_loss_per_char": 0.6424974203109741, "incorrect_loss_per_char": 0.7151605685551962, "correct_loss_per_token": 1.2849948406219482, "incorrect_loss_per_token": 1.4303211371103923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4907444715499878, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4907444715499878, "logits_per_char": -0.7453722357749939, "num_chars": 2}, {"sum_logits": -1.3543891906738281, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3543891906738281, "logits_per_char": -0.6771945953369141, "num_chars": 2}, {"sum_logits": -1.4458297491073608, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4458297491073608, "logits_per_char": -0.7229148745536804, "num_chars": 2}, {"sum_logits": -1.2849948406219482, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2849948406219482, "logits_per_char": -0.6424974203109741, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 264, "native_id": "AKDE&ED_2008_8_39", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4027715921401978, "incorrect_loss_raw": 1.3876899083455403, "correct_loss_per_char": 0.7013857960700989, "incorrect_loss_per_char": 0.6938449541727701, "correct_loss_per_token": 1.4027715921401978, "incorrect_loss_per_token": 1.3876899083455403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3972431421279907, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3972431421279907, "logits_per_char": -0.6986215710639954, "num_chars": 2}, {"sum_logits": -1.4076309204101562, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4076309204101562, "logits_per_char": -0.7038154602050781, "num_chars": 2}, {"sum_logits": -1.4027715921401978, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4027715921401978, "logits_per_char": -0.7013857960700989, "num_chars": 2}, {"sum_logits": -1.3581956624984741, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.3581956624984741, "logits_per_char": -0.6790978312492371, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 265, "native_id": "Mercury_7024360", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.415345549583435, "incorrect_loss_raw": 1.3874330917994182, "correct_loss_per_char": 0.7076727747917175, "incorrect_loss_per_char": 0.6937165458997091, "correct_loss_per_token": 1.415345549583435, "incorrect_loss_per_token": 1.3874330917994182, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5136873722076416, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.5136873722076416, "logits_per_char": -0.7568436861038208, "num_chars": 2}, {"sum_logits": -1.3891899585723877, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.3891899585723877, "logits_per_char": -0.6945949792861938, "num_chars": 2}, {"sum_logits": -1.415345549583435, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.415345549583435, "logits_per_char": -0.7076727747917175, "num_chars": 2}, {"sum_logits": -1.259421944618225, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.259421944618225, "logits_per_char": -0.6297109723091125, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 266, "native_id": "MSA_2012_5_34", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1548699140548706, "incorrect_loss_raw": 1.4882025718688965, "correct_loss_per_char": 0.5774349570274353, "incorrect_loss_per_char": 0.7441012859344482, "correct_loss_per_token": 1.1548699140548706, "incorrect_loss_per_token": 1.4882025718688965, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6366949081420898, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.6366949081420898, "logits_per_char": -0.8183474540710449, "num_chars": 2}, {"sum_logits": -1.4399466514587402, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4399466514587402, "logits_per_char": -0.7199733257293701, "num_chars": 2}, {"sum_logits": -1.3879661560058594, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3879661560058594, "logits_per_char": -0.6939830780029297, "num_chars": 2}, {"sum_logits": -1.1548699140548706, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.1548699140548706, "logits_per_char": -0.5774349570274353, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 267, "native_id": "TIMSS_2003_8_pg44", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2098995447158813, "incorrect_loss_raw": 1.4755966265996296, "correct_loss_per_char": 0.6049497723579407, "incorrect_loss_per_char": 0.7377983132998148, "correct_loss_per_token": 1.2098995447158813, "incorrect_loss_per_token": 1.4755966265996296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7252057790756226, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.7252057790756226, "logits_per_char": -0.8626028895378113, "num_chars": 2}, {"sum_logits": -1.4191787242889404, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4191787242889404, "logits_per_char": -0.7095893621444702, "num_chars": 2}, {"sum_logits": -1.2824053764343262, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.2824053764343262, "logits_per_char": -0.6412026882171631, "num_chars": 2}, {"sum_logits": -1.2098995447158813, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.2098995447158813, "logits_per_char": -0.6049497723579407, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 268, "native_id": "Mercury_7077525", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4358450174331665, "incorrect_loss_raw": 1.3756049474080403, "correct_loss_per_char": 0.7179225087165833, "incorrect_loss_per_char": 0.6878024737040201, "correct_loss_per_token": 1.4358450174331665, "incorrect_loss_per_token": 1.3756049474080403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3823728561401367, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3823728561401367, "logits_per_char": -0.6911864280700684, "num_chars": 2}, {"sum_logits": -1.4164272546768188, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4164272546768188, "logits_per_char": -0.7082136273384094, "num_chars": 2}, {"sum_logits": -1.4358450174331665, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4358450174331665, "logits_per_char": -0.7179225087165833, "num_chars": 2}, {"sum_logits": -1.3280147314071655, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3280147314071655, "logits_per_char": -0.6640073657035828, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 269, "native_id": "Mercury_SC_405164", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.310178518295288, "incorrect_loss_raw": 1.4199929634730022, "correct_loss_per_char": 0.655089259147644, "incorrect_loss_per_char": 0.7099964817365011, "correct_loss_per_token": 1.310178518295288, "incorrect_loss_per_token": 1.4199929634730022, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4447218179702759, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4447218179702759, "logits_per_char": -0.7223609089851379, "num_chars": 2}, {"sum_logits": -1.468775749206543, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.468775749206543, "logits_per_char": -0.7343878746032715, "num_chars": 2}, {"sum_logits": -1.310178518295288, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.310178518295288, "logits_per_char": -0.655089259147644, "num_chars": 2}, {"sum_logits": -1.3464813232421875, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3464813232421875, "logits_per_char": -0.6732406616210938, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 270, "native_id": "NYSEDREGENTS_2012_4_7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4971587657928467, "incorrect_loss_raw": 1.3572261730829875, "correct_loss_per_char": 0.7485793828964233, "incorrect_loss_per_char": 0.6786130865414938, "correct_loss_per_token": 1.4971587657928467, "incorrect_loss_per_token": 1.3572261730829875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2823501825332642, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.2823501825332642, "logits_per_char": -0.6411750912666321, "num_chars": 2}, {"sum_logits": -1.389106035232544, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.389106035232544, "logits_per_char": -0.694553017616272, "num_chars": 2}, {"sum_logits": -1.4971587657928467, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4971587657928467, "logits_per_char": -0.7485793828964233, "num_chars": 2}, {"sum_logits": -1.4002223014831543, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4002223014831543, "logits_per_char": -0.7001111507415771, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 271, "native_id": "TIMSS_2007_8_pg102", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3282012939453125, "incorrect_loss_raw": 1.4204328854878743, "correct_loss_per_char": 0.6641006469726562, "incorrect_loss_per_char": 0.7102164427439371, "correct_loss_per_token": 1.3282012939453125, "incorrect_loss_per_token": 1.4204328854878743, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3282012939453125, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3282012939453125, "logits_per_char": -0.6641006469726562, "num_chars": 2}, {"sum_logits": -1.3268412351608276, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.3268412351608276, "logits_per_char": -0.6634206175804138, "num_chars": 2}, {"sum_logits": -1.3641266822814941, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3641266822814941, "logits_per_char": -0.6820633411407471, "num_chars": 2}, {"sum_logits": -1.5703307390213013, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.5703307390213013, "logits_per_char": -0.7851653695106506, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 272, "native_id": "Mercury_7250128", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6279083490371704, "incorrect_loss_raw": 1.3237191438674927, "correct_loss_per_char": 0.8139541745185852, "incorrect_loss_per_char": 0.6618595719337463, "correct_loss_per_token": 1.6279083490371704, "incorrect_loss_per_token": 1.3237191438674927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.373252034187317, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.373252034187317, "logits_per_char": -0.6866260170936584, "num_chars": 2}, {"sum_logits": -1.2927476167678833, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2927476167678833, "logits_per_char": -0.6463738083839417, "num_chars": 2}, {"sum_logits": -1.3051577806472778, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3051577806472778, "logits_per_char": -0.6525788903236389, "num_chars": 2}, {"sum_logits": -1.6279083490371704, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.6279083490371704, "logits_per_char": -0.8139541745185852, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 273, "native_id": "Mercury_7213763", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.282593846321106, "incorrect_loss_raw": 1.434690276781718, "correct_loss_per_char": 0.641296923160553, "incorrect_loss_per_char": 0.717345138390859, "correct_loss_per_token": 1.282593846321106, "incorrect_loss_per_token": 1.434690276781718, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.359004020690918, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.359004020690918, "logits_per_char": -0.679502010345459, "num_chars": 2}, {"sum_logits": -1.282593846321106, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.282593846321106, "logits_per_char": -0.641296923160553, "num_chars": 2}, {"sum_logits": -1.380098581314087, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.380098581314087, "logits_per_char": -0.6900492906570435, "num_chars": 2}, {"sum_logits": -1.564968228340149, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.564968228340149, "logits_per_char": -0.7824841141700745, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 274, "native_id": "Mercury_SC_407450", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4308228492736816, "incorrect_loss_raw": 1.3801953395207722, "correct_loss_per_char": 0.7154114246368408, "incorrect_loss_per_char": 0.6900976697603861, "correct_loss_per_token": 1.4308228492736816, "incorrect_loss_per_token": 1.3801953395207722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4921607971191406, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4921607971191406, "logits_per_char": -0.7460803985595703, "num_chars": 2}, {"sum_logits": -1.3054261207580566, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.3054261207580566, "logits_per_char": -0.6527130603790283, "num_chars": 2}, {"sum_logits": -1.4308228492736816, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4308228492736816, "logits_per_char": -0.7154114246368408, "num_chars": 2}, {"sum_logits": -1.3429991006851196, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.3429991006851196, "logits_per_char": -0.6714995503425598, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 275, "native_id": "Mercury_SC_405232", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3958007097244263, "incorrect_loss_raw": 1.3886740605036418, "correct_loss_per_char": 0.6979003548622131, "incorrect_loss_per_char": 0.6943370302518209, "correct_loss_per_token": 1.3958007097244263, "incorrect_loss_per_token": 1.3886740605036418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4270514249801636, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4270514249801636, "logits_per_char": -0.7135257124900818, "num_chars": 2}, {"sum_logits": -1.375065803527832, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.375065803527832, "logits_per_char": -0.687532901763916, "num_chars": 2}, {"sum_logits": -1.3958007097244263, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3958007097244263, "logits_per_char": -0.6979003548622131, "num_chars": 2}, {"sum_logits": -1.3639049530029297, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3639049530029297, "logits_per_char": -0.6819524765014648, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 276, "native_id": "VASoL_2009_5_28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5399664640426636, "incorrect_loss_raw": 1.3552486499150593, "correct_loss_per_char": 0.7699832320213318, "incorrect_loss_per_char": 0.6776243249575297, "correct_loss_per_token": 1.5399664640426636, "incorrect_loss_per_token": 1.3552486499150593, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2337292432785034, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.2337292432785034, "logits_per_char": -0.6168646216392517, "num_chars": 2}, {"sum_logits": -1.284533977508545, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.284533977508545, "logits_per_char": -0.6422669887542725, "num_chars": 2}, {"sum_logits": -1.5399664640426636, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.5399664640426636, "logits_per_char": -0.7699832320213318, "num_chars": 2}, {"sum_logits": -1.5474827289581299, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.5474827289581299, "logits_per_char": -0.7737413644790649, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 277, "native_id": "MDSA_2009_8_32", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3755520582199097, "incorrect_loss_raw": 1.3995921611785889, "correct_loss_per_char": 0.6877760291099548, "incorrect_loss_per_char": 0.6997960805892944, "correct_loss_per_token": 1.3755520582199097, "incorrect_loss_per_token": 1.3995921611785889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.509621024131775, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.509621024131775, "logits_per_char": -0.7548105120658875, "num_chars": 2}, {"sum_logits": -1.3593676090240479, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3593676090240479, "logits_per_char": -0.6796838045120239, "num_chars": 2}, {"sum_logits": -1.3755520582199097, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3755520582199097, "logits_per_char": -0.6877760291099548, "num_chars": 2}, {"sum_logits": -1.3297878503799438, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3297878503799438, "logits_per_char": -0.6648939251899719, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 278, "native_id": "ACTAAP_2007_7_35", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.445235013961792, "incorrect_loss_raw": 1.3745834430058796, "correct_loss_per_char": 0.722617506980896, "incorrect_loss_per_char": 0.6872917215029398, "correct_loss_per_token": 1.445235013961792, "incorrect_loss_per_token": 1.3745834430058796, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3341302871704102, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3341302871704102, "logits_per_char": -0.6670651435852051, "num_chars": 2}, {"sum_logits": -1.4573436975479126, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4573436975479126, "logits_per_char": -0.7286718487739563, "num_chars": 2}, {"sum_logits": -1.445235013961792, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.445235013961792, "logits_per_char": -0.722617506980896, "num_chars": 2}, {"sum_logits": -1.3322763442993164, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3322763442993164, "logits_per_char": -0.6661381721496582, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 279, "native_id": "NCEOGA_2013_8_32", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3941361904144287, "incorrect_loss_raw": 1.3920905590057373, "correct_loss_per_char": 0.6970680952072144, "incorrect_loss_per_char": 0.6960452795028687, "correct_loss_per_token": 1.3941361904144287, "incorrect_loss_per_token": 1.3920905590057373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3941361904144287, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3941361904144287, "logits_per_char": -0.6970680952072144, "num_chars": 2}, {"sum_logits": -1.3680576086044312, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3680576086044312, "logits_per_char": -0.6840288043022156, "num_chars": 2}, {"sum_logits": -1.4973156452178955, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4973156452178955, "logits_per_char": -0.7486578226089478, "num_chars": 2}, {"sum_logits": -1.3108984231948853, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3108984231948853, "logits_per_char": -0.6554492115974426, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 280, "native_id": "VASoL_2008_5_5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3839555978775024, "incorrect_loss_raw": 1.40195628007253, "correct_loss_per_char": 0.6919777989387512, "incorrect_loss_per_char": 0.700978140036265, "correct_loss_per_token": 1.3839555978775024, "incorrect_loss_per_token": 1.40195628007253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5436210632324219, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.5436210632324219, "logits_per_char": -0.7718105316162109, "num_chars": 2}, {"sum_logits": -1.4338433742523193, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4338433742523193, "logits_per_char": -0.7169216871261597, "num_chars": 2}, {"sum_logits": -1.3839555978775024, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3839555978775024, "logits_per_char": -0.6919777989387512, "num_chars": 2}, {"sum_logits": -1.2284044027328491, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.2284044027328491, "logits_per_char": -0.6142022013664246, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 281, "native_id": "Mercury_7240923", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3922157287597656, "incorrect_loss_raw": 1.3918376763661702, "correct_loss_per_char": 0.6961078643798828, "incorrect_loss_per_char": 0.6959188381830851, "correct_loss_per_token": 1.3922157287597656, "incorrect_loss_per_token": 1.3918376763661702, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3922157287597656, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3922157287597656, "logits_per_char": -0.6961078643798828, "num_chars": 2}, {"sum_logits": -1.3400465250015259, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.3400465250015259, "logits_per_char": -0.6700232625007629, "num_chars": 2}, {"sum_logits": -1.3866379261016846, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3866379261016846, "logits_per_char": -0.6933189630508423, "num_chars": 2}, {"sum_logits": -1.4488285779953003, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4488285779953003, "logits_per_char": -0.7244142889976501, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 282, "native_id": "Mercury_7122955", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.460214614868164, "incorrect_loss_raw": 1.3665722608566284, "correct_loss_per_char": 0.730107307434082, "incorrect_loss_per_char": 0.6832861304283142, "correct_loss_per_token": 1.460214614868164, "incorrect_loss_per_token": 1.3665722608566284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3426892757415771, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.3426892757415771, "logits_per_char": -0.6713446378707886, "num_chars": 2}, {"sum_logits": -1.358701467514038, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.358701467514038, "logits_per_char": -0.679350733757019, "num_chars": 2}, {"sum_logits": -1.460214614868164, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.460214614868164, "logits_per_char": -0.730107307434082, "num_chars": 2}, {"sum_logits": -1.39832603931427, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.39832603931427, "logits_per_char": -0.699163019657135, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 283, "native_id": "NYSEDREGENTS_2008_4_5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4669808149337769, "incorrect_loss_raw": 1.371880571047465, "correct_loss_per_char": 0.7334904074668884, "incorrect_loss_per_char": 0.6859402855237325, "correct_loss_per_token": 1.4669808149337769, "incorrect_loss_per_token": 1.371880571047465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3340853452682495, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3340853452682495, "logits_per_char": -0.6670426726341248, "num_chars": 2}, {"sum_logits": -1.4669808149337769, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.4669808149337769, "logits_per_char": -0.7334904074668884, "num_chars": 2}, {"sum_logits": -1.3194490671157837, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.3194490671157837, "logits_per_char": -0.6597245335578918, "num_chars": 2}, {"sum_logits": -1.4621073007583618, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.4621073007583618, "logits_per_char": -0.7310536503791809, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 284, "native_id": "Mercury_7015663", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.351662278175354, "incorrect_loss_raw": 1.4054367144902546, "correct_loss_per_char": 0.675831139087677, "incorrect_loss_per_char": 0.7027183572451273, "correct_loss_per_token": 1.351662278175354, "incorrect_loss_per_token": 1.4054367144902546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.351662278175354, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.351662278175354, "logits_per_char": -0.675831139087677, "num_chars": 2}, {"sum_logits": -1.336996078491211, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.336996078491211, "logits_per_char": -0.6684980392456055, "num_chars": 2}, {"sum_logits": -1.453959584236145, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.453959584236145, "logits_per_char": -0.7269797921180725, "num_chars": 2}, {"sum_logits": -1.4253544807434082, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4253544807434082, "logits_per_char": -0.7126772403717041, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 285, "native_id": "Mercury_7057785", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4507408142089844, "incorrect_loss_raw": 1.3744673331578572, "correct_loss_per_char": 0.7253704071044922, "incorrect_loss_per_char": 0.6872336665789286, "correct_loss_per_token": 1.4507408142089844, "incorrect_loss_per_token": 1.3744673331578572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3064340353012085, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.3064340353012085, "logits_per_char": -0.6532170176506042, "num_chars": 2}, {"sum_logits": -1.4507408142089844, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4507408142089844, "logits_per_char": -0.7253704071044922, "num_chars": 2}, {"sum_logits": -1.3739774227142334, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3739774227142334, "logits_per_char": -0.6869887113571167, "num_chars": 2}, {"sum_logits": -1.4429905414581299, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4429905414581299, "logits_per_char": -0.7214952707290649, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 286, "native_id": "Mercury_401785", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4039660692214966, "incorrect_loss_raw": 1.3865247964859009, "correct_loss_per_char": 0.7019830346107483, "incorrect_loss_per_char": 0.6932623982429504, "correct_loss_per_token": 1.4039660692214966, "incorrect_loss_per_token": 1.3865247964859009, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3780946731567383, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3780946731567383, "logits_per_char": -0.6890473365783691, "num_chars": 2}, {"sum_logits": -1.4039660692214966, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4039660692214966, "logits_per_char": -0.7019830346107483, "num_chars": 2}, {"sum_logits": -1.4346669912338257, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4346669912338257, "logits_per_char": -0.7173334956169128, "num_chars": 2}, {"sum_logits": -1.3468127250671387, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3468127250671387, "logits_per_char": -0.6734063625335693, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 287, "native_id": "Mercury_SC_405510", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.453753113746643, "incorrect_loss_raw": 1.3715806007385254, "correct_loss_per_char": 0.7268765568733215, "incorrect_loss_per_char": 0.6857903003692627, "correct_loss_per_token": 1.453753113746643, "incorrect_loss_per_token": 1.3715806007385254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4247113466262817, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4247113466262817, "logits_per_char": -0.7123556733131409, "num_chars": 2}, {"sum_logits": -1.3580783605575562, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3580783605575562, "logits_per_char": -0.6790391802787781, "num_chars": 2}, {"sum_logits": -1.3319520950317383, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.3319520950317383, "logits_per_char": -0.6659760475158691, "num_chars": 2}, {"sum_logits": -1.453753113746643, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.453753113746643, "logits_per_char": -0.7268765568733215, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 288, "native_id": "Mercury_7001313", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3413636684417725, "incorrect_loss_raw": 1.410724441210429, "correct_loss_per_char": 0.6706818342208862, "incorrect_loss_per_char": 0.7053622206052145, "correct_loss_per_token": 1.3413636684417725, "incorrect_loss_per_token": 1.410724441210429, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4618560075759888, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4618560075759888, "logits_per_char": -0.7309280037879944, "num_chars": 2}, {"sum_logits": -1.305408239364624, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.305408239364624, "logits_per_char": -0.652704119682312, "num_chars": 2}, {"sum_logits": -1.4649090766906738, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4649090766906738, "logits_per_char": -0.7324545383453369, "num_chars": 2}, {"sum_logits": -1.3413636684417725, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3413636684417725, "logits_per_char": -0.6706818342208862, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 289, "native_id": "MCAS_2010_5_13", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2603583335876465, "incorrect_loss_raw": 1.4654122988382976, "correct_loss_per_char": 0.6301791667938232, "incorrect_loss_per_char": 0.7327061494191488, "correct_loss_per_token": 1.2603583335876465, "incorrect_loss_per_token": 1.4654122988382976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1604201793670654, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.1604201793670654, "logits_per_char": -0.5802100896835327, "num_chars": 2}, {"sum_logits": -1.2603583335876465, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.2603583335876465, "logits_per_char": -0.6301791667938232, "num_chars": 2}, {"sum_logits": -1.5519393682479858, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.5519393682479858, "logits_per_char": -0.7759696841239929, "num_chars": 2}, {"sum_logits": -1.6838773488998413, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.6838773488998413, "logits_per_char": -0.8419386744499207, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 290, "native_id": "Mercury_7140298", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.39802885055542, "incorrect_loss_raw": 1.3902639150619507, "correct_loss_per_char": 0.69901442527771, "incorrect_loss_per_char": 0.6951319575309753, "correct_loss_per_token": 1.39802885055542, "incorrect_loss_per_token": 1.3902639150619507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.42649245262146, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.42649245262146, "logits_per_char": -0.71324622631073, "num_chars": 2}, {"sum_logits": -1.39802885055542, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.39802885055542, "logits_per_char": -0.69901442527771, "num_chars": 2}, {"sum_logits": -1.440475344657898, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.440475344657898, "logits_per_char": -0.720237672328949, "num_chars": 2}, {"sum_logits": -1.3038239479064941, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.3038239479064941, "logits_per_char": -0.6519119739532471, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 291, "native_id": "Mercury_SC_402254", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2764216661453247, "incorrect_loss_raw": 1.43678085009257, "correct_loss_per_char": 0.6382108330726624, "incorrect_loss_per_char": 0.718390425046285, "correct_loss_per_token": 1.2764216661453247, "incorrect_loss_per_token": 1.43678085009257, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4387812614440918, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4387812614440918, "logits_per_char": -0.7193906307220459, "num_chars": 2}, {"sum_logits": -1.528613805770874, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.528613805770874, "logits_per_char": -0.764306902885437, "num_chars": 2}, {"sum_logits": -1.3429474830627441, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3429474830627441, "logits_per_char": -0.6714737415313721, "num_chars": 2}, {"sum_logits": -1.2764216661453247, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.2764216661453247, "logits_per_char": -0.6382108330726624, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 292, "native_id": "MCAS_2011_5_17668", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3814542293548584, "incorrect_loss_raw": 1.3967243432998657, "correct_loss_per_char": 0.6907271146774292, "incorrect_loss_per_char": 0.6983621716499329, "correct_loss_per_token": 1.3814542293548584, "incorrect_loss_per_token": 1.3967243432998657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3814542293548584, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3814542293548584, "logits_per_char": -0.6907271146774292, "num_chars": 2}, {"sum_logits": -1.3520684242248535, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.3520684242248535, "logits_per_char": -0.6760342121124268, "num_chars": 2}, {"sum_logits": -1.3576217889785767, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3576217889785767, "logits_per_char": -0.6788108944892883, "num_chars": 2}, {"sum_logits": -1.480482816696167, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.480482816696167, "logits_per_char": -0.7402414083480835, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 293, "native_id": "MEA_2013_8_2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.411123275756836, "incorrect_loss_raw": 1.3844257195790608, "correct_loss_per_char": 0.705561637878418, "incorrect_loss_per_char": 0.6922128597895304, "correct_loss_per_token": 1.411123275756836, "incorrect_loss_per_token": 1.3844257195790608, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3256611824035645, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.3256611824035645, "logits_per_char": -0.6628305912017822, "num_chars": 2}, {"sum_logits": -1.411123275756836, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.411123275756836, "logits_per_char": -0.705561637878418, "num_chars": 2}, {"sum_logits": -1.430275321006775, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.430275321006775, "logits_per_char": -0.7151376605033875, "num_chars": 2}, {"sum_logits": -1.3973406553268433, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3973406553268433, "logits_per_char": -0.6986703276634216, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 294, "native_id": "NCEOGA_2013_5_18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.389310598373413, "incorrect_loss_raw": 1.3932069142659504, "correct_loss_per_char": 0.6946552991867065, "incorrect_loss_per_char": 0.6966034571329752, "correct_loss_per_token": 1.389310598373413, "incorrect_loss_per_token": 1.3932069142659504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4842740297317505, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4842740297317505, "logits_per_char": -0.7421370148658752, "num_chars": 2}, {"sum_logits": -1.4054983854293823, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4054983854293823, "logits_per_char": -0.7027491927146912, "num_chars": 2}, {"sum_logits": -1.389310598373413, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.389310598373413, "logits_per_char": -0.6946552991867065, "num_chars": 2}, {"sum_logits": -1.2898483276367188, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2898483276367188, "logits_per_char": -0.6449241638183594, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 295, "native_id": "WASL_2003_5_8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1729904413223267, "incorrect_loss_raw": 1.3594688773155212, "correct_loss_per_char": 0.5864952206611633, "incorrect_loss_per_char": 0.6797344386577606, "correct_loss_per_token": 1.1729904413223267, "incorrect_loss_per_token": 1.3594688773155212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1729904413223267, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.1729904413223267, "logits_per_char": -0.5864952206611633, "num_chars": 2}, {"sum_logits": -1.338545799255371, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.338545799255371, "logits_per_char": -0.6692728996276855, "num_chars": 2}, {"sum_logits": -1.3803919553756714, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3803919553756714, "logits_per_char": -0.6901959776878357, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 296, "native_id": "Mercury_7014385", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4286749362945557, "incorrect_loss_raw": 1.38108495871226, "correct_loss_per_char": 0.7143374681472778, "incorrect_loss_per_char": 0.69054247935613, "correct_loss_per_token": 1.4286749362945557, "incorrect_loss_per_token": 1.38108495871226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5058319568634033, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5058319568634033, "logits_per_char": -0.7529159784317017, "num_chars": 2}, {"sum_logits": -1.3153059482574463, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.3153059482574463, "logits_per_char": -0.6576529741287231, "num_chars": 2}, {"sum_logits": -1.4286749362945557, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4286749362945557, "logits_per_char": -0.7143374681472778, "num_chars": 2}, {"sum_logits": -1.3221169710159302, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3221169710159302, "logits_per_char": -0.6610584855079651, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 297, "native_id": "Mercury_SC_415773", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3560792207717896, "incorrect_loss_raw": 1.404093821843465, "correct_loss_per_char": 0.6780396103858948, "incorrect_loss_per_char": 0.7020469109217325, "correct_loss_per_token": 1.3560792207717896, "incorrect_loss_per_token": 1.404093821843465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3560792207717896, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3560792207717896, "logits_per_char": -0.6780396103858948, "num_chars": 2}, {"sum_logits": -1.4600275754928589, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4600275754928589, "logits_per_char": -0.7300137877464294, "num_chars": 2}, {"sum_logits": -1.41285240650177, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.41285240650177, "logits_per_char": -0.706426203250885, "num_chars": 2}, {"sum_logits": -1.3394014835357666, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.3394014835357666, "logits_per_char": -0.6697007417678833, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 298, "native_id": "ACTAAP_2008_5_14", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5314977169036865, "incorrect_loss_raw": 1.350565234820048, "correct_loss_per_char": 0.7657488584518433, "incorrect_loss_per_char": 0.675282617410024, "correct_loss_per_token": 1.5314977169036865, "incorrect_loss_per_token": 1.350565234820048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5314977169036865, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.5314977169036865, "logits_per_char": -0.7657488584518433, "num_chars": 2}, {"sum_logits": -1.359637975692749, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.359637975692749, "logits_per_char": -0.6798189878463745, "num_chars": 2}, {"sum_logits": -1.4470046758651733, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4470046758651733, "logits_per_char": -0.7235023379325867, "num_chars": 2}, {"sum_logits": -1.2450530529022217, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.2450530529022217, "logits_per_char": -0.6225265264511108, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 299, "native_id": "MDSA_2008_8_29", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3189810514450073, "incorrect_loss_raw": 1.4256437222162883, "correct_loss_per_char": 0.6594905257225037, "incorrect_loss_per_char": 0.7128218611081442, "correct_loss_per_token": 1.3189810514450073, "incorrect_loss_per_token": 1.4256437222162883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5130367279052734, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.5130367279052734, "logits_per_char": -0.7565183639526367, "num_chars": 2}, {"sum_logits": -1.230181336402893, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.230181336402893, "logits_per_char": -0.6150906682014465, "num_chars": 2}, {"sum_logits": -1.3189810514450073, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3189810514450073, "logits_per_char": -0.6594905257225037, "num_chars": 2}, {"sum_logits": -1.5337131023406982, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.5337131023406982, "logits_per_char": -0.7668565511703491, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 300, "native_id": "Mercury_7085453", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4111671447753906, "incorrect_loss_raw": 1.3848718007405598, "correct_loss_per_char": 0.7055835723876953, "incorrect_loss_per_char": 0.6924359003702799, "correct_loss_per_token": 1.4111671447753906, "incorrect_loss_per_token": 1.3848718007405598, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3153014183044434, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3153014183044434, "logits_per_char": -0.6576507091522217, "num_chars": 2}, {"sum_logits": -1.372775912284851, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.372775912284851, "logits_per_char": -0.6863879561424255, "num_chars": 2}, {"sum_logits": -1.4111671447753906, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4111671447753906, "logits_per_char": -0.7055835723876953, "num_chars": 2}, {"sum_logits": -1.4665380716323853, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4665380716323853, "logits_per_char": -0.7332690358161926, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 301, "native_id": "Mercury_LBS10126", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6127994060516357, "incorrect_loss_raw": 1.3344624837239583, "correct_loss_per_char": 0.8063997030258179, "incorrect_loss_per_char": 0.6672312418619791, "correct_loss_per_token": 1.6127994060516357, "incorrect_loss_per_token": 1.3344624837239583, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6127994060516357, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.6127994060516357, "logits_per_char": -0.8063997030258179, "num_chars": 2}, {"sum_logits": -1.4220449924468994, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4220449924468994, "logits_per_char": -0.7110224962234497, "num_chars": 2}, {"sum_logits": -1.43184494972229, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.43184494972229, "logits_per_char": -0.715922474861145, "num_chars": 2}, {"sum_logits": -1.1494975090026855, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.1494975090026855, "logits_per_char": -0.5747487545013428, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 302, "native_id": "Mercury_SC_408782", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3726810216903687, "incorrect_loss_raw": 1.3999183575312297, "correct_loss_per_char": 0.6863405108451843, "incorrect_loss_per_char": 0.6999591787656149, "correct_loss_per_token": 1.3726810216903687, "incorrect_loss_per_token": 1.3999183575312297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4665955305099487, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4665955305099487, "logits_per_char": -0.7332977652549744, "num_chars": 2}, {"sum_logits": -1.4301483631134033, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4301483631134033, "logits_per_char": -0.7150741815567017, "num_chars": 2}, {"sum_logits": -1.3726810216903687, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3726810216903687, "logits_per_char": -0.6863405108451843, "num_chars": 2}, {"sum_logits": -1.303011178970337, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.303011178970337, "logits_per_char": -0.6515055894851685, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 303, "native_id": "MSA_2015_5_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4071906805038452, "incorrect_loss_raw": 1.386074384053548, "correct_loss_per_char": 0.7035953402519226, "incorrect_loss_per_char": 0.693037192026774, "correct_loss_per_token": 1.4071906805038452, "incorrect_loss_per_token": 1.386074384053548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4490766525268555, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4490766525268555, "logits_per_char": -0.7245383262634277, "num_chars": 2}, {"sum_logits": -1.3431299924850464, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.3431299924850464, "logits_per_char": -0.6715649962425232, "num_chars": 2}, {"sum_logits": -1.3660165071487427, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3660165071487427, "logits_per_char": -0.6830082535743713, "num_chars": 2}, {"sum_logits": -1.4071906805038452, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4071906805038452, "logits_per_char": -0.7035953402519226, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 304, "native_id": "Mercury_7115255", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4416402578353882, "incorrect_loss_raw": 1.3773231108983357, "correct_loss_per_char": 0.7208201289176941, "incorrect_loss_per_char": 0.6886615554491679, "correct_loss_per_token": 1.4416402578353882, "incorrect_loss_per_token": 1.3773231108983357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4478695392608643, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4478695392608643, "logits_per_char": -0.7239347696304321, "num_chars": 2}, {"sum_logits": -1.4416402578353882, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4416402578353882, "logits_per_char": -0.7208201289176941, "num_chars": 2}, {"sum_logits": -1.4374873638153076, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4374873638153076, "logits_per_char": -0.7187436819076538, "num_chars": 2}, {"sum_logits": -1.2466124296188354, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2466124296188354, "logits_per_char": -0.6233062148094177, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 305, "native_id": "MCAS_1999_8_33", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4812285900115967, "incorrect_loss_raw": 1.3742332061131795, "correct_loss_per_char": 0.7406142950057983, "incorrect_loss_per_char": 0.6871166030565897, "correct_loss_per_token": 1.4812285900115967, "incorrect_loss_per_token": 1.3742332061131795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4657663106918335, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4657663106918335, "logits_per_char": -0.7328831553459167, "num_chars": 2}, {"sum_logits": -1.4812285900115967, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4812285900115967, "logits_per_char": -0.7406142950057983, "num_chars": 2}, {"sum_logits": -1.4939650297164917, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4939650297164917, "logits_per_char": -0.7469825148582458, "num_chars": 2}, {"sum_logits": -1.1629682779312134, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.1629682779312134, "logits_per_char": -0.5814841389656067, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 306, "native_id": "NYSEDREGENTS_2013_4_3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.422346830368042, "incorrect_loss_raw": 1.3804832299550374, "correct_loss_per_char": 0.711173415184021, "incorrect_loss_per_char": 0.6902416149775187, "correct_loss_per_token": 1.422346830368042, "incorrect_loss_per_token": 1.3804832299550374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4165993928909302, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4165993928909302, "logits_per_char": -0.7082996964454651, "num_chars": 2}, {"sum_logits": -1.422346830368042, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.422346830368042, "logits_per_char": -0.711173415184021, "num_chars": 2}, {"sum_logits": -1.3820905685424805, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3820905685424805, "logits_per_char": -0.6910452842712402, "num_chars": 2}, {"sum_logits": -1.3427597284317017, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.3427597284317017, "logits_per_char": -0.6713798642158508, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 307, "native_id": "Mercury_7018060", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3808997869491577, "incorrect_loss_raw": 1.3938809633255005, "correct_loss_per_char": 0.6904498934745789, "incorrect_loss_per_char": 0.6969404816627502, "correct_loss_per_token": 1.3808997869491577, "incorrect_loss_per_token": 1.3938809633255005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4049959182739258, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4049959182739258, "logits_per_char": -0.7024979591369629, "num_chars": 2}, {"sum_logits": -1.3808997869491577, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3808997869491577, "logits_per_char": -0.6904498934745789, "num_chars": 2}, {"sum_logits": -1.4061721563339233, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4061721563339233, "logits_per_char": -0.7030860781669617, "num_chars": 2}, {"sum_logits": -1.3704748153686523, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.3704748153686523, "logits_per_char": -0.6852374076843262, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 308, "native_id": "Mercury_SC_415390", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.444459319114685, "incorrect_loss_raw": 1.3769662380218506, "correct_loss_per_char": 0.7222296595573425, "incorrect_loss_per_char": 0.6884831190109253, "correct_loss_per_token": 1.444459319114685, "incorrect_loss_per_token": 1.3769662380218506, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4385957717895508, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4385957717895508, "logits_per_char": -0.7192978858947754, "num_chars": 2}, {"sum_logits": -1.2542507648468018, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2542507648468018, "logits_per_char": -0.6271253824234009, "num_chars": 2}, {"sum_logits": -1.4380521774291992, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4380521774291992, "logits_per_char": -0.7190260887145996, "num_chars": 2}, {"sum_logits": -1.444459319114685, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.444459319114685, "logits_per_char": -0.7222296595573425, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 309, "native_id": "Mercury_7210350", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3474828004837036, "incorrect_loss_raw": 1.4111051559448242, "correct_loss_per_char": 0.6737414002418518, "incorrect_loss_per_char": 0.7055525779724121, "correct_loss_per_token": 1.3474828004837036, "incorrect_loss_per_token": 1.4111051559448242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5548291206359863, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.5548291206359863, "logits_per_char": -0.7774145603179932, "num_chars": 2}, {"sum_logits": -1.3474828004837036, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3474828004837036, "logits_per_char": -0.6737414002418518, "num_chars": 2}, {"sum_logits": -1.4027323722839355, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4027323722839355, "logits_per_char": -0.7013661861419678, "num_chars": 2}, {"sum_logits": -1.2757539749145508, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.2757539749145508, "logits_per_char": -0.6378769874572754, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 310, "native_id": "Mercury_7161298", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5082165002822876, "incorrect_loss_raw": 1.3597954114278157, "correct_loss_per_char": 0.7541082501411438, "incorrect_loss_per_char": 0.6798977057139078, "correct_loss_per_token": 1.5082165002822876, "incorrect_loss_per_token": 1.3597954114278157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.485470175743103, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.485470175743103, "logits_per_char": -0.7427350878715515, "num_chars": 2}, {"sum_logits": -1.5082165002822876, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5082165002822876, "logits_per_char": -0.7541082501411438, "num_chars": 2}, {"sum_logits": -1.3843019008636475, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3843019008636475, "logits_per_char": -0.6921509504318237, "num_chars": 2}, {"sum_logits": -1.2096141576766968, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2096141576766968, "logits_per_char": -0.6048070788383484, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 311, "native_id": "Mercury_405942", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.467154860496521, "incorrect_loss_raw": 1.3698770602544148, "correct_loss_per_char": 0.7335774302482605, "incorrect_loss_per_char": 0.6849385301272074, "correct_loss_per_token": 1.467154860496521, "incorrect_loss_per_token": 1.3698770602544148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.467154860496521, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.467154860496521, "logits_per_char": -0.7335774302482605, "num_chars": 2}, {"sum_logits": -1.3174784183502197, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3174784183502197, "logits_per_char": -0.6587392091751099, "num_chars": 2}, {"sum_logits": -1.3113505840301514, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3113505840301514, "logits_per_char": -0.6556752920150757, "num_chars": 2}, {"sum_logits": -1.4808021783828735, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4808021783828735, "logits_per_char": -0.7404010891914368, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 312, "native_id": "Mercury_SC_415335", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4581679105758667, "incorrect_loss_raw": 1.3720926841100056, "correct_loss_per_char": 0.7290839552879333, "incorrect_loss_per_char": 0.6860463420550028, "correct_loss_per_token": 1.4581679105758667, "incorrect_loss_per_token": 1.3720926841100056, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4581679105758667, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4581679105758667, "logits_per_char": -0.7290839552879333, "num_chars": 2}, {"sum_logits": -1.4852752685546875, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4852752685546875, "logits_per_char": -0.7426376342773438, "num_chars": 2}, {"sum_logits": -1.3462269306182861, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3462269306182861, "logits_per_char": -0.6731134653091431, "num_chars": 2}, {"sum_logits": -1.2847758531570435, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.2847758531570435, "logits_per_char": -0.6423879265785217, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 313, "native_id": "Mercury_SC_401170", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4228529930114746, "incorrect_loss_raw": 1.3820308446884155, "correct_loss_per_char": 0.7114264965057373, "incorrect_loss_per_char": 0.6910154223442078, "correct_loss_per_token": 1.4228529930114746, "incorrect_loss_per_token": 1.3820308446884155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4228529930114746, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4228529930114746, "logits_per_char": -0.7114264965057373, "num_chars": 2}, {"sum_logits": -1.3366080522537231, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.3366080522537231, "logits_per_char": -0.6683040261268616, "num_chars": 2}, {"sum_logits": -1.393675684928894, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.393675684928894, "logits_per_char": -0.696837842464447, "num_chars": 2}, {"sum_logits": -1.4158087968826294, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4158087968826294, "logits_per_char": -0.7079043984413147, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 314, "native_id": "Mercury_7077490", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.479507327079773, "incorrect_loss_raw": 1.3619653383890789, "correct_loss_per_char": 0.7397536635398865, "incorrect_loss_per_char": 0.6809826691945394, "correct_loss_per_token": 1.479507327079773, "incorrect_loss_per_token": 1.3619653383890789, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3896501064300537, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3896501064300537, "logits_per_char": -0.6948250532150269, "num_chars": 2}, {"sum_logits": -1.479507327079773, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.479507327079773, "logits_per_char": -0.7397536635398865, "num_chars": 2}, {"sum_logits": -1.348746657371521, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.348746657371521, "logits_per_char": -0.6743733286857605, "num_chars": 2}, {"sum_logits": -1.3474992513656616, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3474992513656616, "logits_per_char": -0.6737496256828308, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 315, "native_id": "Mercury_7210018", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.38309907913208, "incorrect_loss_raw": 1.3930099805196126, "correct_loss_per_char": 0.69154953956604, "incorrect_loss_per_char": 0.6965049902598063, "correct_loss_per_token": 1.38309907913208, "incorrect_loss_per_token": 1.3930099805196126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4194185733795166, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4194185733795166, "logits_per_char": -0.7097092866897583, "num_chars": 2}, {"sum_logits": -1.416940450668335, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.416940450668335, "logits_per_char": -0.7084702253341675, "num_chars": 2}, {"sum_logits": -1.38309907913208, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.38309907913208, "logits_per_char": -0.69154953956604, "num_chars": 2}, {"sum_logits": -1.3426709175109863, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.3426709175109863, "logits_per_char": -0.6713354587554932, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 316, "native_id": "MCAS_2011_8_17698", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.361449122428894, "incorrect_loss_raw": 1.4025099674860637, "correct_loss_per_char": 0.680724561214447, "incorrect_loss_per_char": 0.7012549837430319, "correct_loss_per_token": 1.361449122428894, "incorrect_loss_per_token": 1.4025099674860637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4619150161743164, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4619150161743164, "logits_per_char": -0.7309575080871582, "num_chars": 2}, {"sum_logits": -1.4013959169387817, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4013959169387817, "logits_per_char": -0.7006979584693909, "num_chars": 2}, {"sum_logits": -1.3442189693450928, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3442189693450928, "logits_per_char": -0.6721094846725464, "num_chars": 2}, {"sum_logits": -1.361449122428894, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.361449122428894, "logits_per_char": -0.680724561214447, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 317, "native_id": "Mercury_SC_408991", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3206030130386353, "incorrect_loss_raw": 1.4141884644826253, "correct_loss_per_char": 0.6603015065193176, "incorrect_loss_per_char": 0.7070942322413126, "correct_loss_per_token": 1.3206030130386353, "incorrect_loss_per_token": 1.4141884644826253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4185712337493896, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4185712337493896, "logits_per_char": -0.7092856168746948, "num_chars": 2}, {"sum_logits": -1.416337251663208, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.416337251663208, "logits_per_char": -0.708168625831604, "num_chars": 2}, {"sum_logits": -1.4076569080352783, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4076569080352783, "logits_per_char": -0.7038284540176392, "num_chars": 2}, {"sum_logits": -1.3206030130386353, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3206030130386353, "logits_per_char": -0.6603015065193176, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 318, "native_id": "VASoL_2007_5_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.326300024986267, "incorrect_loss_raw": 1.4122577508290608, "correct_loss_per_char": 0.6631500124931335, "incorrect_loss_per_char": 0.7061288754145304, "correct_loss_per_token": 1.326300024986267, "incorrect_loss_per_token": 1.4122577508290608, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4001420736312866, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4001420736312866, "logits_per_char": -0.7000710368156433, "num_chars": 2}, {"sum_logits": -1.4295467138290405, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4295467138290405, "logits_per_char": -0.7147733569145203, "num_chars": 2}, {"sum_logits": -1.4070844650268555, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4070844650268555, "logits_per_char": -0.7035422325134277, "num_chars": 2}, {"sum_logits": -1.326300024986267, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.326300024986267, "logits_per_char": -0.6631500124931335, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 319, "native_id": "Mercury_189753", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4073737859725952, "incorrect_loss_raw": 1.3914813995361328, "correct_loss_per_char": 0.7036868929862976, "incorrect_loss_per_char": 0.6957406997680664, "correct_loss_per_token": 1.4073737859725952, "incorrect_loss_per_token": 1.3914813995361328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.529624342918396, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.529624342918396, "logits_per_char": -0.764812171459198, "num_chars": 2}, {"sum_logits": -1.4073737859725952, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4073737859725952, "logits_per_char": -0.7036868929862976, "num_chars": 2}, {"sum_logits": -1.3988374471664429, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3988374471664429, "logits_per_char": -0.6994187235832214, "num_chars": 2}, {"sum_logits": -1.2459824085235596, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2459824085235596, "logits_per_char": -0.6229912042617798, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 320, "native_id": "Mercury_SC_401288", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3067797422409058, "incorrect_loss_raw": 1.4204940001169841, "correct_loss_per_char": 0.6533898711204529, "incorrect_loss_per_char": 0.7102470000584921, "correct_loss_per_token": 1.3067797422409058, "incorrect_loss_per_token": 1.4204940001169841, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3067797422409058, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.3067797422409058, "logits_per_char": -0.6533898711204529, "num_chars": 2}, {"sum_logits": -1.3696383237838745, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3696383237838745, "logits_per_char": -0.6848191618919373, "num_chars": 2}, {"sum_logits": -1.4641023874282837, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4641023874282837, "logits_per_char": -0.7320511937141418, "num_chars": 2}, {"sum_logits": -1.427741289138794, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.427741289138794, "logits_per_char": -0.713870644569397, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 321, "native_id": "ACTAAP_2009_5_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3313864469528198, "incorrect_loss_raw": 1.4216372966766357, "correct_loss_per_char": 0.6656932234764099, "incorrect_loss_per_char": 0.7108186483383179, "correct_loss_per_token": 1.3313864469528198, "incorrect_loss_per_token": 1.4216372966766357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3313864469528198, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3313864469528198, "logits_per_char": -0.6656932234764099, "num_chars": 2}, {"sum_logits": -1.4874073266983032, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4874073266983032, "logits_per_char": -0.7437036633491516, "num_chars": 2}, {"sum_logits": -1.2679500579833984, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.2679500579833984, "logits_per_char": -0.6339750289916992, "num_chars": 2}, {"sum_logits": -1.5095545053482056, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.5095545053482056, "logits_per_char": -0.7547772526741028, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 322, "native_id": "TIMSS_2011_8_pg50", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.206125259399414, "incorrect_loss_raw": 1.4633882443110149, "correct_loss_per_char": 0.603062629699707, "incorrect_loss_per_char": 0.7316941221555074, "correct_loss_per_token": 1.206125259399414, "incorrect_loss_per_token": 1.4633882443110149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.206125259399414, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.206125259399414, "logits_per_char": -0.603062629699707, "num_chars": 2}, {"sum_logits": -1.4029536247253418, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4029536247253418, "logits_per_char": -0.7014768123626709, "num_chars": 2}, {"sum_logits": -1.5413066148757935, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.5413066148757935, "logits_per_char": -0.7706533074378967, "num_chars": 2}, {"sum_logits": -1.4459044933319092, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4459044933319092, "logits_per_char": -0.7229522466659546, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 323, "native_id": "MCAS_2012_8_23653", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3613749742507935, "incorrect_loss_raw": 1.404781738917033, "correct_loss_per_char": 0.6806874871253967, "incorrect_loss_per_char": 0.7023908694585165, "correct_loss_per_token": 1.3613749742507935, "incorrect_loss_per_token": 1.404781738917033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5271798372268677, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5271798372268677, "logits_per_char": -0.7635899186134338, "num_chars": 2}, {"sum_logits": -1.3613749742507935, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3613749742507935, "logits_per_char": -0.6806874871253967, "num_chars": 2}, {"sum_logits": -1.3210275173187256, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.3210275173187256, "logits_per_char": -0.6605137586593628, "num_chars": 2}, {"sum_logits": -1.3661378622055054, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3661378622055054, "logits_per_char": -0.6830689311027527, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 324, "native_id": "Mercury_7183523", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.59590744972229, "incorrect_loss_raw": 1.3385077317555745, "correct_loss_per_char": 0.797953724861145, "incorrect_loss_per_char": 0.6692538658777872, "correct_loss_per_token": 1.59590744972229, "incorrect_loss_per_token": 1.3385077317555745, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.59590744972229, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.59590744972229, "logits_per_char": -0.797953724861145, "num_chars": 2}, {"sum_logits": -1.4306186437606812, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4306186437606812, "logits_per_char": -0.7153093218803406, "num_chars": 2}, {"sum_logits": -1.4265005588531494, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4265005588531494, "logits_per_char": -0.7132502794265747, "num_chars": 2}, {"sum_logits": -1.158403992652893, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.158403992652893, "logits_per_char": -0.5792019963264465, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 325, "native_id": "MCAS_2000_4_29", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4376035928726196, "incorrect_loss_raw": 1.39041801293691, "correct_loss_per_char": 0.7188017964363098, "incorrect_loss_per_char": 0.695209006468455, "correct_loss_per_token": 1.4376035928726196, "incorrect_loss_per_token": 1.39041801293691, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4376035928726196, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4376035928726196, "logits_per_char": -0.7188017964363098, "num_chars": 2}, {"sum_logits": -1.6177350282669067, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.6177350282669067, "logits_per_char": -0.8088675141334534, "num_chars": 2}, {"sum_logits": -1.3571525812149048, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3571525812149048, "logits_per_char": -0.6785762906074524, "num_chars": 2}, {"sum_logits": -1.1963664293289185, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.1963664293289185, "logits_per_char": -0.5981832146644592, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 326, "native_id": "Mercury_7011288", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4503467082977295, "incorrect_loss_raw": 1.371965726216634, "correct_loss_per_char": 0.7251733541488647, "incorrect_loss_per_char": 0.685982863108317, "correct_loss_per_token": 1.4503467082977295, "incorrect_loss_per_token": 1.371965726216634, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4158084392547607, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4158084392547607, "logits_per_char": -0.7079042196273804, "num_chars": 2}, {"sum_logits": -1.3621079921722412, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3621079921722412, "logits_per_char": -0.6810539960861206, "num_chars": 2}, {"sum_logits": -1.3379807472229004, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.3379807472229004, "logits_per_char": -0.6689903736114502, "num_chars": 2}, {"sum_logits": -1.4503467082977295, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4503467082977295, "logits_per_char": -0.7251733541488647, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 327, "native_id": "Mercury_7210630", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3949443101882935, "incorrect_loss_raw": 1.3941009442011516, "correct_loss_per_char": 0.6974721550941467, "incorrect_loss_per_char": 0.6970504721005758, "correct_loss_per_token": 1.3949443101882935, "incorrect_loss_per_token": 1.3941009442011516, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.354378581047058, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.354378581047058, "logits_per_char": -0.677189290523529, "num_chars": 2}, {"sum_logits": -1.4333600997924805, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4333600997924805, "logits_per_char": -0.7166800498962402, "num_chars": 2}, {"sum_logits": -1.3949443101882935, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3949443101882935, "logits_per_char": -0.6974721550941467, "num_chars": 2}, {"sum_logits": -1.394564151763916, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.394564151763916, "logits_per_char": -0.697282075881958, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 328, "native_id": "CSZ20770", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.360495686531067, "incorrect_loss_raw": 1.4141822655995686, "correct_loss_per_char": 0.6802478432655334, "incorrect_loss_per_char": 0.7070911327997843, "correct_loss_per_token": 1.360495686531067, "incorrect_loss_per_token": 1.4141822655995686, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4406144618988037, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4406144618988037, "logits_per_char": -0.7203072309494019, "num_chars": 2}, {"sum_logits": -1.2286850214004517, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.2286850214004517, "logits_per_char": -0.6143425107002258, "num_chars": 2}, {"sum_logits": -1.360495686531067, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.360495686531067, "logits_per_char": -0.6802478432655334, "num_chars": 2}, {"sum_logits": -1.5732473134994507, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.5732473134994507, "logits_per_char": -0.7866236567497253, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 329, "native_id": "Mercury_177153", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.322441816329956, "incorrect_loss_raw": 1.4265415668487549, "correct_loss_per_char": 0.661220908164978, "incorrect_loss_per_char": 0.7132707834243774, "correct_loss_per_token": 1.322441816329956, "incorrect_loss_per_token": 1.4265415668487549, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.223680019378662, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.223680019378662, "logits_per_char": -0.611840009689331, "num_chars": 2}, {"sum_logits": -1.4579620361328125, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4579620361328125, "logits_per_char": -0.7289810180664062, "num_chars": 2}, {"sum_logits": -1.59798264503479, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.59798264503479, "logits_per_char": -0.798991322517395, "num_chars": 2}, {"sum_logits": -1.322441816329956, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.322441816329956, "logits_per_char": -0.661220908164978, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 330, "native_id": "Mercury_SC_402064", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3573627471923828, "incorrect_loss_raw": 1.402215043703715, "correct_loss_per_char": 0.6786813735961914, "incorrect_loss_per_char": 0.7011075218518575, "correct_loss_per_token": 1.3573627471923828, "incorrect_loss_per_token": 1.402215043703715, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4239126443862915, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4239126443862915, "logits_per_char": -0.7119563221931458, "num_chars": 2}, {"sum_logits": -1.3947433233261108, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3947433233261108, "logits_per_char": -0.6973716616630554, "num_chars": 2}, {"sum_logits": -1.3879891633987427, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3879891633987427, "logits_per_char": -0.6939945816993713, "num_chars": 2}, {"sum_logits": -1.3573627471923828, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.3573627471923828, "logits_per_char": -0.6786813735961914, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 331, "native_id": "Mercury_7221025", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.344006896018982, "incorrect_loss_raw": 1.4079442421595256, "correct_loss_per_char": 0.672003448009491, "incorrect_loss_per_char": 0.7039721210797628, "correct_loss_per_token": 1.344006896018982, "incorrect_loss_per_token": 1.4079442421595256, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.344006896018982, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.344006896018982, "logits_per_char": -0.672003448009491, "num_chars": 2}, {"sum_logits": -1.3558138608932495, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3558138608932495, "logits_per_char": -0.6779069304466248, "num_chars": 2}, {"sum_logits": -1.4558240175247192, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4558240175247192, "logits_per_char": -0.7279120087623596, "num_chars": 2}, {"sum_logits": -1.412194848060608, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.412194848060608, "logits_per_char": -0.706097424030304, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 332, "native_id": "MSA_2012_8_9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4362893104553223, "incorrect_loss_raw": 1.3750330607096355, "correct_loss_per_char": 0.7181446552276611, "incorrect_loss_per_char": 0.6875165303548177, "correct_loss_per_token": 1.4362893104553223, "incorrect_loss_per_token": 1.3750330607096355, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3850129842758179, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.3850129842758179, "logits_per_char": -0.6925064921379089, "num_chars": 2}, {"sum_logits": -1.361129879951477, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": true, "logits_per_token": -1.361129879951477, "logits_per_char": -0.6805649399757385, "num_chars": 2}, {"sum_logits": -1.4362893104553223, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.4362893104553223, "logits_per_char": -0.7181446552276611, "num_chars": 2}, {"sum_logits": -1.3789563179016113, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.3789563179016113, "logits_per_char": -0.6894781589508057, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 333, "native_id": "NCEOGA_2013_5_49", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2862566709518433, "incorrect_loss_raw": 1.4302889903386433, "correct_loss_per_char": 0.6431283354759216, "incorrect_loss_per_char": 0.7151444951693217, "correct_loss_per_token": 1.2862566709518433, "incorrect_loss_per_token": 1.4302889903386433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3802504539489746, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3802504539489746, "logits_per_char": -0.6901252269744873, "num_chars": 2}, {"sum_logits": -1.4780341386795044, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4780341386795044, "logits_per_char": -0.7390170693397522, "num_chars": 2}, {"sum_logits": -1.4325823783874512, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4325823783874512, "logits_per_char": -0.7162911891937256, "num_chars": 2}, {"sum_logits": -1.2862566709518433, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.2862566709518433, "logits_per_char": -0.6431283354759216, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 334, "native_id": "Mercury_SC_402071", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4004994630813599, "incorrect_loss_raw": 1.3887590964635212, "correct_loss_per_char": 0.7002497315406799, "incorrect_loss_per_char": 0.6943795482317606, "correct_loss_per_token": 1.4004994630813599, "incorrect_loss_per_token": 1.3887590964635212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.384831190109253, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.384831190109253, "logits_per_char": -0.6924155950546265, "num_chars": 2}, {"sum_logits": -1.438065528869629, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.438065528869629, "logits_per_char": -0.7190327644348145, "num_chars": 2}, {"sum_logits": -1.4004994630813599, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4004994630813599, "logits_per_char": -0.7002497315406799, "num_chars": 2}, {"sum_logits": -1.3433805704116821, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3433805704116821, "logits_per_char": -0.6716902852058411, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 335, "native_id": "Mercury_7043890", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4153218269348145, "incorrect_loss_raw": 1.3821073770523071, "correct_loss_per_char": 0.7076609134674072, "incorrect_loss_per_char": 0.6910536885261536, "correct_loss_per_token": 1.4153218269348145, "incorrect_loss_per_token": 1.3821073770523071, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3881261348724365, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3881261348724365, "logits_per_char": -0.6940630674362183, "num_chars": 2}, {"sum_logits": -1.4153218269348145, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4153218269348145, "logits_per_char": -0.7076609134674072, "num_chars": 2}, {"sum_logits": -1.3744345903396606, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3744345903396606, "logits_per_char": -0.6872172951698303, "num_chars": 2}, {"sum_logits": -1.3837614059448242, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3837614059448242, "logits_per_char": -0.6918807029724121, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 336, "native_id": "Mercury_7188353", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.520248532295227, "incorrect_loss_raw": 1.353711724281311, "correct_loss_per_char": 0.7601242661476135, "incorrect_loss_per_char": 0.6768558621406555, "correct_loss_per_token": 1.520248532295227, "incorrect_loss_per_token": 1.353711724281311, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.520248532295227, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.520248532295227, "logits_per_char": -0.7601242661476135, "num_chars": 2}, {"sum_logits": -1.4293441772460938, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4293441772460938, "logits_per_char": -0.7146720886230469, "num_chars": 2}, {"sum_logits": -1.3881036043167114, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3881036043167114, "logits_per_char": -0.6940518021583557, "num_chars": 2}, {"sum_logits": -1.243687391281128, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.243687391281128, "logits_per_char": -0.621843695640564, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 337, "native_id": "MCAS_2004_8_20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5623279809951782, "incorrect_loss_raw": 1.341495434443156, "correct_loss_per_char": 0.7811639904975891, "incorrect_loss_per_char": 0.670747717221578, "correct_loss_per_token": 1.5623279809951782, "incorrect_loss_per_token": 1.341495434443156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5623279809951782, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5623279809951782, "logits_per_char": -0.7811639904975891, "num_chars": 2}, {"sum_logits": -1.3620339632034302, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3620339632034302, "logits_per_char": -0.6810169816017151, "num_chars": 2}, {"sum_logits": -1.4091380834579468, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4091380834579468, "logits_per_char": -0.7045690417289734, "num_chars": 2}, {"sum_logits": -1.2533142566680908, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.2533142566680908, "logits_per_char": -0.6266571283340454, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 338, "native_id": "Mercury_SC_409563", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4346425533294678, "incorrect_loss_raw": 1.3812856276830037, "correct_loss_per_char": 0.7173212766647339, "incorrect_loss_per_char": 0.6906428138415018, "correct_loss_per_token": 1.4346425533294678, "incorrect_loss_per_token": 1.3812856276830037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4346425533294678, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4346425533294678, "logits_per_char": -0.7173212766647339, "num_chars": 2}, {"sum_logits": -1.477444052696228, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.477444052696228, "logits_per_char": -0.738722026348114, "num_chars": 2}, {"sum_logits": -1.4192259311676025, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4192259311676025, "logits_per_char": -0.7096129655838013, "num_chars": 2}, {"sum_logits": -1.2471868991851807, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2471868991851807, "logits_per_char": -0.6235934495925903, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 339, "native_id": "Mercury_7135853", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3842904567718506, "incorrect_loss_raw": 1.3962748448053997, "correct_loss_per_char": 0.6921452283859253, "incorrect_loss_per_char": 0.6981374224026998, "correct_loss_per_token": 1.3842904567718506, "incorrect_loss_per_token": 1.3962748448053997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.310180902481079, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.310180902481079, "logits_per_char": -0.6550904512405396, "num_chars": 2}, {"sum_logits": -1.3842904567718506, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3842904567718506, "logits_per_char": -0.6921452283859253, "num_chars": 2}, {"sum_logits": -1.4052575826644897, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4052575826644897, "logits_per_char": -0.7026287913322449, "num_chars": 2}, {"sum_logits": -1.4733860492706299, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4733860492706299, "logits_per_char": -0.7366930246353149, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 340, "native_id": "Mercury_7040933", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.398878574371338, "incorrect_loss_raw": 1.4028093417485554, "correct_loss_per_char": 0.699439287185669, "incorrect_loss_per_char": 0.7014046708742777, "correct_loss_per_token": 1.398878574371338, "incorrect_loss_per_token": 1.4028093417485554, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2017476558685303, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.2017476558685303, "logits_per_char": -0.6008738279342651, "num_chars": 2}, {"sum_logits": -1.398878574371338, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.398878574371338, "logits_per_char": -0.699439287185669, "num_chars": 2}, {"sum_logits": -1.4605039358139038, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4605039358139038, "logits_per_char": -0.7302519679069519, "num_chars": 2}, {"sum_logits": -1.5461764335632324, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.5461764335632324, "logits_per_char": -0.7730882167816162, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 341, "native_id": "Mercury_7044065", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3195329904556274, "incorrect_loss_raw": 1.4175173441569011, "correct_loss_per_char": 0.6597664952278137, "incorrect_loss_per_char": 0.7087586720784506, "correct_loss_per_token": 1.3195329904556274, "incorrect_loss_per_token": 1.4175173441569011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3896862268447876, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3896862268447876, "logits_per_char": -0.6948431134223938, "num_chars": 2}, {"sum_logits": -1.4459046125411987, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4459046125411987, "logits_per_char": -0.7229523062705994, "num_chars": 2}, {"sum_logits": -1.4169611930847168, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4169611930847168, "logits_per_char": -0.7084805965423584, "num_chars": 2}, {"sum_logits": -1.3195329904556274, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.3195329904556274, "logits_per_char": -0.6597664952278137, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 342, "native_id": "AKDE&ED_2008_8_50", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3085057735443115, "incorrect_loss_raw": 1.4218422969182332, "correct_loss_per_char": 0.6542528867721558, "incorrect_loss_per_char": 0.7109211484591166, "correct_loss_per_token": 1.3085057735443115, "incorrect_loss_per_token": 1.4218422969182332, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4974855184555054, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4974855184555054, "logits_per_char": -0.7487427592277527, "num_chars": 2}, {"sum_logits": -1.3085057735443115, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.3085057735443115, "logits_per_char": -0.6542528867721558, "num_chars": 2}, {"sum_logits": -1.379676342010498, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.379676342010498, "logits_per_char": -0.689838171005249, "num_chars": 2}, {"sum_logits": -1.3883650302886963, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3883650302886963, "logits_per_char": -0.6941825151443481, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 343, "native_id": "MCAS_1999_8_32", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4321638345718384, "incorrect_loss_raw": 1.3792130947113037, "correct_loss_per_char": 0.7160819172859192, "incorrect_loss_per_char": 0.6896065473556519, "correct_loss_per_token": 1.4321638345718384, "incorrect_loss_per_token": 1.3792130947113037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4962366819381714, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4962366819381714, "logits_per_char": -0.7481183409690857, "num_chars": 2}, {"sum_logits": -1.35639488697052, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.35639488697052, "logits_per_char": -0.67819744348526, "num_chars": 2}, {"sum_logits": -1.4321638345718384, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4321638345718384, "logits_per_char": -0.7160819172859192, "num_chars": 2}, {"sum_logits": -1.2850077152252197, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.2850077152252197, "logits_per_char": -0.6425038576126099, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 344, "native_id": "NYSEDREGENTS_2012_8_32", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4176710844039917, "incorrect_loss_raw": 1.3822381099065144, "correct_loss_per_char": 0.7088355422019958, "incorrect_loss_per_char": 0.6911190549532572, "correct_loss_per_token": 1.4176710844039917, "incorrect_loss_per_token": 1.3822381099065144, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4176710844039917, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4176710844039917, "logits_per_char": -0.7088355422019958, "num_chars": 2}, {"sum_logits": -1.42176353931427, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.42176353931427, "logits_per_char": -0.710881769657135, "num_chars": 2}, {"sum_logits": -1.3505808115005493, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.3505808115005493, "logits_per_char": -0.6752904057502747, "num_chars": 2}, {"sum_logits": -1.3743699789047241, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3743699789047241, "logits_per_char": -0.6871849894523621, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 345, "native_id": "Mercury_416672", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3859267234802246, "incorrect_loss_raw": 1.3931638797124226, "correct_loss_per_char": 0.6929633617401123, "incorrect_loss_per_char": 0.6965819398562113, "correct_loss_per_token": 1.3859267234802246, "incorrect_loss_per_token": 1.3931638797124226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3944027423858643, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3944027423858643, "logits_per_char": -0.6972013711929321, "num_chars": 2}, {"sum_logits": -1.3859267234802246, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3859267234802246, "logits_per_char": -0.6929633617401123, "num_chars": 2}, {"sum_logits": -1.432225227355957, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.432225227355957, "logits_per_char": -0.7161126136779785, "num_chars": 2}, {"sum_logits": -1.3528636693954468, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3528636693954468, "logits_per_char": -0.6764318346977234, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 346, "native_id": "ACTAAP_2007_7_13", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3451241254806519, "incorrect_loss_raw": 1.415904959042867, "correct_loss_per_char": 0.6725620627403259, "incorrect_loss_per_char": 0.7079524795214335, "correct_loss_per_token": 1.3451241254806519, "incorrect_loss_per_token": 1.415904959042867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.617516040802002, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.617516040802002, "logits_per_char": -0.808758020401001, "num_chars": 2}, {"sum_logits": -1.3451241254806519, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3451241254806519, "logits_per_char": -0.6725620627403259, "num_chars": 2}, {"sum_logits": -1.3255170583724976, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3255170583724976, "logits_per_char": -0.6627585291862488, "num_chars": 2}, {"sum_logits": -1.3046817779541016, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3046817779541016, "logits_per_char": -0.6523408889770508, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 347, "native_id": "NCEOGA_2013_5_13", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3602778911590576, "incorrect_loss_raw": 1.4022910197575886, "correct_loss_per_char": 0.6801389455795288, "incorrect_loss_per_char": 0.7011455098787943, "correct_loss_per_token": 1.3602778911590576, "incorrect_loss_per_token": 1.4022910197575886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3602778911590576, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3602778911590576, "logits_per_char": -0.6801389455795288, "num_chars": 2}, {"sum_logits": -1.380136489868164, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.380136489868164, "logits_per_char": -0.690068244934082, "num_chars": 2}, {"sum_logits": -1.4053876399993896, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4053876399993896, "logits_per_char": -0.7026938199996948, "num_chars": 2}, {"sum_logits": -1.4213489294052124, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4213489294052124, "logits_per_char": -0.7106744647026062, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 348, "native_id": "Mercury_7268275", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4263962507247925, "incorrect_loss_raw": 1.3804221947987874, "correct_loss_per_char": 0.7131981253623962, "incorrect_loss_per_char": 0.6902110973993937, "correct_loss_per_token": 1.4263962507247925, "incorrect_loss_per_token": 1.3804221947987874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4263962507247925, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4263962507247925, "logits_per_char": -0.7131981253623962, "num_chars": 2}, {"sum_logits": -1.3293719291687012, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3293719291687012, "logits_per_char": -0.6646859645843506, "num_chars": 2}, {"sum_logits": -1.4544779062271118, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4544779062271118, "logits_per_char": -0.7272389531135559, "num_chars": 2}, {"sum_logits": -1.3574167490005493, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3574167490005493, "logits_per_char": -0.6787083745002747, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 349, "native_id": "MEA_2014_5_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3547828197479248, "incorrect_loss_raw": 1.405066967010498, "correct_loss_per_char": 0.6773914098739624, "incorrect_loss_per_char": 0.702533483505249, "correct_loss_per_token": 1.3547828197479248, "incorrect_loss_per_token": 1.405066967010498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4782249927520752, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4782249927520752, "logits_per_char": -0.7391124963760376, "num_chars": 2}, {"sum_logits": -1.3547828197479248, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3547828197479248, "logits_per_char": -0.6773914098739624, "num_chars": 2}, {"sum_logits": -1.4306672811508179, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4306672811508179, "logits_per_char": -0.7153336405754089, "num_chars": 2}, {"sum_logits": -1.306308627128601, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.306308627128601, "logits_per_char": -0.6531543135643005, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 350, "native_id": "Mercury_7271373", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3493953943252563, "incorrect_loss_raw": 1.4091954231262207, "correct_loss_per_char": 0.6746976971626282, "incorrect_loss_per_char": 0.7045977115631104, "correct_loss_per_token": 1.3493953943252563, "incorrect_loss_per_token": 1.4091954231262207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4882320165634155, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4882320165634155, "logits_per_char": -0.7441160082817078, "num_chars": 2}, {"sum_logits": -1.2989829778671265, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.2989829778671265, "logits_per_char": -0.6494914889335632, "num_chars": 2}, {"sum_logits": -1.3493953943252563, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3493953943252563, "logits_per_char": -0.6746976971626282, "num_chars": 2}, {"sum_logits": -1.4403712749481201, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4403712749481201, "logits_per_char": -0.7201856374740601, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 351, "native_id": "MCAS_2003_8_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6105172634124756, "incorrect_loss_raw": 1.3302292823791504, "correct_loss_per_char": 0.8052586317062378, "incorrect_loss_per_char": 0.6651146411895752, "correct_loss_per_token": 1.6105172634124756, "incorrect_loss_per_token": 1.3302292823791504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6105172634124756, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.6105172634124756, "logits_per_char": -0.8052586317062378, "num_chars": 2}, {"sum_logits": -1.3825479745864868, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3825479745864868, "logits_per_char": -0.6912739872932434, "num_chars": 2}, {"sum_logits": -1.3632099628448486, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3632099628448486, "logits_per_char": -0.6816049814224243, "num_chars": 2}, {"sum_logits": -1.2449299097061157, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.2449299097061157, "logits_per_char": -0.6224649548530579, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 352, "native_id": "Mercury_7220343", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.470773696899414, "incorrect_loss_raw": 1.3663864930470784, "correct_loss_per_char": 0.735386848449707, "incorrect_loss_per_char": 0.6831932465235392, "correct_loss_per_token": 1.470773696899414, "incorrect_loss_per_token": 1.3663864930470784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.470773696899414, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.470773696899414, "logits_per_char": -0.735386848449707, "num_chars": 2}, {"sum_logits": -1.404335856437683, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.404335856437683, "logits_per_char": -0.7021679282188416, "num_chars": 2}, {"sum_logits": -1.4060685634613037, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4060685634613037, "logits_per_char": -0.7030342817306519, "num_chars": 2}, {"sum_logits": -1.2887550592422485, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.2887550592422485, "logits_per_char": -0.6443775296211243, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 353, "native_id": "Mercury_7262850", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4193799495697021, "incorrect_loss_raw": 1.3900505701700847, "correct_loss_per_char": 0.7096899747848511, "incorrect_loss_per_char": 0.6950252850850424, "correct_loss_per_token": 1.4193799495697021, "incorrect_loss_per_token": 1.3900505701700847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2921512126922607, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.2921512126922607, "logits_per_char": -0.6460756063461304, "num_chars": 2}, {"sum_logits": -1.3571186065673828, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3571186065673828, "logits_per_char": -0.6785593032836914, "num_chars": 2}, {"sum_logits": -1.4193799495697021, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4193799495697021, "logits_per_char": -0.7096899747848511, "num_chars": 2}, {"sum_logits": -1.5208818912506104, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5208818912506104, "logits_per_char": -0.7604409456253052, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 354, "native_id": "NYSEDREGENTS_2012_4_6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4777259826660156, "incorrect_loss_raw": 1.3667290210723877, "correct_loss_per_char": 0.7388629913330078, "incorrect_loss_per_char": 0.6833645105361938, "correct_loss_per_token": 1.4777259826660156, "incorrect_loss_per_token": 1.3667290210723877, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3446437120437622, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3446437120437622, "logits_per_char": -0.6723218560218811, "num_chars": 2}, {"sum_logits": -1.301466703414917, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.301466703414917, "logits_per_char": -0.6507333517074585, "num_chars": 2}, {"sum_logits": -1.4777259826660156, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4777259826660156, "logits_per_char": -0.7388629913330078, "num_chars": 2}, {"sum_logits": -1.4540766477584839, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4540766477584839, "logits_per_char": -0.7270383238792419, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 355, "native_id": "TAKS_2009_8_38", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2021589279174805, "incorrect_loss_raw": 1.4702725013097127, "correct_loss_per_char": 0.6010794639587402, "incorrect_loss_per_char": 0.7351362506548563, "correct_loss_per_token": 1.2021589279174805, "incorrect_loss_per_token": 1.4702725013097127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5709209442138672, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.5709209442138672, "logits_per_char": -0.7854604721069336, "num_chars": 2}, {"sum_logits": -1.5460256338119507, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.5460256338119507, "logits_per_char": -0.7730128169059753, "num_chars": 2}, {"sum_logits": -1.2938709259033203, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.2938709259033203, "logits_per_char": -0.6469354629516602, "num_chars": 2}, {"sum_logits": -1.2021589279174805, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2021589279174805, "logits_per_char": -0.6010794639587402, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 356, "native_id": "Mercury_SC_416107", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.419246792793274, "incorrect_loss_raw": 1.388381004333496, "correct_loss_per_char": 0.709623396396637, "incorrect_loss_per_char": 0.694190502166748, "correct_loss_per_token": 1.419246792793274, "incorrect_loss_per_token": 1.388381004333496, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.419246792793274, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.419246792793274, "logits_per_char": -0.709623396396637, "num_chars": 2}, {"sum_logits": -1.2884818315505981, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.2884818315505981, "logits_per_char": -0.6442409157752991, "num_chars": 2}, {"sum_logits": -1.3064664602279663, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3064664602279663, "logits_per_char": -0.6532332301139832, "num_chars": 2}, {"sum_logits": -1.5701947212219238, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.5701947212219238, "logits_per_char": -0.7850973606109619, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 357, "native_id": "NCEOGA_2013_8_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5045244693756104, "incorrect_loss_raw": 1.3576298952102661, "correct_loss_per_char": 0.7522622346878052, "incorrect_loss_per_char": 0.6788149476051331, "correct_loss_per_token": 1.5045244693756104, "incorrect_loss_per_token": 1.3576298952102661, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3602263927459717, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3602263927459717, "logits_per_char": -0.6801131963729858, "num_chars": 2}, {"sum_logits": -1.2910484075546265, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2910484075546265, "logits_per_char": -0.6455242037773132, "num_chars": 2}, {"sum_logits": -1.5045244693756104, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.5045244693756104, "logits_per_char": -0.7522622346878052, "num_chars": 2}, {"sum_logits": -1.4216148853302002, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4216148853302002, "logits_per_char": -0.7108074426651001, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 358, "native_id": "NYSEDREGENTS_2012_4_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4565929174423218, "incorrect_loss_raw": 1.3754554986953735, "correct_loss_per_char": 0.7282964587211609, "incorrect_loss_per_char": 0.6877277493476868, "correct_loss_per_token": 1.4565929174423218, "incorrect_loss_per_token": 1.3754554986953735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5184047222137451, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.5184047222137451, "logits_per_char": -0.7592023611068726, "num_chars": 2}, {"sum_logits": -1.2899070978164673, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.2899070978164673, "logits_per_char": -0.6449535489082336, "num_chars": 2}, {"sum_logits": -1.3180546760559082, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.3180546760559082, "logits_per_char": -0.6590273380279541, "num_chars": 2}, {"sum_logits": -1.4565929174423218, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4565929174423218, "logits_per_char": -0.7282964587211609, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 359, "native_id": "Mercury_SC_405490", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2929468154907227, "incorrect_loss_raw": 1.4302164713541667, "correct_loss_per_char": 0.6464734077453613, "incorrect_loss_per_char": 0.7151082356770834, "correct_loss_per_token": 1.2929468154907227, "incorrect_loss_per_token": 1.4302164713541667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5422083139419556, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.5422083139419556, "logits_per_char": -0.7711041569709778, "num_chars": 2}, {"sum_logits": -1.448000192642212, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.448000192642212, "logits_per_char": -0.724000096321106, "num_chars": 2}, {"sum_logits": -1.3004409074783325, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3004409074783325, "logits_per_char": -0.6502204537391663, "num_chars": 2}, {"sum_logits": -1.2929468154907227, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.2929468154907227, "logits_per_char": -0.6464734077453613, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 360, "native_id": "Mercury_SC_408554", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2889289855957031, "incorrect_loss_raw": 1.4417540629704793, "correct_loss_per_char": 0.6444644927978516, "incorrect_loss_per_char": 0.7208770314852396, "correct_loss_per_token": 1.2889289855957031, "incorrect_loss_per_token": 1.4417540629704793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6874836683273315, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.6874836683273315, "logits_per_char": -0.8437418341636658, "num_chars": 2}, {"sum_logits": -1.3792157173156738, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3792157173156738, "logits_per_char": -0.6896078586578369, "num_chars": 2}, {"sum_logits": -1.2889289855957031, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.2889289855957031, "logits_per_char": -0.6444644927978516, "num_chars": 2}, {"sum_logits": -1.2585628032684326, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2585628032684326, "logits_per_char": -0.6292814016342163, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 361, "native_id": "Mercury_7005058", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.337416410446167, "incorrect_loss_raw": 1.4118507305781047, "correct_loss_per_char": 0.6687082052230835, "incorrect_loss_per_char": 0.7059253652890524, "correct_loss_per_token": 1.337416410446167, "incorrect_loss_per_token": 1.4118507305781047, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.435346245765686, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.435346245765686, "logits_per_char": -0.717673122882843, "num_chars": 2}, {"sum_logits": -1.4644888639450073, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4644888639450073, "logits_per_char": -0.7322444319725037, "num_chars": 2}, {"sum_logits": -1.3357170820236206, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.3357170820236206, "logits_per_char": -0.6678585410118103, "num_chars": 2}, {"sum_logits": -1.337416410446167, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.337416410446167, "logits_per_char": -0.6687082052230835, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 362, "native_id": "MDSA_2007_5_57", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2751349210739136, "incorrect_loss_raw": 1.4344270626703899, "correct_loss_per_char": 0.6375674605369568, "incorrect_loss_per_char": 0.7172135313351949, "correct_loss_per_token": 1.2751349210739136, "incorrect_loss_per_token": 1.4344270626703899, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5528262853622437, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.5528262853622437, "logits_per_char": -0.7764131426811218, "num_chars": 2}, {"sum_logits": -1.3462165594100952, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3462165594100952, "logits_per_char": -0.6731082797050476, "num_chars": 2}, {"sum_logits": -1.4042383432388306, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4042383432388306, "logits_per_char": -0.7021191716194153, "num_chars": 2}, {"sum_logits": -1.2751349210739136, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.2751349210739136, "logits_per_char": -0.6375674605369568, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 363, "native_id": "ACTAAP_2014_7_12", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3230727910995483, "incorrect_loss_raw": 1.4163153966267903, "correct_loss_per_char": 0.6615363955497742, "incorrect_loss_per_char": 0.7081576983133951, "correct_loss_per_token": 1.3230727910995483, "incorrect_loss_per_token": 1.4163153966267903, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5115346908569336, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.5115346908569336, "logits_per_char": -0.7557673454284668, "num_chars": 2}, {"sum_logits": -1.3428089618682861, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3428089618682861, "logits_per_char": -0.6714044809341431, "num_chars": 2}, {"sum_logits": -1.3230727910995483, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.3230727910995483, "logits_per_char": -0.6615363955497742, "num_chars": 2}, {"sum_logits": -1.3946025371551514, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3946025371551514, "logits_per_char": -0.6973012685775757, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 364, "native_id": "Mercury_7027335", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.505713939666748, "incorrect_loss_raw": 1.3640772104263306, "correct_loss_per_char": 0.752856969833374, "incorrect_loss_per_char": 0.6820386052131653, "correct_loss_per_token": 1.505713939666748, "incorrect_loss_per_token": 1.3640772104263306, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.518160343170166, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.518160343170166, "logits_per_char": -0.759080171585083, "num_chars": 2}, {"sum_logits": -1.3158048391342163, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3158048391342163, "logits_per_char": -0.6579024195671082, "num_chars": 2}, {"sum_logits": -1.505713939666748, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.505713939666748, "logits_per_char": -0.752856969833374, "num_chars": 2}, {"sum_logits": -1.2582664489746094, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2582664489746094, "logits_per_char": -0.6291332244873047, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 365, "native_id": "Mercury_7246365", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4133951663970947, "incorrect_loss_raw": 1.3858820994695027, "correct_loss_per_char": 0.7066975831985474, "incorrect_loss_per_char": 0.6929410497347513, "correct_loss_per_token": 1.4133951663970947, "incorrect_loss_per_token": 1.3858820994695027, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.403228759765625, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.403228759765625, "logits_per_char": -0.7016143798828125, "num_chars": 2}, {"sum_logits": -1.325536847114563, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.325536847114563, "logits_per_char": -0.6627684235572815, "num_chars": 2}, {"sum_logits": -1.4133951663970947, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4133951663970947, "logits_per_char": -0.7066975831985474, "num_chars": 2}, {"sum_logits": -1.4288806915283203, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4288806915283203, "logits_per_char": -0.7144403457641602, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 366, "native_id": "Mercury_406923", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3105573654174805, "incorrect_loss_raw": 1.4212642113367717, "correct_loss_per_char": 0.6552786827087402, "incorrect_loss_per_char": 0.7106321056683859, "correct_loss_per_token": 1.3105573654174805, "incorrect_loss_per_token": 1.4212642113367717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4005317687988281, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4005317687988281, "logits_per_char": -0.7002658843994141, "num_chars": 2}, {"sum_logits": -1.3276294469833374, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3276294469833374, "logits_per_char": -0.6638147234916687, "num_chars": 2}, {"sum_logits": -1.3105573654174805, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3105573654174805, "logits_per_char": -0.6552786827087402, "num_chars": 2}, {"sum_logits": -1.5356314182281494, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.5356314182281494, "logits_per_char": -0.7678157091140747, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 367, "native_id": "Mercury_7074988", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.452353596687317, "incorrect_loss_raw": 1.373724063237508, "correct_loss_per_char": 0.7261767983436584, "incorrect_loss_per_char": 0.686862031618754, "correct_loss_per_token": 1.452353596687317, "incorrect_loss_per_token": 1.373724063237508, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.452353596687317, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.452353596687317, "logits_per_char": -0.7261767983436584, "num_chars": 2}, {"sum_logits": -1.390753149986267, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.390753149986267, "logits_per_char": -0.6953765749931335, "num_chars": 2}, {"sum_logits": -1.4587769508361816, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4587769508361816, "logits_per_char": -0.7293884754180908, "num_chars": 2}, {"sum_logits": -1.2716420888900757, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.2716420888900757, "logits_per_char": -0.6358210444450378, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 368, "native_id": "MDSA_2007_8_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.372383952140808, "incorrect_loss_raw": 1.41024116675059, "correct_loss_per_char": 0.686191976070404, "incorrect_loss_per_char": 0.705120583375295, "correct_loss_per_token": 1.372383952140808, "incorrect_loss_per_token": 1.41024116675059, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.583303451538086, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.583303451538086, "logits_per_char": -0.791651725769043, "num_chars": 2}, {"sum_logits": -1.4660369157791138, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4660369157791138, "logits_per_char": -0.7330184578895569, "num_chars": 2}, {"sum_logits": -1.372383952140808, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.372383952140808, "logits_per_char": -0.686191976070404, "num_chars": 2}, {"sum_logits": -1.1813831329345703, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.1813831329345703, "logits_per_char": -0.5906915664672852, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 369, "native_id": "Mercury_LBS10205", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3145595788955688, "incorrect_loss_raw": 1.418229301770528, "correct_loss_per_char": 0.6572797894477844, "incorrect_loss_per_char": 0.709114650885264, "correct_loss_per_token": 1.3145595788955688, "incorrect_loss_per_token": 1.418229301770528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.444469928741455, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.444469928741455, "logits_per_char": -0.7222349643707275, "num_chars": 2}, {"sum_logits": -1.3145595788955688, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.3145595788955688, "logits_per_char": -0.6572797894477844, "num_chars": 2}, {"sum_logits": -1.4319123029708862, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4319123029708862, "logits_per_char": -0.7159561514854431, "num_chars": 2}, {"sum_logits": -1.3783056735992432, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3783056735992432, "logits_per_char": -0.6891528367996216, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 370, "native_id": "Mercury_7141785", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5446871519088745, "incorrect_loss_raw": 1.3476293881734211, "correct_loss_per_char": 0.7723435759544373, "incorrect_loss_per_char": 0.6738146940867106, "correct_loss_per_token": 1.5446871519088745, "incorrect_loss_per_token": 1.3476293881734211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5446871519088745, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.5446871519088745, "logits_per_char": -0.7723435759544373, "num_chars": 2}, {"sum_logits": -1.435333251953125, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.435333251953125, "logits_per_char": -0.7176666259765625, "num_chars": 2}, {"sum_logits": -1.3856908082962036, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3856908082962036, "logits_per_char": -0.6928454041481018, "num_chars": 2}, {"sum_logits": -1.221864104270935, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.221864104270935, "logits_per_char": -0.6109320521354675, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 371, "native_id": "Mercury_SC_401613", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1427973508834839, "incorrect_loss_raw": 1.491636594136556, "correct_loss_per_char": 0.5713986754417419, "incorrect_loss_per_char": 0.745818297068278, "correct_loss_per_token": 1.1427973508834839, "incorrect_loss_per_token": 1.491636594136556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5806024074554443, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5806024074554443, "logits_per_char": -0.7903012037277222, "num_chars": 2}, {"sum_logits": -1.5003197193145752, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5003197193145752, "logits_per_char": -0.7501598596572876, "num_chars": 2}, {"sum_logits": -1.3939876556396484, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3939876556396484, "logits_per_char": -0.6969938278198242, "num_chars": 2}, {"sum_logits": -1.1427973508834839, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.1427973508834839, "logits_per_char": -0.5713986754417419, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 372, "native_id": "Mercury_7175735", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3844263553619385, "incorrect_loss_raw": 1.4031248490015666, "correct_loss_per_char": 0.6922131776809692, "incorrect_loss_per_char": 0.7015624245007833, "correct_loss_per_token": 1.3844263553619385, "incorrect_loss_per_token": 1.4031248490015666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5362765789031982, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5362765789031982, "logits_per_char": -0.7681382894515991, "num_chars": 2}, {"sum_logits": -1.4652293920516968, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4652293920516968, "logits_per_char": -0.7326146960258484, "num_chars": 2}, {"sum_logits": -1.3844263553619385, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3844263553619385, "logits_per_char": -0.6922131776809692, "num_chars": 2}, {"sum_logits": -1.2078685760498047, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2078685760498047, "logits_per_char": -0.6039342880249023, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 373, "native_id": "TIMSS_2003_8_pg42", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.415976881980896, "incorrect_loss_raw": 1.3854719003041585, "correct_loss_per_char": 0.707988440990448, "incorrect_loss_per_char": 0.6927359501520792, "correct_loss_per_token": 1.415976881980896, "incorrect_loss_per_token": 1.3854719003041585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4927806854248047, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4927806854248047, "logits_per_char": -0.7463903427124023, "num_chars": 2}, {"sum_logits": -1.398149013519287, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.398149013519287, "logits_per_char": -0.6990745067596436, "num_chars": 2}, {"sum_logits": -1.415976881980896, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.415976881980896, "logits_per_char": -0.707988440990448, "num_chars": 2}, {"sum_logits": -1.2654860019683838, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2654860019683838, "logits_per_char": -0.6327430009841919, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 374, "native_id": "TIMSS_2007_8_pg130", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3990663290023804, "incorrect_loss_raw": 1.3906346162160237, "correct_loss_per_char": 0.6995331645011902, "incorrect_loss_per_char": 0.6953173081080118, "correct_loss_per_token": 1.3990663290023804, "incorrect_loss_per_token": 1.3906346162160237, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3486270904541016, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3486270904541016, "logits_per_char": -0.6743135452270508, "num_chars": 2}, {"sum_logits": -1.3602203130722046, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3602203130722046, "logits_per_char": -0.6801101565361023, "num_chars": 2}, {"sum_logits": -1.4630564451217651, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4630564451217651, "logits_per_char": -0.7315282225608826, "num_chars": 2}, {"sum_logits": -1.3990663290023804, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3990663290023804, "logits_per_char": -0.6995331645011902, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 375, "native_id": "Mercury_401643", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.44599187374115, "incorrect_loss_raw": 1.376511017481486, "correct_loss_per_char": 0.722995936870575, "incorrect_loss_per_char": 0.688255508740743, "correct_loss_per_token": 1.44599187374115, "incorrect_loss_per_token": 1.376511017481486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.460431456565857, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.460431456565857, "logits_per_char": -0.7302157282829285, "num_chars": 2}, {"sum_logits": -1.44599187374115, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.44599187374115, "logits_per_char": -0.722995936870575, "num_chars": 2}, {"sum_logits": -1.3975863456726074, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.3975863456726074, "logits_per_char": -0.6987931728363037, "num_chars": 2}, {"sum_logits": -1.2715152502059937, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.2715152502059937, "logits_per_char": -0.6357576251029968, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 376, "native_id": "Mercury_7162785", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.459025502204895, "incorrect_loss_raw": 1.3693635861078899, "correct_loss_per_char": 0.7295127511024475, "incorrect_loss_per_char": 0.6846817930539449, "correct_loss_per_token": 1.459025502204895, "incorrect_loss_per_token": 1.3693635861078899, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3904935121536255, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3904935121536255, "logits_per_char": -0.6952467560768127, "num_chars": 2}, {"sum_logits": -1.459025502204895, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.459025502204895, "logits_per_char": -0.7295127511024475, "num_chars": 2}, {"sum_logits": -1.3820868730545044, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3820868730545044, "logits_per_char": -0.6910434365272522, "num_chars": 2}, {"sum_logits": -1.3355103731155396, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3355103731155396, "logits_per_char": -0.6677551865577698, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 377, "native_id": "Mercury_7082075", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3955042362213135, "incorrect_loss_raw": 1.3880941073099773, "correct_loss_per_char": 0.6977521181106567, "incorrect_loss_per_char": 0.6940470536549886, "correct_loss_per_token": 1.3955042362213135, "incorrect_loss_per_token": 1.3880941073099773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4028929471969604, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4028929471969604, "logits_per_char": -0.7014464735984802, "num_chars": 2}, {"sum_logits": -1.4314366579055786, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4314366579055786, "logits_per_char": -0.7157183289527893, "num_chars": 2}, {"sum_logits": -1.3955042362213135, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3955042362213135, "logits_per_char": -0.6977521181106567, "num_chars": 2}, {"sum_logits": -1.3299527168273926, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3299527168273926, "logits_per_char": -0.6649763584136963, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 378, "native_id": "NYSEDREGENTS_2013_4_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.380666971206665, "incorrect_loss_raw": 1.393523136774699, "correct_loss_per_char": 0.6903334856033325, "incorrect_loss_per_char": 0.6967615683873495, "correct_loss_per_token": 1.380666971206665, "incorrect_loss_per_token": 1.393523136774699, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.384477972984314, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.384477972984314, "logits_per_char": -0.692238986492157, "num_chars": 2}, {"sum_logits": -1.3718026876449585, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3718026876449585, "logits_per_char": -0.6859013438224792, "num_chars": 2}, {"sum_logits": -1.380666971206665, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.380666971206665, "logits_per_char": -0.6903334856033325, "num_chars": 2}, {"sum_logits": -1.4242887496948242, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4242887496948242, "logits_per_char": -0.7121443748474121, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 379, "native_id": "NYSEDREGENTS_2012_4_26", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5330555438995361, "incorrect_loss_raw": 1.347794532775879, "correct_loss_per_char": 0.7665277719497681, "incorrect_loss_per_char": 0.6738972663879395, "correct_loss_per_token": 1.5330555438995361, "incorrect_loss_per_token": 1.347794532775879, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.368186354637146, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.368186354637146, "logits_per_char": -0.684093177318573, "num_chars": 2}, {"sum_logits": -1.3702067136764526, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3702067136764526, "logits_per_char": -0.6851033568382263, "num_chars": 2}, {"sum_logits": -1.304990530014038, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.304990530014038, "logits_per_char": -0.652495265007019, "num_chars": 2}, {"sum_logits": -1.5330555438995361, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.5330555438995361, "logits_per_char": -0.7665277719497681, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 380, "native_id": "Mercury_7220833", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4608149528503418, "incorrect_loss_raw": 1.3724592129389446, "correct_loss_per_char": 0.7304074764251709, "incorrect_loss_per_char": 0.6862296064694723, "correct_loss_per_token": 1.4608149528503418, "incorrect_loss_per_token": 1.3724592129389446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4762502908706665, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4762502908706665, "logits_per_char": -0.7381251454353333, "num_chars": 2}, {"sum_logits": -1.2681729793548584, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.2681729793548584, "logits_per_char": -0.6340864896774292, "num_chars": 2}, {"sum_logits": -1.4608149528503418, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4608149528503418, "logits_per_char": -0.7304074764251709, "num_chars": 2}, {"sum_logits": -1.3729543685913086, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3729543685913086, "logits_per_char": -0.6864771842956543, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 381, "native_id": "Mercury_7210158", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3453432321548462, "incorrect_loss_raw": 1.4150584936141968, "correct_loss_per_char": 0.6726716160774231, "incorrect_loss_per_char": 0.7075292468070984, "correct_loss_per_token": 1.3453432321548462, "incorrect_loss_per_token": 1.4150584936141968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5812119245529175, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.5812119245529175, "logits_per_char": -0.7906059622764587, "num_chars": 2}, {"sum_logits": -1.3453432321548462, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3453432321548462, "logits_per_char": -0.6726716160774231, "num_chars": 2}, {"sum_logits": -1.4150941371917725, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4150941371917725, "logits_per_char": -0.7075470685958862, "num_chars": 2}, {"sum_logits": -1.2488694190979004, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.2488694190979004, "logits_per_char": -0.6244347095489502, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 382, "native_id": "Mercury_SC_416161", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2878139019012451, "incorrect_loss_raw": 1.4346457322438557, "correct_loss_per_char": 0.6439069509506226, "incorrect_loss_per_char": 0.7173228661219279, "correct_loss_per_token": 1.2878139019012451, "incorrect_loss_per_token": 1.4346457322438557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2878139019012451, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.2878139019012451, "logits_per_char": -0.6439069509506226, "num_chars": 2}, {"sum_logits": -1.4486554861068726, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4486554861068726, "logits_per_char": -0.7243277430534363, "num_chars": 2}, {"sum_logits": -1.501236915588379, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.501236915588379, "logits_per_char": -0.7506184577941895, "num_chars": 2}, {"sum_logits": -1.354044795036316, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.354044795036316, "logits_per_char": -0.677022397518158, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 383, "native_id": "Mercury_7264040", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3447078466415405, "incorrect_loss_raw": 1.4090653657913208, "correct_loss_per_char": 0.6723539233207703, "incorrect_loss_per_char": 0.7045326828956604, "correct_loss_per_token": 1.3447078466415405, "incorrect_loss_per_token": 1.4090653657913208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3289018869400024, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3289018869400024, "logits_per_char": -0.6644509434700012, "num_chars": 2}, {"sum_logits": -1.3447078466415405, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3447078466415405, "logits_per_char": -0.6723539233207703, "num_chars": 2}, {"sum_logits": -1.3709315061569214, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3709315061569214, "logits_per_char": -0.6854657530784607, "num_chars": 2}, {"sum_logits": -1.5273627042770386, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.5273627042770386, "logits_per_char": -0.7636813521385193, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 384, "native_id": "Mercury_SC_409172", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4019240140914917, "incorrect_loss_raw": 1.387175480524699, "correct_loss_per_char": 0.7009620070457458, "incorrect_loss_per_char": 0.6935877402623495, "correct_loss_per_token": 1.4019240140914917, "incorrect_loss_per_token": 1.387175480524699, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4019240140914917, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4019240140914917, "logits_per_char": -0.7009620070457458, "num_chars": 2}, {"sum_logits": -1.4070651531219482, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4070651531219482, "logits_per_char": -0.7035325765609741, "num_chars": 2}, {"sum_logits": -1.3909558057785034, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3909558057785034, "logits_per_char": -0.6954779028892517, "num_chars": 2}, {"sum_logits": -1.363505482673645, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.363505482673645, "logits_per_char": -0.6817527413368225, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 385, "native_id": "MCAS_2015_5_11", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3582600355148315, "incorrect_loss_raw": 1.4036840597788494, "correct_loss_per_char": 0.6791300177574158, "incorrect_loss_per_char": 0.7018420298894247, "correct_loss_per_token": 1.3582600355148315, "incorrect_loss_per_token": 1.4036840597788494, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3613393306732178, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3613393306732178, "logits_per_char": -0.6806696653366089, "num_chars": 2}, {"sum_logits": -1.4018217325210571, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4018217325210571, "logits_per_char": -0.7009108662605286, "num_chars": 2}, {"sum_logits": -1.447891116142273, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.447891116142273, "logits_per_char": -0.7239455580711365, "num_chars": 2}, {"sum_logits": -1.3582600355148315, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.3582600355148315, "logits_per_char": -0.6791300177574158, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 386, "native_id": "NYSEDREGENTS_2008_4_2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4488623142242432, "incorrect_loss_raw": 1.3715516726175945, "correct_loss_per_char": 0.7244311571121216, "incorrect_loss_per_char": 0.6857758363087972, "correct_loss_per_token": 1.4488623142242432, "incorrect_loss_per_token": 1.3715516726175945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3566211462020874, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3566211462020874, "logits_per_char": -0.6783105731010437, "num_chars": 2}, {"sum_logits": -1.4488623142242432, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4488623142242432, "logits_per_char": -0.7244311571121216, "num_chars": 2}, {"sum_logits": -1.3460224866867065, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.3460224866867065, "logits_per_char": -0.6730112433433533, "num_chars": 2}, {"sum_logits": -1.4120113849639893, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4120113849639893, "logits_per_char": -0.7060056924819946, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 387, "native_id": "LEAP_2004_4_10260", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3851344585418701, "incorrect_loss_raw": 1.403859535853068, "correct_loss_per_char": 0.6925672292709351, "incorrect_loss_per_char": 0.701929767926534, "correct_loss_per_token": 1.3851344585418701, "incorrect_loss_per_token": 1.403859535853068, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3159583806991577, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3159583806991577, "logits_per_char": -0.6579791903495789, "num_chars": 2}, {"sum_logits": -1.3851344585418701, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3851344585418701, "logits_per_char": -0.6925672292709351, "num_chars": 2}, {"sum_logits": -1.3087267875671387, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.3087267875671387, "logits_per_char": -0.6543633937835693, "num_chars": 2}, {"sum_logits": -1.5868934392929077, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.5868934392929077, "logits_per_char": -0.7934467196464539, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 388, "native_id": "Mercury_7217228", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.30843985080719, "incorrect_loss_raw": 1.423095464706421, "correct_loss_per_char": 0.654219925403595, "incorrect_loss_per_char": 0.7115477323532104, "correct_loss_per_token": 1.30843985080719, "incorrect_loss_per_token": 1.423095464706421, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3958204984664917, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3958204984664917, "logits_per_char": -0.6979102492332458, "num_chars": 2}, {"sum_logits": -1.3358696699142456, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3358696699142456, "logits_per_char": -0.6679348349571228, "num_chars": 2}, {"sum_logits": -1.30843985080719, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.30843985080719, "logits_per_char": -0.654219925403595, "num_chars": 2}, {"sum_logits": -1.5375962257385254, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.5375962257385254, "logits_per_char": -0.7687981128692627, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 389, "native_id": "Mercury_7071978", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4370980262756348, "incorrect_loss_raw": 1.3753570318222046, "correct_loss_per_char": 0.7185490131378174, "incorrect_loss_per_char": 0.6876785159111023, "correct_loss_per_token": 1.4370980262756348, "incorrect_loss_per_token": 1.3753570318222046, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.303359031677246, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.303359031677246, "logits_per_char": -0.651679515838623, "num_chars": 2}, {"sum_logits": -1.4370980262756348, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4370980262756348, "logits_per_char": -0.7185490131378174, "num_chars": 2}, {"sum_logits": -1.4188405275344849, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4188405275344849, "logits_per_char": -0.7094202637672424, "num_chars": 2}, {"sum_logits": -1.4038715362548828, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4038715362548828, "logits_per_char": -0.7019357681274414, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 390, "native_id": "Mercury_7106785", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4720577001571655, "incorrect_loss_raw": 1.364082137743632, "correct_loss_per_char": 0.7360288500785828, "incorrect_loss_per_char": 0.682041068871816, "correct_loss_per_token": 1.4720577001571655, "incorrect_loss_per_token": 1.364082137743632, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.357031226158142, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.357031226158142, "logits_per_char": -0.678515613079071, "num_chars": 2}, {"sum_logits": -1.3769216537475586, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3769216537475586, "logits_per_char": -0.6884608268737793, "num_chars": 2}, {"sum_logits": -1.4720577001571655, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4720577001571655, "logits_per_char": -0.7360288500785828, "num_chars": 2}, {"sum_logits": -1.3582935333251953, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3582935333251953, "logits_per_char": -0.6791467666625977, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 391, "native_id": "Mercury_404895", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3401974439620972, "incorrect_loss_raw": 1.4093927145004272, "correct_loss_per_char": 0.6700987219810486, "incorrect_loss_per_char": 0.7046963572502136, "correct_loss_per_token": 1.3401974439620972, "incorrect_loss_per_token": 1.4093927145004272, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4862343072891235, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4862343072891235, "logits_per_char": -0.7431171536445618, "num_chars": 2}, {"sum_logits": -1.3401974439620972, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.3401974439620972, "logits_per_char": -0.6700987219810486, "num_chars": 2}, {"sum_logits": -1.3507156372070312, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3507156372070312, "logits_per_char": -0.6753578186035156, "num_chars": 2}, {"sum_logits": -1.391228199005127, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.391228199005127, "logits_per_char": -0.6956140995025635, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 392, "native_id": "NYSEDREGENTS_2012_8_30", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4350833892822266, "incorrect_loss_raw": 1.3808360894521077, "correct_loss_per_char": 0.7175416946411133, "incorrect_loss_per_char": 0.6904180447260538, "correct_loss_per_token": 1.4350833892822266, "incorrect_loss_per_token": 1.3808360894521077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3220802545547485, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3220802545547485, "logits_per_char": -0.6610401272773743, "num_chars": 2}, {"sum_logits": -1.4350833892822266, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4350833892822266, "logits_per_char": -0.7175416946411133, "num_chars": 2}, {"sum_logits": -1.31620192527771, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.31620192527771, "logits_per_char": -0.658100962638855, "num_chars": 2}, {"sum_logits": -1.5042260885238647, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.5042260885238647, "logits_per_char": -0.7521130442619324, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 393, "native_id": "Mercury_LBS10706", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3451337814331055, "incorrect_loss_raw": 1.4062298933664958, "correct_loss_per_char": 0.6725668907165527, "incorrect_loss_per_char": 0.7031149466832479, "correct_loss_per_token": 1.3451337814331055, "incorrect_loss_per_token": 1.4062298933664958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4424923658370972, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4424923658370972, "logits_per_char": -0.7212461829185486, "num_chars": 2}, {"sum_logits": -1.4030728340148926, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4030728340148926, "logits_per_char": -0.7015364170074463, "num_chars": 2}, {"sum_logits": -1.3731244802474976, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3731244802474976, "logits_per_char": -0.6865622401237488, "num_chars": 2}, {"sum_logits": -1.3451337814331055, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3451337814331055, "logits_per_char": -0.6725668907165527, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 394, "native_id": "LEAP_2006_4_10275", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3702611923217773, "incorrect_loss_raw": 1.3964397112528484, "correct_loss_per_char": 0.6851305961608887, "incorrect_loss_per_char": 0.6982198556264242, "correct_loss_per_token": 1.3702611923217773, "incorrect_loss_per_token": 1.3964397112528484, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3822360038757324, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3822360038757324, "logits_per_char": -0.6911180019378662, "num_chars": 2}, {"sum_logits": -1.4334945678710938, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4334945678710938, "logits_per_char": -0.7167472839355469, "num_chars": 2}, {"sum_logits": -1.3702611923217773, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3702611923217773, "logits_per_char": -0.6851305961608887, "num_chars": 2}, {"sum_logits": -1.3735885620117188, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3735885620117188, "logits_per_char": -0.6867942810058594, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 395, "native_id": "Mercury_177188", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.241456389427185, "incorrect_loss_raw": 1.4446518421173096, "correct_loss_per_char": 0.6207281947135925, "incorrect_loss_per_char": 0.7223259210586548, "correct_loss_per_token": 1.241456389427185, "incorrect_loss_per_token": 1.4446518421173096, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.241456389427185, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.241456389427185, "logits_per_char": -0.6207281947135925, "num_chars": 2}, {"sum_logits": -1.4147852659225464, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4147852659225464, "logits_per_char": -0.7073926329612732, "num_chars": 2}, {"sum_logits": -1.4120978116989136, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4120978116989136, "logits_per_char": -0.7060489058494568, "num_chars": 2}, {"sum_logits": -1.5070724487304688, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.5070724487304688, "logits_per_char": -0.7535362243652344, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 396, "native_id": "Mercury_7041388", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4925768375396729, "incorrect_loss_raw": 1.3594265381495159, "correct_loss_per_char": 0.7462884187698364, "incorrect_loss_per_char": 0.6797132690747579, "correct_loss_per_token": 1.4925768375396729, "incorrect_loss_per_token": 1.3594265381495159, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.333957314491272, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.333957314491272, "logits_per_char": -0.666978657245636, "num_chars": 2}, {"sum_logits": -1.3414336442947388, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3414336442947388, "logits_per_char": -0.6707168221473694, "num_chars": 2}, {"sum_logits": -1.4028886556625366, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4028886556625366, "logits_per_char": -0.7014443278312683, "num_chars": 2}, {"sum_logits": -1.4925768375396729, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4925768375396729, "logits_per_char": -0.7462884187698364, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 397, "native_id": "Mercury_7012863", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3952738046646118, "incorrect_loss_raw": 1.3929680188496907, "correct_loss_per_char": 0.6976369023323059, "incorrect_loss_per_char": 0.6964840094248453, "correct_loss_per_token": 1.3952738046646118, "incorrect_loss_per_token": 1.3929680188496907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3952738046646118, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3952738046646118, "logits_per_char": -0.6976369023323059, "num_chars": 2}, {"sum_logits": -1.4972796440124512, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4972796440124512, "logits_per_char": -0.7486398220062256, "num_chars": 2}, {"sum_logits": -1.389143943786621, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.389143943786621, "logits_per_char": -0.6945719718933105, "num_chars": 2}, {"sum_logits": -1.29248046875, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.29248046875, "logits_per_char": -0.646240234375, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 398, "native_id": "Mercury_7015908", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4859633445739746, "incorrect_loss_raw": 1.3622088432312012, "correct_loss_per_char": 0.7429816722869873, "incorrect_loss_per_char": 0.6811044216156006, "correct_loss_per_token": 1.4859633445739746, "incorrect_loss_per_token": 1.3622088432312012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.307948350906372, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.307948350906372, "logits_per_char": -0.653974175453186, "num_chars": 2}, {"sum_logits": -1.4184486865997314, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4184486865997314, "logits_per_char": -0.7092243432998657, "num_chars": 2}, {"sum_logits": -1.3602294921875, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3602294921875, "logits_per_char": -0.68011474609375, "num_chars": 2}, {"sum_logits": -1.4859633445739746, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4859633445739746, "logits_per_char": -0.7429816722869873, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 399, "native_id": "TAKS_2009_5_6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5290699005126953, "incorrect_loss_raw": 1.3628722031911213, "correct_loss_per_char": 0.7645349502563477, "incorrect_loss_per_char": 0.6814361015955607, "correct_loss_per_token": 1.5290699005126953, "incorrect_loss_per_token": 1.3628722031911213, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5290699005126953, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.5290699005126953, "logits_per_char": -0.7645349502563477, "num_chars": 2}, {"sum_logits": -1.4330896139144897, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4330896139144897, "logits_per_char": -0.7165448069572449, "num_chars": 2}, {"sum_logits": -1.5170804262161255, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.5170804262161255, "logits_per_char": -0.7585402131080627, "num_chars": 2}, {"sum_logits": -1.138446569442749, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.138446569442749, "logits_per_char": -0.5692232847213745, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 400, "native_id": "VASoL_2009_5_27", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3983154296875, "incorrect_loss_raw": 1.3891688187917073, "correct_loss_per_char": 0.69915771484375, "incorrect_loss_per_char": 0.6945844093958536, "correct_loss_per_token": 1.3983154296875, "incorrect_loss_per_token": 1.3891688187917073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4493589401245117, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4493589401245117, "logits_per_char": -0.7246794700622559, "num_chars": 2}, {"sum_logits": -1.4103327989578247, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4103327989578247, "logits_per_char": -0.7051663994789124, "num_chars": 2}, {"sum_logits": -1.3983154296875, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3983154296875, "logits_per_char": -0.69915771484375, "num_chars": 2}, {"sum_logits": -1.3078147172927856, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.3078147172927856, "logits_per_char": -0.6539073586463928, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 401, "native_id": "Mercury_7013843", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4048428535461426, "incorrect_loss_raw": 1.3867555061976116, "correct_loss_per_char": 0.7024214267730713, "incorrect_loss_per_char": 0.6933777530988058, "correct_loss_per_token": 1.4048428535461426, "incorrect_loss_per_token": 1.3867555061976116, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4605541229248047, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4605541229248047, "logits_per_char": -0.7302770614624023, "num_chars": 2}, {"sum_logits": -1.382728934288025, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.382728934288025, "logits_per_char": -0.6913644671440125, "num_chars": 2}, {"sum_logits": -1.3169834613800049, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.3169834613800049, "logits_per_char": -0.6584917306900024, "num_chars": 2}, {"sum_logits": -1.4048428535461426, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4048428535461426, "logits_per_char": -0.7024214267730713, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 402, "native_id": "MCAS_8_2014_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5699403285980225, "incorrect_loss_raw": 1.3402543465296428, "correct_loss_per_char": 0.7849701642990112, "incorrect_loss_per_char": 0.6701271732648214, "correct_loss_per_token": 1.5699403285980225, "incorrect_loss_per_token": 1.3402543465296428, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.323063850402832, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.323063850402832, "logits_per_char": -0.661531925201416, "num_chars": 2}, {"sum_logits": -1.2669031620025635, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.2669031620025635, "logits_per_char": -0.6334515810012817, "num_chars": 2}, {"sum_logits": -1.4307960271835327, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4307960271835327, "logits_per_char": -0.7153980135917664, "num_chars": 2}, {"sum_logits": -1.5699403285980225, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.5699403285980225, "logits_per_char": -0.7849701642990112, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 403, "native_id": "NYSEDREGENTS_2015_4_23", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3461599349975586, "incorrect_loss_raw": 1.4074019591013591, "correct_loss_per_char": 0.6730799674987793, "incorrect_loss_per_char": 0.7037009795506796, "correct_loss_per_token": 1.3461599349975586, "incorrect_loss_per_token": 1.4074019591013591, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4992488622665405, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4992488622665405, "logits_per_char": -0.7496244311332703, "num_chars": 2}, {"sum_logits": -1.3167471885681152, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3167471885681152, "logits_per_char": -0.6583735942840576, "num_chars": 2}, {"sum_logits": -1.4062098264694214, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4062098264694214, "logits_per_char": -0.7031049132347107, "num_chars": 2}, {"sum_logits": -1.3461599349975586, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3461599349975586, "logits_per_char": -0.6730799674987793, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 404, "native_id": "Mercury_7222863", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3479315042495728, "incorrect_loss_raw": 1.4065224726994832, "correct_loss_per_char": 0.6739657521247864, "incorrect_loss_per_char": 0.7032612363497416, "correct_loss_per_token": 1.3479315042495728, "incorrect_loss_per_token": 1.4065224726994832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.505501389503479, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.505501389503479, "logits_per_char": -0.7527506947517395, "num_chars": 2}, {"sum_logits": -1.3554052114486694, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3554052114486694, "logits_per_char": -0.6777026057243347, "num_chars": 2}, {"sum_logits": -1.3586608171463013, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3586608171463013, "logits_per_char": -0.6793304085731506, "num_chars": 2}, {"sum_logits": -1.3479315042495728, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3479315042495728, "logits_per_char": -0.6739657521247864, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 405, "native_id": "NYSEDREGENTS_2010_4_9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.446267008781433, "incorrect_loss_raw": 1.3778469562530518, "correct_loss_per_char": 0.7231335043907166, "incorrect_loss_per_char": 0.6889234781265259, "correct_loss_per_token": 1.446267008781433, "incorrect_loss_per_token": 1.3778469562530518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2704248428344727, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.2704248428344727, "logits_per_char": -0.6352124214172363, "num_chars": 2}, {"sum_logits": -1.4556561708450317, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4556561708450317, "logits_per_char": -0.7278280854225159, "num_chars": 2}, {"sum_logits": -1.4074598550796509, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4074598550796509, "logits_per_char": -0.7037299275398254, "num_chars": 2}, {"sum_logits": -1.446267008781433, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.446267008781433, "logits_per_char": -0.7231335043907166, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 406, "native_id": "MCAS_2003_5_28", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5113204717636108, "incorrect_loss_raw": 1.3559916416803997, "correct_loss_per_char": 0.7556602358818054, "incorrect_loss_per_char": 0.6779958208401998, "correct_loss_per_token": 1.5113204717636108, "incorrect_loss_per_token": 1.3559916416803997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5113204717636108, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5113204717636108, "logits_per_char": -0.7556602358818054, "num_chars": 2}, {"sum_logits": -1.406956672668457, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.406956672668457, "logits_per_char": -0.7034783363342285, "num_chars": 2}, {"sum_logits": -1.4108264446258545, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4108264446258545, "logits_per_char": -0.7054132223129272, "num_chars": 2}, {"sum_logits": -1.2501918077468872, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.2501918077468872, "logits_per_char": -0.6250959038734436, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 407, "native_id": "MSA_2012_8_36", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4415359497070312, "incorrect_loss_raw": 1.3859751621882122, "correct_loss_per_char": 0.7207679748535156, "incorrect_loss_per_char": 0.6929875810941061, "correct_loss_per_token": 1.4415359497070312, "incorrect_loss_per_token": 1.3859751621882122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.532213807106018, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.532213807106018, "logits_per_char": -0.766106903553009, "num_chars": 2}, {"sum_logits": -1.4548654556274414, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4548654556274414, "logits_per_char": -0.7274327278137207, "num_chars": 2}, {"sum_logits": -1.4415359497070312, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4415359497070312, "logits_per_char": -0.7207679748535156, "num_chars": 2}, {"sum_logits": -1.1708462238311768, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.1708462238311768, "logits_per_char": -0.5854231119155884, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 408, "native_id": "TIMSS_2003_8_pg33", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.40237557888031, "incorrect_loss_raw": 1.3854130506515503, "correct_loss_per_char": 0.701187789440155, "incorrect_loss_per_char": 0.6927065253257751, "correct_loss_per_token": 1.40237557888031, "incorrect_loss_per_token": 1.3854130506515503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3870652914047241, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3870652914047241, "logits_per_char": -0.6935326457023621, "num_chars": 2}, {"sum_logits": -1.362786889076233, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.362786889076233, "logits_per_char": -0.6813934445381165, "num_chars": 2}, {"sum_logits": -1.40237557888031, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.40237557888031, "logits_per_char": -0.701187789440155, "num_chars": 2}, {"sum_logits": -1.4063869714736938, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4063869714736938, "logits_per_char": -0.7031934857368469, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 409, "native_id": "Mercury_SC_402627", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4559422731399536, "incorrect_loss_raw": 1.374539057413737, "correct_loss_per_char": 0.7279711365699768, "incorrect_loss_per_char": 0.6872695287068685, "correct_loss_per_token": 1.4559422731399536, "incorrect_loss_per_token": 1.374539057413737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2411020994186401, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.2411020994186401, "logits_per_char": -0.6205510497093201, "num_chars": 2}, {"sum_logits": -1.3910553455352783, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3910553455352783, "logits_per_char": -0.6955276727676392, "num_chars": 2}, {"sum_logits": -1.4559422731399536, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4559422731399536, "logits_per_char": -0.7279711365699768, "num_chars": 2}, {"sum_logits": -1.4914597272872925, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4914597272872925, "logits_per_char": -0.7457298636436462, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 410, "native_id": "Mercury_192990", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0180485248565674, "incorrect_loss_raw": 1.5707517464955647, "correct_loss_per_char": 0.5090242624282837, "incorrect_loss_per_char": 0.7853758732477824, "correct_loss_per_token": 1.0180485248565674, "incorrect_loss_per_token": 1.5707517464955647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0180485248565674, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.0180485248565674, "logits_per_char": -0.5090242624282837, "num_chars": 2}, {"sum_logits": -1.4272444248199463, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4272444248199463, "logits_per_char": -0.7136222124099731, "num_chars": 2}, {"sum_logits": -1.518611192703247, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.518611192703247, "logits_per_char": -0.7593055963516235, "num_chars": 2}, {"sum_logits": -1.766399621963501, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.766399621963501, "logits_per_char": -0.8831998109817505, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 411, "native_id": "Mercury_405772", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.497111201286316, "incorrect_loss_raw": 1.3633445501327515, "correct_loss_per_char": 0.748555600643158, "incorrect_loss_per_char": 0.6816722750663757, "correct_loss_per_token": 1.497111201286316, "incorrect_loss_per_token": 1.3633445501327515, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.497111201286316, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.497111201286316, "logits_per_char": -0.748555600643158, "num_chars": 2}, {"sum_logits": -1.4177546501159668, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4177546501159668, "logits_per_char": -0.7088773250579834, "num_chars": 2}, {"sum_logits": -1.4498051404953003, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4498051404953003, "logits_per_char": -0.7249025702476501, "num_chars": 2}, {"sum_logits": -1.2224738597869873, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.2224738597869873, "logits_per_char": -0.6112369298934937, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 412, "native_id": "Mercury_SC_408509", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.454634189605713, "incorrect_loss_raw": 1.3823163112004597, "correct_loss_per_char": 0.7273170948028564, "incorrect_loss_per_char": 0.6911581556002299, "correct_loss_per_token": 1.454634189605713, "incorrect_loss_per_token": 1.3823163112004597, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5365300178527832, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.5365300178527832, "logits_per_char": -0.7682650089263916, "num_chars": 2}, {"sum_logits": -1.4482877254486084, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4482877254486084, "logits_per_char": -0.7241438627243042, "num_chars": 2}, {"sum_logits": -1.454634189605713, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.454634189605713, "logits_per_char": -0.7273170948028564, "num_chars": 2}, {"sum_logits": -1.1621311902999878, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.1621311902999878, "logits_per_char": -0.5810655951499939, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 413, "native_id": "LEAP__4_10228", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.294716477394104, "incorrect_loss_raw": 1.4249303340911865, "correct_loss_per_char": 0.647358238697052, "incorrect_loss_per_char": 0.7124651670455933, "correct_loss_per_token": 1.294716477394104, "incorrect_loss_per_token": 1.4249303340911865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4640676975250244, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4640676975250244, "logits_per_char": -0.7320338487625122, "num_chars": 2}, {"sum_logits": -1.389016032218933, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.389016032218933, "logits_per_char": -0.6945080161094666, "num_chars": 2}, {"sum_logits": -1.421707272529602, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.421707272529602, "logits_per_char": -0.710853636264801, "num_chars": 2}, {"sum_logits": -1.294716477394104, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.294716477394104, "logits_per_char": -0.647358238697052, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 414, "native_id": "NYSEDREGENTS_2010_8_15", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1775330305099487, "incorrect_loss_raw": 1.477321743965149, "correct_loss_per_char": 0.5887665152549744, "incorrect_loss_per_char": 0.7386608719825745, "correct_loss_per_token": 1.1775330305099487, "incorrect_loss_per_token": 1.477321743965149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1775330305099487, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.1775330305099487, "logits_per_char": -0.5887665152549744, "num_chars": 2}, {"sum_logits": -1.3431793451309204, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3431793451309204, "logits_per_char": -0.6715896725654602, "num_chars": 2}, {"sum_logits": -1.5239964723587036, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.5239964723587036, "logits_per_char": -0.7619982361793518, "num_chars": 2}, {"sum_logits": -1.5647894144058228, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.5647894144058228, "logits_per_char": -0.7823947072029114, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 415, "native_id": "Mercury_7007613", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3902453184127808, "incorrect_loss_raw": 1.3893363078435261, "correct_loss_per_char": 0.6951226592063904, "incorrect_loss_per_char": 0.6946681539217631, "correct_loss_per_token": 1.3902453184127808, "incorrect_loss_per_token": 1.3893363078435261, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3897264003753662, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3897264003753662, "logits_per_char": -0.6948632001876831, "num_chars": 2}, {"sum_logits": -1.3949034214019775, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3949034214019775, "logits_per_char": -0.6974517107009888, "num_chars": 2}, {"sum_logits": -1.3833791017532349, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.3833791017532349, "logits_per_char": -0.6916895508766174, "num_chars": 2}, {"sum_logits": -1.3902453184127808, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3902453184127808, "logits_per_char": -0.6951226592063904, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 416, "native_id": "Mercury_7205468", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5939359664916992, "incorrect_loss_raw": 1.3317959308624268, "correct_loss_per_char": 0.7969679832458496, "incorrect_loss_per_char": 0.6658979654312134, "correct_loss_per_token": 1.5939359664916992, "incorrect_loss_per_token": 1.3317959308624268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3154737949371338, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3154737949371338, "logits_per_char": -0.6577368974685669, "num_chars": 2}, {"sum_logits": -1.5939359664916992, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.5939359664916992, "logits_per_char": -0.7969679832458496, "num_chars": 2}, {"sum_logits": -1.3952019214630127, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3952019214630127, "logits_per_char": -0.6976009607315063, "num_chars": 2}, {"sum_logits": -1.2847120761871338, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.2847120761871338, "logits_per_char": -0.6423560380935669, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 417, "native_id": "Mercury_SC_406026", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.440598964691162, "incorrect_loss_raw": 1.3755673170089722, "correct_loss_per_char": 0.720299482345581, "incorrect_loss_per_char": 0.6877836585044861, "correct_loss_per_token": 1.440598964691162, "incorrect_loss_per_token": 1.3755673170089722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3977324962615967, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3977324962615967, "logits_per_char": -0.6988662481307983, "num_chars": 2}, {"sum_logits": -1.440598964691162, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.440598964691162, "logits_per_char": -0.720299482345581, "num_chars": 2}, {"sum_logits": -1.4107342958450317, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4107342958450317, "logits_per_char": -0.7053671479225159, "num_chars": 2}, {"sum_logits": -1.318235158920288, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.318235158920288, "logits_per_char": -0.659117579460144, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 418, "native_id": "Mercury_SC_405792", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.446393609046936, "incorrect_loss_raw": 1.3743706941604614, "correct_loss_per_char": 0.723196804523468, "incorrect_loss_per_char": 0.6871853470802307, "correct_loss_per_token": 1.446393609046936, "incorrect_loss_per_token": 1.3743706941604614, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.446393609046936, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.446393609046936, "logits_per_char": -0.723196804523468, "num_chars": 2}, {"sum_logits": -1.4326478242874146, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4326478242874146, "logits_per_char": -0.7163239121437073, "num_chars": 2}, {"sum_logits": -1.2936409711837769, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.2936409711837769, "logits_per_char": -0.6468204855918884, "num_chars": 2}, {"sum_logits": -1.3968232870101929, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3968232870101929, "logits_per_char": -0.6984116435050964, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 419, "native_id": "Mercury_SC_405482", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3995788097381592, "incorrect_loss_raw": 1.4004729588826497, "correct_loss_per_char": 0.6997894048690796, "incorrect_loss_per_char": 0.7002364794413248, "correct_loss_per_token": 1.3995788097381592, "incorrect_loss_per_token": 1.4004729588826497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5752097368240356, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5752097368240356, "logits_per_char": -0.7876048684120178, "num_chars": 2}, {"sum_logits": -1.4468019008636475, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4468019008636475, "logits_per_char": -0.7234009504318237, "num_chars": 2}, {"sum_logits": -1.3995788097381592, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3995788097381592, "logits_per_char": -0.6997894048690796, "num_chars": 2}, {"sum_logits": -1.1794072389602661, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.1794072389602661, "logits_per_char": -0.5897036194801331, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 420, "native_id": "TIMSS_1995_8_M10", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.410131812095642, "incorrect_loss_raw": 1.3871405522028606, "correct_loss_per_char": 0.705065906047821, "incorrect_loss_per_char": 0.6935702761014303, "correct_loss_per_token": 1.410131812095642, "incorrect_loss_per_token": 1.3871405522028606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4298762083053589, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.4298762083053589, "logits_per_char": -0.7149381041526794, "num_chars": 2}, {"sum_logits": -1.3888438940048218, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3888438940048218, "logits_per_char": -0.6944219470024109, "num_chars": 2}, {"sum_logits": -1.3427015542984009, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.3427015542984009, "logits_per_char": -0.6713507771492004, "num_chars": 2}, {"sum_logits": -1.410131812095642, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.410131812095642, "logits_per_char": -0.705065906047821, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 421, "native_id": "MCAS_2011_8_17682", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5055675506591797, "incorrect_loss_raw": 1.3592915534973145, "correct_loss_per_char": 0.7527837753295898, "incorrect_loss_per_char": 0.6796457767486572, "correct_loss_per_token": 1.5055675506591797, "incorrect_loss_per_token": 1.3592915534973145, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.255859136581421, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.255859136581421, "logits_per_char": -0.6279295682907104, "num_chars": 2}, {"sum_logits": -1.3638434410095215, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3638434410095215, "logits_per_char": -0.6819217205047607, "num_chars": 2}, {"sum_logits": -1.5055675506591797, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.5055675506591797, "logits_per_char": -0.7527837753295898, "num_chars": 2}, {"sum_logits": -1.458172082901001, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.458172082901001, "logits_per_char": -0.7290860414505005, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 422, "native_id": "VASoL_2008_5_14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3271054029464722, "incorrect_loss_raw": 1.415899674097697, "correct_loss_per_char": 0.6635527014732361, "incorrect_loss_per_char": 0.7079498370488485, "correct_loss_per_token": 1.3271054029464722, "incorrect_loss_per_token": 1.415899674097697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4644030332565308, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4644030332565308, "logits_per_char": -0.7322015166282654, "num_chars": 2}, {"sum_logits": -1.3271054029464722, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3271054029464722, "logits_per_char": -0.6635527014732361, "num_chars": 2}, {"sum_logits": -1.2931725978851318, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2931725978851318, "logits_per_char": -0.6465862989425659, "num_chars": 2}, {"sum_logits": -1.4901233911514282, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4901233911514282, "logits_per_char": -0.7450616955757141, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 423, "native_id": "Mercury_7083790", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4182254076004028, "incorrect_loss_raw": 1.3806492884953816, "correct_loss_per_char": 0.7091127038002014, "incorrect_loss_per_char": 0.6903246442476908, "correct_loss_per_token": 1.4182254076004028, "incorrect_loss_per_token": 1.3806492884953816, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4182254076004028, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4182254076004028, "logits_per_char": -0.7091127038002014, "num_chars": 2}, {"sum_logits": -1.4145104885101318, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4145104885101318, "logits_per_char": -0.7072552442550659, "num_chars": 2}, {"sum_logits": -1.3891966342926025, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3891966342926025, "logits_per_char": -0.6945983171463013, "num_chars": 2}, {"sum_logits": -1.3382407426834106, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.3382407426834106, "logits_per_char": -0.6691203713417053, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 424, "native_id": "MCAS_2003_8_5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3502362966537476, "incorrect_loss_raw": 1.4049620628356934, "correct_loss_per_char": 0.6751181483268738, "incorrect_loss_per_char": 0.7024810314178467, "correct_loss_per_token": 1.3502362966537476, "incorrect_loss_per_token": 1.4049620628356934, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3502362966537476, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.3502362966537476, "logits_per_char": -0.6751181483268738, "num_chars": 2}, {"sum_logits": -1.4132895469665527, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4132895469665527, "logits_per_char": -0.7066447734832764, "num_chars": 2}, {"sum_logits": -1.3888295888900757, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3888295888900757, "logits_per_char": -0.6944147944450378, "num_chars": 2}, {"sum_logits": -1.4127670526504517, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4127670526504517, "logits_per_char": -0.7063835263252258, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 425, "native_id": "Mercury_7063980", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.326396107673645, "incorrect_loss_raw": 1.4312323331832886, "correct_loss_per_char": 0.6631980538368225, "incorrect_loss_per_char": 0.7156161665916443, "correct_loss_per_token": 1.326396107673645, "incorrect_loss_per_token": 1.4312323331832886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.326396107673645, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.326396107673645, "logits_per_char": -0.6631980538368225, "num_chars": 2}, {"sum_logits": -1.1843085289001465, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.1843085289001465, "logits_per_char": -0.5921542644500732, "num_chars": 2}, {"sum_logits": -1.4605165719985962, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4605165719985962, "logits_per_char": -0.7302582859992981, "num_chars": 2}, {"sum_logits": -1.648871898651123, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.648871898651123, "logits_per_char": -0.8244359493255615, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 426, "native_id": "Mercury_SC_408740", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3351445198059082, "incorrect_loss_raw": 1.4254042307535808, "correct_loss_per_char": 0.6675722599029541, "incorrect_loss_per_char": 0.7127021153767904, "correct_loss_per_token": 1.3351445198059082, "incorrect_loss_per_token": 1.4254042307535808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.662590503692627, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.662590503692627, "logits_per_char": -0.8312952518463135, "num_chars": 2}, {"sum_logits": -1.4036191701889038, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4036191701889038, "logits_per_char": -0.7018095850944519, "num_chars": 2}, {"sum_logits": -1.3351445198059082, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3351445198059082, "logits_per_char": -0.6675722599029541, "num_chars": 2}, {"sum_logits": -1.2100030183792114, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.2100030183792114, "logits_per_char": -0.6050015091896057, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 427, "native_id": "Mercury_7012583", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3446800708770752, "incorrect_loss_raw": 1.4073046843210857, "correct_loss_per_char": 0.6723400354385376, "incorrect_loss_per_char": 0.7036523421605428, "correct_loss_per_token": 1.3446800708770752, "incorrect_loss_per_token": 1.4073046843210857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.419277548789978, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.419277548789978, "logits_per_char": -0.709638774394989, "num_chars": 2}, {"sum_logits": -1.3625401258468628, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3625401258468628, "logits_per_char": -0.6812700629234314, "num_chars": 2}, {"sum_logits": -1.3446800708770752, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.3446800708770752, "logits_per_char": -0.6723400354385376, "num_chars": 2}, {"sum_logits": -1.440096378326416, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.440096378326416, "logits_per_char": -0.720048189163208, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 428, "native_id": "MCAS_2004_5_16", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3573594093322754, "incorrect_loss_raw": 1.4034614165623982, "correct_loss_per_char": 0.6786797046661377, "incorrect_loss_per_char": 0.7017307082811991, "correct_loss_per_token": 1.3573594093322754, "incorrect_loss_per_token": 1.4034614165623982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4163792133331299, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4163792133331299, "logits_per_char": -0.7081896066665649, "num_chars": 2}, {"sum_logits": -1.3616384267807007, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3616384267807007, "logits_per_char": -0.6808192133903503, "num_chars": 2}, {"sum_logits": -1.3573594093322754, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.3573594093322754, "logits_per_char": -0.6786797046661377, "num_chars": 2}, {"sum_logits": -1.4323666095733643, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4323666095733643, "logits_per_char": -0.7161833047866821, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 429, "native_id": "Mercury_7091893", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3804608583450317, "incorrect_loss_raw": 1.3928025563557942, "correct_loss_per_char": 0.6902304291725159, "incorrect_loss_per_char": 0.6964012781778971, "correct_loss_per_token": 1.3804608583450317, "incorrect_loss_per_token": 1.3928025563557942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.443930983543396, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.443930983543396, "logits_per_char": -0.721965491771698, "num_chars": 2}, {"sum_logits": -1.380313515663147, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.380313515663147, "logits_per_char": -0.6901567578315735, "num_chars": 2}, {"sum_logits": -1.3804608583450317, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3804608583450317, "logits_per_char": -0.6902304291725159, "num_chars": 2}, {"sum_logits": -1.3541631698608398, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.3541631698608398, "logits_per_char": -0.6770815849304199, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 430, "native_id": "Mercury_7176103", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5071873664855957, "incorrect_loss_raw": 1.3553585608800252, "correct_loss_per_char": 0.7535936832427979, "incorrect_loss_per_char": 0.6776792804400126, "correct_loss_per_token": 1.5071873664855957, "incorrect_loss_per_token": 1.3553585608800252, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5071873664855957, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.5071873664855957, "logits_per_char": -0.7535936832427979, "num_chars": 2}, {"sum_logits": -1.295161485671997, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.295161485671997, "logits_per_char": -0.6475807428359985, "num_chars": 2}, {"sum_logits": -1.3690639734268188, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3690639734268188, "logits_per_char": -0.6845319867134094, "num_chars": 2}, {"sum_logits": -1.4018502235412598, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4018502235412598, "logits_per_char": -0.7009251117706299, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 431, "native_id": "Mercury_SC_401126", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2989588975906372, "incorrect_loss_raw": 1.4232178926467896, "correct_loss_per_char": 0.6494794487953186, "incorrect_loss_per_char": 0.7116089463233948, "correct_loss_per_token": 1.2989588975906372, "incorrect_loss_per_token": 1.4232178926467896, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3918859958648682, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3918859958648682, "logits_per_char": -0.6959429979324341, "num_chars": 2}, {"sum_logits": -1.4391448497772217, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4391448497772217, "logits_per_char": -0.7195724248886108, "num_chars": 2}, {"sum_logits": -1.4386228322982788, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4386228322982788, "logits_per_char": -0.7193114161491394, "num_chars": 2}, {"sum_logits": -1.2989588975906372, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2989588975906372, "logits_per_char": -0.6494794487953186, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 432, "native_id": "Mercury_SC_415489", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4607422351837158, "incorrect_loss_raw": 1.3684364557266235, "correct_loss_per_char": 0.7303711175918579, "incorrect_loss_per_char": 0.6842182278633118, "correct_loss_per_token": 1.4607422351837158, "incorrect_loss_per_token": 1.3684364557266235, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4327465295791626, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4327465295791626, "logits_per_char": -0.7163732647895813, "num_chars": 2}, {"sum_logits": -1.3643968105316162, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3643968105316162, "logits_per_char": -0.6821984052658081, "num_chars": 2}, {"sum_logits": -1.4607422351837158, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4607422351837158, "logits_per_char": -0.7303711175918579, "num_chars": 2}, {"sum_logits": -1.3081660270690918, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.3081660270690918, "logits_per_char": -0.6540830135345459, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 433, "native_id": "Mercury_7162575", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4104515314102173, "incorrect_loss_raw": 1.3861937522888184, "correct_loss_per_char": 0.7052257657051086, "incorrect_loss_per_char": 0.6930968761444092, "correct_loss_per_token": 1.4104515314102173, "incorrect_loss_per_token": 1.3861937522888184, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4872145652770996, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4872145652770996, "logits_per_char": -0.7436072826385498, "num_chars": 2}, {"sum_logits": -1.4104515314102173, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4104515314102173, "logits_per_char": -0.7052257657051086, "num_chars": 2}, {"sum_logits": -1.3481671810150146, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3481671810150146, "logits_per_char": -0.6740835905075073, "num_chars": 2}, {"sum_logits": -1.3231995105743408, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3231995105743408, "logits_per_char": -0.6615997552871704, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 434, "native_id": "VASoL_2007_5_31", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4399845600128174, "incorrect_loss_raw": 1.3759587208429973, "correct_loss_per_char": 0.7199922800064087, "incorrect_loss_per_char": 0.6879793604214987, "correct_loss_per_token": 1.4399845600128174, "incorrect_loss_per_token": 1.3759587208429973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4182296991348267, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4182296991348267, "logits_per_char": -0.7091148495674133, "num_chars": 2}, {"sum_logits": -1.3061749935150146, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3061749935150146, "logits_per_char": -0.6530874967575073, "num_chars": 2}, {"sum_logits": -1.4399845600128174, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4399845600128174, "logits_per_char": -0.7199922800064087, "num_chars": 2}, {"sum_logits": -1.4034714698791504, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4034714698791504, "logits_per_char": -0.7017357349395752, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 435, "native_id": "Mercury_7166863", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3353074789047241, "incorrect_loss_raw": 1.4093722899754841, "correct_loss_per_char": 0.6676537394523621, "incorrect_loss_per_char": 0.7046861449877421, "correct_loss_per_token": 1.3353074789047241, "incorrect_loss_per_token": 1.4093722899754841, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.483299970626831, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.483299970626831, "logits_per_char": -0.7416499853134155, "num_chars": 2}, {"sum_logits": -1.3353074789047241, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3353074789047241, "logits_per_char": -0.6676537394523621, "num_chars": 2}, {"sum_logits": -1.3664329051971436, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3664329051971436, "logits_per_char": -0.6832164525985718, "num_chars": 2}, {"sum_logits": -1.378383994102478, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.378383994102478, "logits_per_char": -0.689191997051239, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 436, "native_id": "Mercury_SC_413135", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2606669664382935, "incorrect_loss_raw": 1.4407916863759358, "correct_loss_per_char": 0.6303334832191467, "incorrect_loss_per_char": 0.7203958431879679, "correct_loss_per_token": 1.2606669664382935, "incorrect_loss_per_token": 1.4407916863759358, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3484241962432861, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3484241962432861, "logits_per_char": -0.6742120981216431, "num_chars": 2}, {"sum_logits": -1.4544544219970703, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4544544219970703, "logits_per_char": -0.7272272109985352, "num_chars": 2}, {"sum_logits": -1.2606669664382935, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2606669664382935, "logits_per_char": -0.6303334832191467, "num_chars": 2}, {"sum_logits": -1.5194964408874512, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.5194964408874512, "logits_per_char": -0.7597482204437256, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 437, "native_id": "Mercury_SC_408919", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3491499423980713, "incorrect_loss_raw": 1.4054961999257405, "correct_loss_per_char": 0.6745749711990356, "incorrect_loss_per_char": 0.7027480999628702, "correct_loss_per_token": 1.3491499423980713, "incorrect_loss_per_token": 1.4054961999257405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.419637680053711, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.419637680053711, "logits_per_char": -0.7098188400268555, "num_chars": 2}, {"sum_logits": -1.4158910512924194, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4158910512924194, "logits_per_char": -0.7079455256462097, "num_chars": 2}, {"sum_logits": -1.3809598684310913, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3809598684310913, "logits_per_char": -0.6904799342155457, "num_chars": 2}, {"sum_logits": -1.3491499423980713, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3491499423980713, "logits_per_char": -0.6745749711990356, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 438, "native_id": "TIMSS_1995_8_I14", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2933449745178223, "incorrect_loss_raw": 1.4271812041600545, "correct_loss_per_char": 0.6466724872589111, "incorrect_loss_per_char": 0.7135906020800272, "correct_loss_per_token": 1.2933449745178223, "incorrect_loss_per_token": 1.4271812041600545, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3861750364303589, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3861750364303589, "logits_per_char": -0.6930875182151794, "num_chars": 2}, {"sum_logits": -1.4357579946517944, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4357579946517944, "logits_per_char": -0.7178789973258972, "num_chars": 2}, {"sum_logits": -1.4596105813980103, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4596105813980103, "logits_per_char": -0.7298052906990051, "num_chars": 2}, {"sum_logits": -1.2933449745178223, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.2933449745178223, "logits_per_char": -0.6466724872589111, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 439, "native_id": "Mercury_7267505", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.289919376373291, "incorrect_loss_raw": 1.446091095606486, "correct_loss_per_char": 0.6449596881866455, "incorrect_loss_per_char": 0.723045547803243, "correct_loss_per_token": 1.289919376373291, "incorrect_loss_per_token": 1.446091095606486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.289919376373291, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.289919376373291, "logits_per_char": -0.6449596881866455, "num_chars": 2}, {"sum_logits": -1.1811158657073975, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.1811158657073975, "logits_per_char": -0.5905579328536987, "num_chars": 2}, {"sum_logits": -1.5614855289459229, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.5614855289459229, "logits_per_char": -0.7807427644729614, "num_chars": 2}, {"sum_logits": -1.5956718921661377, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.5956718921661377, "logits_per_char": -0.7978359460830688, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 440, "native_id": "Mercury_7234623", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4043165445327759, "incorrect_loss_raw": 1.3870683511098225, "correct_loss_per_char": 0.7021582722663879, "incorrect_loss_per_char": 0.6935341755549113, "correct_loss_per_token": 1.4043165445327759, "incorrect_loss_per_token": 1.3870683511098225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3806837797164917, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3806837797164917, "logits_per_char": -0.6903418898582458, "num_chars": 2}, {"sum_logits": -1.432898998260498, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.432898998260498, "logits_per_char": -0.716449499130249, "num_chars": 2}, {"sum_logits": -1.347622275352478, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.347622275352478, "logits_per_char": -0.673811137676239, "num_chars": 2}, {"sum_logits": -1.4043165445327759, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4043165445327759, "logits_per_char": -0.7021582722663879, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 441, "native_id": "ACTAAP_2015_5_7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3398905992507935, "incorrect_loss_raw": 1.4076250791549683, "correct_loss_per_char": 0.6699452996253967, "incorrect_loss_per_char": 0.7038125395774841, "correct_loss_per_token": 1.3398905992507935, "incorrect_loss_per_token": 1.4076250791549683, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4569785594940186, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4569785594940186, "logits_per_char": -0.7284892797470093, "num_chars": 2}, {"sum_logits": -1.4105652570724487, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4105652570724487, "logits_per_char": -0.7052826285362244, "num_chars": 2}, {"sum_logits": -1.3398905992507935, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3398905992507935, "logits_per_char": -0.6699452996253967, "num_chars": 2}, {"sum_logits": -1.3553314208984375, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3553314208984375, "logits_per_char": -0.6776657104492188, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 442, "native_id": "MCAS_2000_4_31", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4110060930252075, "incorrect_loss_raw": 1.3881327311197917, "correct_loss_per_char": 0.7055030465126038, "incorrect_loss_per_char": 0.6940663655598959, "correct_loss_per_token": 1.4110060930252075, "incorrect_loss_per_token": 1.3881327311197917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4684348106384277, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4684348106384277, "logits_per_char": -0.7342174053192139, "num_chars": 2}, {"sum_logits": -1.4110060930252075, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4110060930252075, "logits_per_char": -0.7055030465126038, "num_chars": 2}, {"sum_logits": -1.4406607151031494, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4406607151031494, "logits_per_char": -0.7203303575515747, "num_chars": 2}, {"sum_logits": -1.2553026676177979, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2553026676177979, "logits_per_char": -0.6276513338088989, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 443, "native_id": "Mercury_177345", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5525544881820679, "incorrect_loss_raw": 1.3411064942677815, "correct_loss_per_char": 0.7762772440910339, "incorrect_loss_per_char": 0.6705532471338908, "correct_loss_per_token": 1.5525544881820679, "incorrect_loss_per_token": 1.3411064942677815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3606232404708862, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3606232404708862, "logits_per_char": -0.6803116202354431, "num_chars": 2}, {"sum_logits": -1.3289060592651367, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.3289060592651367, "logits_per_char": -0.6644530296325684, "num_chars": 2}, {"sum_logits": -1.3337901830673218, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3337901830673218, "logits_per_char": -0.6668950915336609, "num_chars": 2}, {"sum_logits": -1.5525544881820679, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.5525544881820679, "logits_per_char": -0.7762772440910339, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 444, "native_id": "MDSA_2010_5_19", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.358579158782959, "incorrect_loss_raw": 1.4058339595794678, "correct_loss_per_char": 0.6792895793914795, "incorrect_loss_per_char": 0.7029169797897339, "correct_loss_per_token": 1.358579158782959, "incorrect_loss_per_token": 1.4058339595794678, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5204864740371704, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.5204864740371704, "logits_per_char": -0.7602432370185852, "num_chars": 2}, {"sum_logits": -1.358579158782959, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.358579158782959, "logits_per_char": -0.6792895793914795, "num_chars": 2}, {"sum_logits": -1.4248363971710205, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4248363971710205, "logits_per_char": -0.7124181985855103, "num_chars": 2}, {"sum_logits": -1.2721790075302124, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.2721790075302124, "logits_per_char": -0.6360895037651062, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 445, "native_id": "Mercury_7004778", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3829268217086792, "incorrect_loss_raw": 1.3958024581273396, "correct_loss_per_char": 0.6914634108543396, "incorrect_loss_per_char": 0.6979012290636698, "correct_loss_per_token": 1.3829268217086792, "incorrect_loss_per_token": 1.3958024581273396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3829268217086792, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3829268217086792, "logits_per_char": -0.6914634108543396, "num_chars": 2}, {"sum_logits": -1.3881940841674805, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3881940841674805, "logits_per_char": -0.6940970420837402, "num_chars": 2}, {"sum_logits": -1.3043971061706543, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.3043971061706543, "logits_per_char": -0.6521985530853271, "num_chars": 2}, {"sum_logits": -1.4948161840438843, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4948161840438843, "logits_per_char": -0.7474080920219421, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 446, "native_id": "Mercury_7026618", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.509037971496582, "incorrect_loss_raw": 1.355297843615214, "correct_loss_per_char": 0.754518985748291, "incorrect_loss_per_char": 0.677648921807607, "correct_loss_per_token": 1.509037971496582, "incorrect_loss_per_token": 1.355297843615214, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.509037971496582, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.509037971496582, "logits_per_char": -0.754518985748291, "num_chars": 2}, {"sum_logits": -1.3118605613708496, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3118605613708496, "logits_per_char": -0.6559302806854248, "num_chars": 2}, {"sum_logits": -1.333474040031433, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.333474040031433, "logits_per_char": -0.6667370200157166, "num_chars": 2}, {"sum_logits": -1.4205589294433594, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4205589294433594, "logits_per_char": -0.7102794647216797, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 447, "native_id": "Mercury_SC_400676", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4380817413330078, "incorrect_loss_raw": 1.3765850861867268, "correct_loss_per_char": 0.7190408706665039, "incorrect_loss_per_char": 0.6882925430933634, "correct_loss_per_token": 1.4380817413330078, "incorrect_loss_per_token": 1.3765850861867268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4508728981018066, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4508728981018066, "logits_per_char": -0.7254364490509033, "num_chars": 2}, {"sum_logits": -1.395346760749817, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.395346760749817, "logits_per_char": -0.6976733803749084, "num_chars": 2}, {"sum_logits": -1.4380817413330078, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4380817413330078, "logits_per_char": -0.7190408706665039, "num_chars": 2}, {"sum_logits": -1.2835355997085571, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2835355997085571, "logits_per_char": -0.6417677998542786, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 448, "native_id": "TIMSS_2003_4_pg10", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.269614815711975, "incorrect_loss_raw": 1.436639944712321, "correct_loss_per_char": 0.6348074078559875, "incorrect_loss_per_char": 0.7183199723561605, "correct_loss_per_token": 1.269614815711975, "incorrect_loss_per_token": 1.436639944712321, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.269614815711975, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.269614815711975, "logits_per_char": -0.6348074078559875, "num_chars": 2}, {"sum_logits": -1.419834852218628, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.419834852218628, "logits_per_char": -0.709917426109314, "num_chars": 2}, {"sum_logits": -1.4258003234863281, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4258003234863281, "logits_per_char": -0.7129001617431641, "num_chars": 2}, {"sum_logits": -1.4642846584320068, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4642846584320068, "logits_per_char": -0.7321423292160034, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 449, "native_id": "Mercury_7141278", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2924672365188599, "incorrect_loss_raw": 1.4290388425191243, "correct_loss_per_char": 0.6462336182594299, "incorrect_loss_per_char": 0.7145194212595621, "correct_loss_per_token": 1.2924672365188599, "incorrect_loss_per_token": 1.4290388425191243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2924672365188599, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.2924672365188599, "logits_per_char": -0.6462336182594299, "num_chars": 2}, {"sum_logits": -1.3792604207992554, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3792604207992554, "logits_per_char": -0.6896302103996277, "num_chars": 2}, {"sum_logits": -1.398054599761963, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.398054599761963, "logits_per_char": -0.6990272998809814, "num_chars": 2}, {"sum_logits": -1.5098015069961548, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.5098015069961548, "logits_per_char": -0.7549007534980774, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 450, "native_id": "Mercury_SC_LBS10906", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4268349409103394, "incorrect_loss_raw": 1.3775373299916585, "correct_loss_per_char": 0.7134174704551697, "incorrect_loss_per_char": 0.6887686649958292, "correct_loss_per_token": 1.4268349409103394, "incorrect_loss_per_token": 1.3775373299916585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3861638307571411, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3861638307571411, "logits_per_char": -0.6930819153785706, "num_chars": 2}, {"sum_logits": -1.3545600175857544, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3545600175857544, "logits_per_char": -0.6772800087928772, "num_chars": 2}, {"sum_logits": -1.39188814163208, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.39188814163208, "logits_per_char": -0.69594407081604, "num_chars": 2}, {"sum_logits": -1.4268349409103394, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4268349409103394, "logits_per_char": -0.7134174704551697, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 451, "native_id": "TIMSS_2011_8_pg77", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3586345911026, "incorrect_loss_raw": 1.4040168523788452, "correct_loss_per_char": 0.6793172955513, "incorrect_loss_per_char": 0.7020084261894226, "correct_loss_per_token": 1.3586345911026, "incorrect_loss_per_token": 1.4040168523788452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3586345911026, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.3586345911026, "logits_per_char": -0.6793172955513, "num_chars": 2}, {"sum_logits": -1.391040325164795, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.391040325164795, "logits_per_char": -0.6955201625823975, "num_chars": 2}, {"sum_logits": -1.3872281312942505, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3872281312942505, "logits_per_char": -0.6936140656471252, "num_chars": 2}, {"sum_logits": -1.4337821006774902, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4337821006774902, "logits_per_char": -0.7168910503387451, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 452, "native_id": "Mercury_7084438", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3894842863082886, "incorrect_loss_raw": 1.3905623356501262, "correct_loss_per_char": 0.6947421431541443, "incorrect_loss_per_char": 0.6952811678250631, "correct_loss_per_token": 1.3894842863082886, "incorrect_loss_per_token": 1.3905623356501262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3673789501190186, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3673789501190186, "logits_per_char": -0.6836894750595093, "num_chars": 2}, {"sum_logits": -1.3894842863082886, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3894842863082886, "logits_per_char": -0.6947421431541443, "num_chars": 2}, {"sum_logits": -1.4120622873306274, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4120622873306274, "logits_per_char": -0.7060311436653137, "num_chars": 2}, {"sum_logits": -1.3922457695007324, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3922457695007324, "logits_per_char": -0.6961228847503662, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 453, "native_id": "Mercury_416550", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4637477397918701, "incorrect_loss_raw": 1.374043583869934, "correct_loss_per_char": 0.7318738698959351, "incorrect_loss_per_char": 0.687021791934967, "correct_loss_per_token": 1.4637477397918701, "incorrect_loss_per_token": 1.374043583869934, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4422625303268433, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4422625303268433, "logits_per_char": -0.7211312651634216, "num_chars": 2}, {"sum_logits": -1.4637477397918701, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4637477397918701, "logits_per_char": -0.7318738698959351, "num_chars": 2}, {"sum_logits": -1.4591737985610962, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4591737985610962, "logits_per_char": -0.7295868992805481, "num_chars": 2}, {"sum_logits": -1.2206944227218628, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2206944227218628, "logits_per_char": -0.6103472113609314, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 454, "native_id": "NYSEDREGENTS_2008_4_7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4098764657974243, "incorrect_loss_raw": 1.385330319404602, "correct_loss_per_char": 0.7049382328987122, "incorrect_loss_per_char": 0.692665159702301, "correct_loss_per_token": 1.4098764657974243, "incorrect_loss_per_token": 1.385330319404602, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3486882448196411, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3486882448196411, "logits_per_char": -0.6743441224098206, "num_chars": 2}, {"sum_logits": -1.4057749509811401, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4057749509811401, "logits_per_char": -0.7028874754905701, "num_chars": 2}, {"sum_logits": -1.401527762413025, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.401527762413025, "logits_per_char": -0.7007638812065125, "num_chars": 2}, {"sum_logits": -1.4098764657974243, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4098764657974243, "logits_per_char": -0.7049382328987122, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 455, "native_id": "Mercury_SC_402980", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3763898611068726, "incorrect_loss_raw": 1.395871678988139, "correct_loss_per_char": 0.6881949305534363, "incorrect_loss_per_char": 0.6979358394940695, "correct_loss_per_token": 1.3763898611068726, "incorrect_loss_per_token": 1.395871678988139, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3793854713439941, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3793854713439941, "logits_per_char": -0.6896927356719971, "num_chars": 2}, {"sum_logits": -1.3763898611068726, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3763898611068726, "logits_per_char": -0.6881949305534363, "num_chars": 2}, {"sum_logits": -1.4378323554992676, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4378323554992676, "logits_per_char": -0.7189161777496338, "num_chars": 2}, {"sum_logits": -1.3703972101211548, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3703972101211548, "logits_per_char": -0.6851986050605774, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 456, "native_id": "Mercury_406811", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4277161359786987, "incorrect_loss_raw": 1.3787132104237874, "correct_loss_per_char": 0.7138580679893494, "incorrect_loss_per_char": 0.6893566052118937, "correct_loss_per_token": 1.4277161359786987, "incorrect_loss_per_token": 1.3787132104237874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4232319593429565, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4232319593429565, "logits_per_char": -0.7116159796714783, "num_chars": 2}, {"sum_logits": -1.3814902305603027, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3814902305603027, "logits_per_char": -0.6907451152801514, "num_chars": 2}, {"sum_logits": -1.331417441368103, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.331417441368103, "logits_per_char": -0.6657087206840515, "num_chars": 2}, {"sum_logits": -1.4277161359786987, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4277161359786987, "logits_per_char": -0.7138580679893494, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 457, "native_id": "Mercury_7214235", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5049139261245728, "incorrect_loss_raw": 1.3572835127512615, "correct_loss_per_char": 0.7524569630622864, "incorrect_loss_per_char": 0.6786417563756307, "correct_loss_per_token": 1.5049139261245728, "incorrect_loss_per_token": 1.3572835127512615, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5049139261245728, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.5049139261245728, "logits_per_char": -0.7524569630622864, "num_chars": 2}, {"sum_logits": -1.429818868637085, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.429818868637085, "logits_per_char": -0.7149094343185425, "num_chars": 2}, {"sum_logits": -1.3739880323410034, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3739880323410034, "logits_per_char": -0.6869940161705017, "num_chars": 2}, {"sum_logits": -1.2680436372756958, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.2680436372756958, "logits_per_char": -0.6340218186378479, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 458, "native_id": "Mercury_7250110", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3506383895874023, "incorrect_loss_raw": 1.4106879631678264, "correct_loss_per_char": 0.6753191947937012, "incorrect_loss_per_char": 0.7053439815839132, "correct_loss_per_token": 1.3506383895874023, "incorrect_loss_per_token": 1.4106879631678264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3003746271133423, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3003746271133423, "logits_per_char": -0.6501873135566711, "num_chars": 2}, {"sum_logits": -1.3506383895874023, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3506383895874023, "logits_per_char": -0.6753191947937012, "num_chars": 2}, {"sum_logits": -1.3961853981018066, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3961853981018066, "logits_per_char": -0.6980926990509033, "num_chars": 2}, {"sum_logits": -1.53550386428833, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.53550386428833, "logits_per_char": -0.767751932144165, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 459, "native_id": "Mercury_416586", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5936801433563232, "incorrect_loss_raw": 1.331937591234843, "correct_loss_per_char": 0.7968400716781616, "incorrect_loss_per_char": 0.6659687956174215, "correct_loss_per_token": 1.5936801433563232, "incorrect_loss_per_token": 1.331937591234843, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5936801433563232, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.5936801433563232, "logits_per_char": -0.7968400716781616, "num_chars": 2}, {"sum_logits": -1.3147083520889282, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3147083520889282, "logits_per_char": -0.6573541760444641, "num_chars": 2}, {"sum_logits": -1.4071893692016602, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4071893692016602, "logits_per_char": -0.7035946846008301, "num_chars": 2}, {"sum_logits": -1.2739150524139404, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.2739150524139404, "logits_per_char": -0.6369575262069702, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 460, "native_id": "MCAS_2014_8_11", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.416731834411621, "incorrect_loss_raw": 1.4167495568593342, "correct_loss_per_char": 0.7083659172058105, "incorrect_loss_per_char": 0.7083747784296671, "correct_loss_per_token": 1.416731834411621, "incorrect_loss_per_token": 1.4167495568593342, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7478822469711304, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.7478822469711304, "logits_per_char": -0.8739411234855652, "num_chars": 2}, {"sum_logits": -1.416731834411621, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.416731834411621, "logits_per_char": -0.7083659172058105, "num_chars": 2}, {"sum_logits": -1.414251685142517, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.414251685142517, "logits_per_char": -0.7071258425712585, "num_chars": 2}, {"sum_logits": -1.0881147384643555, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.0881147384643555, "logits_per_char": -0.5440573692321777, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 461, "native_id": "NYSEDREGENTS_2015_8_24", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3998363018035889, "incorrect_loss_raw": 1.3897395928700764, "correct_loss_per_char": 0.6999181509017944, "incorrect_loss_per_char": 0.6948697964350382, "correct_loss_per_token": 1.3998363018035889, "incorrect_loss_per_token": 1.3897395928700764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3184438943862915, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.3184438943862915, "logits_per_char": -0.6592219471931458, "num_chars": 2}, {"sum_logits": -1.3569889068603516, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3569889068603516, "logits_per_char": -0.6784944534301758, "num_chars": 2}, {"sum_logits": -1.3998363018035889, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3998363018035889, "logits_per_char": -0.6999181509017944, "num_chars": 2}, {"sum_logits": -1.4937859773635864, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4937859773635864, "logits_per_char": -0.7468929886817932, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 462, "native_id": "ACTAAP_2013_7_4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4799121618270874, "incorrect_loss_raw": 1.3630464871724446, "correct_loss_per_char": 0.7399560809135437, "incorrect_loss_per_char": 0.6815232435862223, "correct_loss_per_token": 1.4799121618270874, "incorrect_loss_per_token": 1.3630464871724446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3157799243927002, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3157799243927002, "logits_per_char": -0.6578899621963501, "num_chars": 2}, {"sum_logits": -1.4050767421722412, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4050767421722412, "logits_per_char": -0.7025383710861206, "num_chars": 2}, {"sum_logits": -1.4799121618270874, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4799121618270874, "logits_per_char": -0.7399560809135437, "num_chars": 2}, {"sum_logits": -1.3682827949523926, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3682827949523926, "logits_per_char": -0.6841413974761963, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 463, "native_id": "VASoL_2010_3_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4944504499435425, "incorrect_loss_raw": 1.360957423845927, "correct_loss_per_char": 0.7472252249717712, "incorrect_loss_per_char": 0.6804787119229635, "correct_loss_per_token": 1.4944504499435425, "incorrect_loss_per_token": 1.360957423845927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3867801427841187, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3867801427841187, "logits_per_char": -0.6933900713920593, "num_chars": 2}, {"sum_logits": -1.3283535242080688, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3283535242080688, "logits_per_char": -0.6641767621040344, "num_chars": 2}, {"sum_logits": -1.4944504499435425, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4944504499435425, "logits_per_char": -0.7472252249717712, "num_chars": 2}, {"sum_logits": -1.3677386045455933, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3677386045455933, "logits_per_char": -0.6838693022727966, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 464, "native_id": "Mercury_7165795", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4345844984054565, "incorrect_loss_raw": 1.3798502286275227, "correct_loss_per_char": 0.7172922492027283, "incorrect_loss_per_char": 0.6899251143137614, "correct_loss_per_token": 1.4345844984054565, "incorrect_loss_per_token": 1.3798502286275227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4345844984054565, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4345844984054565, "logits_per_char": -0.7172922492027283, "num_chars": 2}, {"sum_logits": -1.3217788934707642, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3217788934707642, "logits_per_char": -0.6608894467353821, "num_chars": 2}, {"sum_logits": -1.30808687210083, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.30808687210083, "logits_per_char": -0.654043436050415, "num_chars": 2}, {"sum_logits": -1.5096849203109741, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.5096849203109741, "logits_per_char": -0.7548424601554871, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 465, "native_id": "FCAT_2012_8_4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4190576076507568, "incorrect_loss_raw": 1.3825912475585938, "correct_loss_per_char": 0.7095288038253784, "incorrect_loss_per_char": 0.6912956237792969, "correct_loss_per_token": 1.4190576076507568, "incorrect_loss_per_token": 1.3825912475585938, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4841259717941284, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4841259717941284, "logits_per_char": -0.7420629858970642, "num_chars": 2}, {"sum_logits": -1.4190576076507568, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4190576076507568, "logits_per_char": -0.7095288038253784, "num_chars": 2}, {"sum_logits": -1.3571436405181885, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3571436405181885, "logits_per_char": -0.6785718202590942, "num_chars": 2}, {"sum_logits": -1.3065041303634644, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.3065041303634644, "logits_per_char": -0.6532520651817322, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 466, "native_id": "Mercury_7012495", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3999154567718506, "incorrect_loss_raw": 1.393770734469096, "correct_loss_per_char": 0.6999577283859253, "incorrect_loss_per_char": 0.696885367234548, "correct_loss_per_token": 1.3999154567718506, "incorrect_loss_per_token": 1.393770734469096, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.496328353881836, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.496328353881836, "logits_per_char": -0.748164176940918, "num_chars": 2}, {"sum_logits": -1.4191538095474243, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4191538095474243, "logits_per_char": -0.7095769047737122, "num_chars": 2}, {"sum_logits": -1.2658300399780273, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.2658300399780273, "logits_per_char": -0.6329150199890137, "num_chars": 2}, {"sum_logits": -1.3999154567718506, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3999154567718506, "logits_per_char": -0.6999577283859253, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 467, "native_id": "Mercury_7128870", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3945732116699219, "incorrect_loss_raw": 1.3892874320348103, "correct_loss_per_char": 0.6972866058349609, "incorrect_loss_per_char": 0.6946437160174052, "correct_loss_per_token": 1.3945732116699219, "incorrect_loss_per_token": 1.3892874320348103, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3945732116699219, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3945732116699219, "logits_per_char": -0.6972866058349609, "num_chars": 2}, {"sum_logits": -1.3397841453552246, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.3397841453552246, "logits_per_char": -0.6698920726776123, "num_chars": 2}, {"sum_logits": -1.404933214187622, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.404933214187622, "logits_per_char": -0.702466607093811, "num_chars": 2}, {"sum_logits": -1.4231449365615845, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4231449365615845, "logits_per_char": -0.7115724682807922, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 468, "native_id": "MDSA_2007_8_25", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4751778841018677, "incorrect_loss_raw": 1.3672727346420288, "correct_loss_per_char": 0.7375889420509338, "incorrect_loss_per_char": 0.6836363673210144, "correct_loss_per_token": 1.4751778841018677, "incorrect_loss_per_token": 1.3672727346420288, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.312619924545288, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.312619924545288, "logits_per_char": -0.656309962272644, "num_chars": 2}, {"sum_logits": -1.3305410146713257, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3305410146713257, "logits_per_char": -0.6652705073356628, "num_chars": 2}, {"sum_logits": -1.4751778841018677, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4751778841018677, "logits_per_char": -0.7375889420509338, "num_chars": 2}, {"sum_logits": -1.4586572647094727, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4586572647094727, "logits_per_char": -0.7293286323547363, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 469, "native_id": "MEA_2013_5_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.481188178062439, "incorrect_loss_raw": 1.3627183437347412, "correct_loss_per_char": 0.7405940890312195, "incorrect_loss_per_char": 0.6813591718673706, "correct_loss_per_token": 1.481188178062439, "incorrect_loss_per_token": 1.3627183437347412, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.481188178062439, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.481188178062439, "logits_per_char": -0.7405940890312195, "num_chars": 2}, {"sum_logits": -1.382840871810913, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.382840871810913, "logits_per_char": -0.6914204359054565, "num_chars": 2}, {"sum_logits": -1.362148404121399, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.362148404121399, "logits_per_char": -0.6810742020606995, "num_chars": 2}, {"sum_logits": -1.3431657552719116, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.3431657552719116, "logits_per_char": -0.6715828776359558, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 470, "native_id": "Mercury_7234168", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4332948923110962, "incorrect_loss_raw": 1.3821799357732136, "correct_loss_per_char": 0.7166474461555481, "incorrect_loss_per_char": 0.6910899678866068, "correct_loss_per_token": 1.4332948923110962, "incorrect_loss_per_token": 1.3821799357732136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.50825035572052, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.50825035572052, "logits_per_char": -0.75412517786026, "num_chars": 2}, {"sum_logits": -1.376965045928955, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.376965045928955, "logits_per_char": -0.6884825229644775, "num_chars": 2}, {"sum_logits": -1.4332948923110962, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4332948923110962, "logits_per_char": -0.7166474461555481, "num_chars": 2}, {"sum_logits": -1.261324405670166, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.261324405670166, "logits_per_char": -0.630662202835083, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 471, "native_id": "Mercury_SC_401163", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3563671112060547, "incorrect_loss_raw": 1.4060518741607666, "correct_loss_per_char": 0.6781835556030273, "incorrect_loss_per_char": 0.7030259370803833, "correct_loss_per_token": 1.3563671112060547, "incorrect_loss_per_token": 1.4060518741607666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5430437326431274, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5430437326431274, "logits_per_char": -0.7715218663215637, "num_chars": 2}, {"sum_logits": -1.3458577394485474, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3458577394485474, "logits_per_char": -0.6729288697242737, "num_chars": 2}, {"sum_logits": -1.3563671112060547, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3563671112060547, "logits_per_char": -0.6781835556030273, "num_chars": 2}, {"sum_logits": -1.329254150390625, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.329254150390625, "logits_per_char": -0.6646270751953125, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 472, "native_id": "Mercury_SC_415001", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.399762511253357, "incorrect_loss_raw": 1.3907272418340046, "correct_loss_per_char": 0.6998812556266785, "incorrect_loss_per_char": 0.6953636209170023, "correct_loss_per_token": 1.399762511253357, "incorrect_loss_per_token": 1.3907272418340046, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3223886489868164, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.3223886489868164, "logits_per_char": -0.6611943244934082, "num_chars": 2}, {"sum_logits": -1.399762511253357, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.399762511253357, "logits_per_char": -0.6998812556266785, "num_chars": 2}, {"sum_logits": -1.343140721321106, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.343140721321106, "logits_per_char": -0.671570360660553, "num_chars": 2}, {"sum_logits": -1.5066523551940918, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.5066523551940918, "logits_per_char": -0.7533261775970459, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 473, "native_id": "Mercury_7220483", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.394107699394226, "incorrect_loss_raw": 1.395622968673706, "correct_loss_per_char": 0.697053849697113, "incorrect_loss_per_char": 0.697811484336853, "correct_loss_per_token": 1.394107699394226, "incorrect_loss_per_token": 1.395622968673706, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5365371704101562, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.5365371704101562, "logits_per_char": -0.7682685852050781, "num_chars": 2}, {"sum_logits": -1.394107699394226, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.394107699394226, "logits_per_char": -0.697053849697113, "num_chars": 2}, {"sum_logits": -1.4044281244277954, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4044281244277954, "logits_per_char": -0.7022140622138977, "num_chars": 2}, {"sum_logits": -1.2459036111831665, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.2459036111831665, "logits_per_char": -0.6229518055915833, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 474, "native_id": "NYSEDREGENTS_2012_4_14", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3874025344848633, "incorrect_loss_raw": 1.39761487642924, "correct_loss_per_char": 0.6937012672424316, "incorrect_loss_per_char": 0.69880743821462, "correct_loss_per_token": 1.3874025344848633, "incorrect_loss_per_token": 1.39761487642924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2504807710647583, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.2504807710647583, "logits_per_char": -0.6252403855323792, "num_chars": 2}, {"sum_logits": -1.3874025344848633, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3874025344848633, "logits_per_char": -0.6937012672424316, "num_chars": 2}, {"sum_logits": -1.4824604988098145, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4824604988098145, "logits_per_char": -0.7412302494049072, "num_chars": 2}, {"sum_logits": -1.459903359413147, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.459903359413147, "logits_per_char": -0.7299516797065735, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 475, "native_id": "MCAS_2004_9_6-v1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4160794019699097, "incorrect_loss_raw": 1.3838396469751995, "correct_loss_per_char": 0.7080397009849548, "incorrect_loss_per_char": 0.6919198234875997, "correct_loss_per_token": 1.4160794019699097, "incorrect_loss_per_token": 1.3838396469751995, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.313080906867981, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.313080906867981, "logits_per_char": -0.6565404534339905, "num_chars": 2}, {"sum_logits": -1.4400193691253662, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4400193691253662, "logits_per_char": -0.7200096845626831, "num_chars": 2}, {"sum_logits": -1.4160794019699097, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4160794019699097, "logits_per_char": -0.7080397009849548, "num_chars": 2}, {"sum_logits": -1.398418664932251, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.398418664932251, "logits_per_char": -0.6992093324661255, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 476, "native_id": "Mercury_SC_409576", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4001309871673584, "incorrect_loss_raw": 1.3899766206741333, "correct_loss_per_char": 0.7000654935836792, "incorrect_loss_per_char": 0.6949883103370667, "correct_loss_per_token": 1.4001309871673584, "incorrect_loss_per_token": 1.3899766206741333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5051087141036987, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.5051087141036987, "logits_per_char": -0.7525543570518494, "num_chars": 2}, {"sum_logits": -1.4001309871673584, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4001309871673584, "logits_per_char": -0.7000654935836792, "num_chars": 2}, {"sum_logits": -1.3638519048690796, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3638519048690796, "logits_per_char": -0.6819259524345398, "num_chars": 2}, {"sum_logits": -1.3009692430496216, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.3009692430496216, "logits_per_char": -0.6504846215248108, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 477, "native_id": "VASoL_2009_5_24", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4047112464904785, "incorrect_loss_raw": 1.3891691366831462, "correct_loss_per_char": 0.7023556232452393, "incorrect_loss_per_char": 0.6945845683415731, "correct_loss_per_token": 1.4047112464904785, "incorrect_loss_per_token": 1.3891691366831462, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4482026100158691, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4482026100158691, "logits_per_char": -0.7241013050079346, "num_chars": 2}, {"sum_logits": -1.4047112464904785, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4047112464904785, "logits_per_char": -0.7023556232452393, "num_chars": 2}, {"sum_logits": -1.4350863695144653, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4350863695144653, "logits_per_char": -0.7175431847572327, "num_chars": 2}, {"sum_logits": -1.284218430519104, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.284218430519104, "logits_per_char": -0.642109215259552, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 478, "native_id": "Mercury_416507", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4108827114105225, "incorrect_loss_raw": 1.3902994394302368, "correct_loss_per_char": 0.7054413557052612, "incorrect_loss_per_char": 0.6951497197151184, "correct_loss_per_token": 1.4108827114105225, "incorrect_loss_per_token": 1.3902994394302368, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4598439931869507, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4598439931869507, "logits_per_char": -0.7299219965934753, "num_chars": 2}, {"sum_logits": -1.3426028490066528, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3426028490066528, "logits_per_char": -0.6713014245033264, "num_chars": 2}, {"sum_logits": -1.4108827114105225, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4108827114105225, "logits_per_char": -0.7054413557052612, "num_chars": 2}, {"sum_logits": -1.368451476097107, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.368451476097107, "logits_per_char": -0.6842257380485535, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 479, "native_id": "AKDE&ED_2012_4_2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4117218255996704, "incorrect_loss_raw": 1.3852654298146565, "correct_loss_per_char": 0.7058609127998352, "incorrect_loss_per_char": 0.6926327149073283, "correct_loss_per_token": 1.4117218255996704, "incorrect_loss_per_token": 1.3852654298146565, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4840896129608154, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4840896129608154, "logits_per_char": -0.7420448064804077, "num_chars": 2}, {"sum_logits": -1.3561697006225586, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3561697006225586, "logits_per_char": -0.6780848503112793, "num_chars": 2}, {"sum_logits": -1.4117218255996704, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4117218255996704, "logits_per_char": -0.7058609127998352, "num_chars": 2}, {"sum_logits": -1.3155369758605957, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.3155369758605957, "logits_per_char": -0.6577684879302979, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 480, "native_id": "Mercury_SC_LBS10784", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3440048694610596, "incorrect_loss_raw": 1.406928539276123, "correct_loss_per_char": 0.6720024347305298, "incorrect_loss_per_char": 0.7034642696380615, "correct_loss_per_token": 1.3440048694610596, "incorrect_loss_per_token": 1.406928539276123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4164482355117798, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4164482355117798, "logits_per_char": -0.7082241177558899, "num_chars": 2}, {"sum_logits": -1.3440048694610596, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3440048694610596, "logits_per_char": -0.6720024347305298, "num_chars": 2}, {"sum_logits": -1.408967137336731, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.408967137336731, "logits_per_char": -0.7044835686683655, "num_chars": 2}, {"sum_logits": -1.3953702449798584, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3953702449798584, "logits_per_char": -0.6976851224899292, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 481, "native_id": "MEA_2014_5_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4441431760787964, "incorrect_loss_raw": 1.3731680711110432, "correct_loss_per_char": 0.7220715880393982, "incorrect_loss_per_char": 0.6865840355555216, "correct_loss_per_token": 1.4441431760787964, "incorrect_loss_per_token": 1.3731680711110432, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.358090877532959, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.358090877532959, "logits_per_char": -0.6790454387664795, "num_chars": 2}, {"sum_logits": -1.327356219291687, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.327356219291687, "logits_per_char": -0.6636781096458435, "num_chars": 2}, {"sum_logits": -1.4441431760787964, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4441431760787964, "logits_per_char": -0.7220715880393982, "num_chars": 2}, {"sum_logits": -1.4340571165084839, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4340571165084839, "logits_per_char": -0.7170285582542419, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 482, "native_id": "VASoL_2010_3_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.513551950454712, "incorrect_loss_raw": 1.3577603101730347, "correct_loss_per_char": 0.756775975227356, "incorrect_loss_per_char": 0.6788801550865173, "correct_loss_per_token": 1.513551950454712, "incorrect_loss_per_token": 1.3577603101730347, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.313515305519104, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.313515305519104, "logits_per_char": -0.656757652759552, "num_chars": 2}, {"sum_logits": -1.266154170036316, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.266154170036316, "logits_per_char": -0.633077085018158, "num_chars": 2}, {"sum_logits": -1.513551950454712, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.513551950454712, "logits_per_char": -0.756775975227356, "num_chars": 2}, {"sum_logits": -1.493611454963684, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.493611454963684, "logits_per_char": -0.746805727481842, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 483, "native_id": "Mercury_SC_409157", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4187321662902832, "incorrect_loss_raw": 1.383738915125529, "correct_loss_per_char": 0.7093660831451416, "incorrect_loss_per_char": 0.6918694575627645, "correct_loss_per_token": 1.4187321662902832, "incorrect_loss_per_token": 1.383738915125529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4187321662902832, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4187321662902832, "logits_per_char": -0.7093660831451416, "num_chars": 2}, {"sum_logits": -1.4581317901611328, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4581317901611328, "logits_per_char": -0.7290658950805664, "num_chars": 2}, {"sum_logits": -1.4258184432983398, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4258184432983398, "logits_per_char": -0.7129092216491699, "num_chars": 2}, {"sum_logits": -1.2672665119171143, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2672665119171143, "logits_per_char": -0.6336332559585571, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 484, "native_id": "Mercury_7270533", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5576239824295044, "incorrect_loss_raw": 1.3440335591634114, "correct_loss_per_char": 0.7788119912147522, "incorrect_loss_per_char": 0.6720167795817057, "correct_loss_per_token": 1.5576239824295044, "incorrect_loss_per_token": 1.3440335591634114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5576239824295044, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5576239824295044, "logits_per_char": -0.7788119912147522, "num_chars": 2}, {"sum_logits": -1.4111170768737793, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4111170768737793, "logits_per_char": -0.7055585384368896, "num_chars": 2}, {"sum_logits": -1.3852564096450806, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3852564096450806, "logits_per_char": -0.6926282048225403, "num_chars": 2}, {"sum_logits": -1.2357271909713745, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2357271909713745, "logits_per_char": -0.6178635954856873, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 485, "native_id": "MDSA_2011_8_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3500752449035645, "incorrect_loss_raw": 1.4073656400044758, "correct_loss_per_char": 0.6750376224517822, "incorrect_loss_per_char": 0.7036828200022379, "correct_loss_per_token": 1.3500752449035645, "incorrect_loss_per_token": 1.4073656400044758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.515799641609192, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.515799641609192, "logits_per_char": -0.757899820804596, "num_chars": 2}, {"sum_logits": -1.312078833580017, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.312078833580017, "logits_per_char": -0.6560394167900085, "num_chars": 2}, {"sum_logits": -1.3942184448242188, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3942184448242188, "logits_per_char": -0.6971092224121094, "num_chars": 2}, {"sum_logits": -1.3500752449035645, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3500752449035645, "logits_per_char": -0.6750376224517822, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 486, "native_id": "Mercury_7013370", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4347937107086182, "incorrect_loss_raw": 1.3790156443913777, "correct_loss_per_char": 0.7173968553543091, "incorrect_loss_per_char": 0.6895078221956888, "correct_loss_per_token": 1.4347937107086182, "incorrect_loss_per_token": 1.3790156443913777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3303227424621582, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3303227424621582, "logits_per_char": -0.6651613712310791, "num_chars": 2}, {"sum_logits": -1.4347937107086182, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4347937107086182, "logits_per_char": -0.7173968553543091, "num_chars": 2}, {"sum_logits": -1.3461973667144775, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3461973667144775, "logits_per_char": -0.6730986833572388, "num_chars": 2}, {"sum_logits": -1.4605268239974976, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4605268239974976, "logits_per_char": -0.7302634119987488, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 487, "native_id": "Mercury_SC_400132", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3175766468048096, "incorrect_loss_raw": 1.4178365071614583, "correct_loss_per_char": 0.6587883234024048, "incorrect_loss_per_char": 0.7089182535807291, "correct_loss_per_token": 1.3175766468048096, "incorrect_loss_per_token": 1.4178365071614583, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5169671773910522, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.5169671773910522, "logits_per_char": -0.7584835886955261, "num_chars": 2}, {"sum_logits": -1.3550294637680054, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3550294637680054, "logits_per_char": -0.6775147318840027, "num_chars": 2}, {"sum_logits": -1.3815128803253174, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3815128803253174, "logits_per_char": -0.6907564401626587, "num_chars": 2}, {"sum_logits": -1.3175766468048096, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.3175766468048096, "logits_per_char": -0.6587883234024048, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 488, "native_id": "TIMSS_1995_8_P4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6073554754257202, "incorrect_loss_raw": 1.6227145791053772, "correct_loss_per_char": 0.8036777377128601, "incorrect_loss_per_char": 0.8113572895526886, "correct_loss_per_token": 1.6073554754257202, "incorrect_loss_per_token": 1.6227145791053772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5622762441635132, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.5622762441635132, "logits_per_char": -0.7811381220817566, "num_chars": 2}, {"sum_logits": -1.68602454662323, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.68602454662323, "logits_per_char": -0.843012273311615, "num_chars": 2}, {"sum_logits": -1.5828438997268677, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5828438997268677, "logits_per_char": -0.7914219498634338, "num_chars": 2}, {"sum_logits": -1.659713625907898, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.659713625907898, "logits_per_char": -0.829856812953949, "num_chars": 2}, {"sum_logits": -1.6073554754257202, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.6073554754257202, "logits_per_char": -0.8036777377128601, "num_chars": 2}], "label": 4, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 489, "native_id": "WASL_2005_5_10", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2285858392715454, "incorrect_loss_raw": 1.3776954412460327, "correct_loss_per_char": 0.6142929196357727, "incorrect_loss_per_char": 0.6888477206230164, "correct_loss_per_token": 1.2285858392715454, "incorrect_loss_per_token": 1.3776954412460327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2285858392715454, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2285858392715454, "logits_per_char": -0.6142929196357727, "num_chars": 2}, {"sum_logits": -1.3338855504989624, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3338855504989624, "logits_per_char": -0.6669427752494812, "num_chars": 2}, {"sum_logits": -1.421505331993103, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.421505331993103, "logits_per_char": -0.7107526659965515, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 490, "native_id": "MDSA_2008_8_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3750206232070923, "incorrect_loss_raw": 1.4023534059524536, "correct_loss_per_char": 0.6875103116035461, "incorrect_loss_per_char": 0.7011767029762268, "correct_loss_per_token": 1.3750206232070923, "incorrect_loss_per_token": 1.4023534059524536, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5599634647369385, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.5599634647369385, "logits_per_char": -0.7799817323684692, "num_chars": 2}, {"sum_logits": -1.3112410306930542, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.3112410306930542, "logits_per_char": -0.6556205153465271, "num_chars": 2}, {"sum_logits": -1.3750206232070923, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3750206232070923, "logits_per_char": -0.6875103116035461, "num_chars": 2}, {"sum_logits": -1.3358557224273682, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3358557224273682, "logits_per_char": -0.6679278612136841, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 491, "native_id": "Mercury_SC_401786", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3964502811431885, "incorrect_loss_raw": 1.3927802642186482, "correct_loss_per_char": 0.6982251405715942, "incorrect_loss_per_char": 0.6963901321093241, "correct_loss_per_token": 1.3964502811431885, "incorrect_loss_per_token": 1.3927802642186482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4681452512741089, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4681452512741089, "logits_per_char": -0.7340726256370544, "num_chars": 2}, {"sum_logits": -1.4220871925354004, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4220871925354004, "logits_per_char": -0.7110435962677002, "num_chars": 2}, {"sum_logits": -1.3964502811431885, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3964502811431885, "logits_per_char": -0.6982251405715942, "num_chars": 2}, {"sum_logits": -1.2881083488464355, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.2881083488464355, "logits_per_char": -0.6440541744232178, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 492, "native_id": "Mercury_7201163", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4438543319702148, "incorrect_loss_raw": 1.3863316377003987, "correct_loss_per_char": 0.7219271659851074, "incorrect_loss_per_char": 0.6931658188501993, "correct_loss_per_token": 1.4438543319702148, "incorrect_loss_per_token": 1.3863316377003987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5763353109359741, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5763353109359741, "logits_per_char": -0.7881676554679871, "num_chars": 2}, {"sum_logits": -1.3995658159255981, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3995658159255981, "logits_per_char": -0.6997829079627991, "num_chars": 2}, {"sum_logits": -1.4438543319702148, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4438543319702148, "logits_per_char": -0.7219271659851074, "num_chars": 2}, {"sum_logits": -1.183093786239624, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.183093786239624, "logits_per_char": -0.591546893119812, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 493, "native_id": "MEA_2014_8_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4873894453048706, "incorrect_loss_raw": 1.3633431990941365, "correct_loss_per_char": 0.7436947226524353, "incorrect_loss_per_char": 0.6816715995470682, "correct_loss_per_token": 1.4873894453048706, "incorrect_loss_per_token": 1.3633431990941365, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.352126121520996, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.352126121520996, "logits_per_char": -0.676063060760498, "num_chars": 2}, {"sum_logits": -1.2965612411499023, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2965612411499023, "logits_per_char": -0.6482806205749512, "num_chars": 2}, {"sum_logits": -1.4413422346115112, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4413422346115112, "logits_per_char": -0.7206711173057556, "num_chars": 2}, {"sum_logits": -1.4873894453048706, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4873894453048706, "logits_per_char": -0.7436947226524353, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 494, "native_id": "Mercury_SC_402261", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3685369491577148, "incorrect_loss_raw": 1.402248700459798, "correct_loss_per_char": 0.6842684745788574, "incorrect_loss_per_char": 0.701124350229899, "correct_loss_per_token": 1.3685369491577148, "incorrect_loss_per_token": 1.402248700459798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.504297137260437, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.504297137260437, "logits_per_char": -0.7521485686302185, "num_chars": 2}, {"sum_logits": -1.3995827436447144, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3995827436447144, "logits_per_char": -0.6997913718223572, "num_chars": 2}, {"sum_logits": -1.3685369491577148, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3685369491577148, "logits_per_char": -0.6842684745788574, "num_chars": 2}, {"sum_logits": -1.3028662204742432, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.3028662204742432, "logits_per_char": -0.6514331102371216, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 495, "native_id": "TIMSS_1995_8_Q11", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.275131344795227, "incorrect_loss_raw": 1.4337445894877117, "correct_loss_per_char": 0.6375656723976135, "incorrect_loss_per_char": 0.7168722947438558, "correct_loss_per_token": 1.275131344795227, "incorrect_loss_per_token": 1.4337445894877117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.275131344795227, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.275131344795227, "logits_per_char": -0.6375656723976135, "num_chars": 2}, {"sum_logits": -1.3932807445526123, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3932807445526123, "logits_per_char": -0.6966403722763062, "num_chars": 2}, {"sum_logits": -1.4558284282684326, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4558284282684326, "logits_per_char": -0.7279142141342163, "num_chars": 2}, {"sum_logits": -1.4521245956420898, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4521245956420898, "logits_per_char": -0.7260622978210449, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 496, "native_id": "Mercury_7124128", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.398946762084961, "incorrect_loss_raw": 1.3874417940775554, "correct_loss_per_char": 0.6994733810424805, "incorrect_loss_per_char": 0.6937208970387777, "correct_loss_per_token": 1.398946762084961, "incorrect_loss_per_token": 1.3874417940775554, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.384527325630188, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.384527325630188, "logits_per_char": -0.692263662815094, "num_chars": 2}, {"sum_logits": -1.363039493560791, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.363039493560791, "logits_per_char": -0.6815197467803955, "num_chars": 2}, {"sum_logits": -1.414758563041687, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.414758563041687, "logits_per_char": -0.7073792815208435, "num_chars": 2}, {"sum_logits": -1.398946762084961, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.398946762084961, "logits_per_char": -0.6994733810424805, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 497, "native_id": "Mercury_7001628", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3654471635818481, "incorrect_loss_raw": 1.401749610900879, "correct_loss_per_char": 0.6827235817909241, "incorrect_loss_per_char": 0.7008748054504395, "correct_loss_per_token": 1.3654471635818481, "incorrect_loss_per_token": 1.401749610900879, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4985711574554443, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4985711574554443, "logits_per_char": -0.7492855787277222, "num_chars": 2}, {"sum_logits": -1.4190174341201782, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4190174341201782, "logits_per_char": -0.7095087170600891, "num_chars": 2}, {"sum_logits": -1.3654471635818481, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3654471635818481, "logits_per_char": -0.6827235817909241, "num_chars": 2}, {"sum_logits": -1.2876602411270142, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.2876602411270142, "logits_per_char": -0.6438301205635071, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 498, "native_id": "Mercury_7219118", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2694249153137207, "incorrect_loss_raw": 1.4350693623224895, "correct_loss_per_char": 0.6347124576568604, "incorrect_loss_per_char": 0.7175346811612447, "correct_loss_per_token": 1.2694249153137207, "incorrect_loss_per_token": 1.4350693623224895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4741146564483643, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4741146564483643, "logits_per_char": -0.7370573282241821, "num_chars": 2}, {"sum_logits": -1.4204403162002563, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4204403162002563, "logits_per_char": -0.7102201581001282, "num_chars": 2}, {"sum_logits": -1.2694249153137207, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.2694249153137207, "logits_per_char": -0.6347124576568604, "num_chars": 2}, {"sum_logits": -1.4106531143188477, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4106531143188477, "logits_per_char": -0.7053265571594238, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 499, "native_id": "Mercury_404720", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.42657470703125, "incorrect_loss_raw": 1.3801015218098958, "correct_loss_per_char": 0.713287353515625, "incorrect_loss_per_char": 0.6900507609049479, "correct_loss_per_token": 1.42657470703125, "incorrect_loss_per_token": 1.3801015218098958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3241429328918457, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.3241429328918457, "logits_per_char": -0.6620714664459229, "num_chars": 2}, {"sum_logits": -1.3840179443359375, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3840179443359375, "logits_per_char": -0.6920089721679688, "num_chars": 2}, {"sum_logits": -1.4321436882019043, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4321436882019043, "logits_per_char": -0.7160718441009521, "num_chars": 2}, {"sum_logits": -1.42657470703125, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.42657470703125, "logits_per_char": -0.713287353515625, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 500, "native_id": "MDSA_2009_8_38", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.474707007408142, "incorrect_loss_raw": 1.3646354675292969, "correct_loss_per_char": 0.737353503704071, "incorrect_loss_per_char": 0.6823177337646484, "correct_loss_per_token": 1.474707007408142, "incorrect_loss_per_token": 1.3646354675292969, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.453277587890625, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.453277587890625, "logits_per_char": -0.7266387939453125, "num_chars": 2}, {"sum_logits": -1.474707007408142, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.474707007408142, "logits_per_char": -0.737353503704071, "num_chars": 2}, {"sum_logits": -1.36403226852417, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.36403226852417, "logits_per_char": -0.682016134262085, "num_chars": 2}, {"sum_logits": -1.2765965461730957, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2765965461730957, "logits_per_char": -0.6382982730865479, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 501, "native_id": "AKDE&ED_2012_4_14", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3728814125061035, "incorrect_loss_raw": 1.39803151289622, "correct_loss_per_char": 0.6864407062530518, "incorrect_loss_per_char": 0.69901575644811, "correct_loss_per_token": 1.3728814125061035, "incorrect_loss_per_token": 1.39803151289622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4122146368026733, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4122146368026733, "logits_per_char": -0.7061073184013367, "num_chars": 2}, {"sum_logits": -1.448925495147705, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.448925495147705, "logits_per_char": -0.7244627475738525, "num_chars": 2}, {"sum_logits": -1.3728814125061035, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3728814125061035, "logits_per_char": -0.6864407062530518, "num_chars": 2}, {"sum_logits": -1.3329544067382812, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.3329544067382812, "logits_per_char": -0.6664772033691406, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 502, "native_id": "MCAS_2005_5_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4172261953353882, "incorrect_loss_raw": 1.3824318647384644, "correct_loss_per_char": 0.7086130976676941, "incorrect_loss_per_char": 0.6912159323692322, "correct_loss_per_token": 1.4172261953353882, "incorrect_loss_per_token": 1.3824318647384644, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4277360439300537, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4277360439300537, "logits_per_char": -0.7138680219650269, "num_chars": 2}, {"sum_logits": -1.4167530536651611, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4167530536651611, "logits_per_char": -0.7083765268325806, "num_chars": 2}, {"sum_logits": -1.4172261953353882, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4172261953353882, "logits_per_char": -0.7086130976676941, "num_chars": 2}, {"sum_logits": -1.3028064966201782, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.3028064966201782, "logits_per_char": -0.6514032483100891, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 503, "native_id": "NYSEDREGENTS_2015_8_26", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4728807210922241, "incorrect_loss_raw": 1.364297906557719, "correct_loss_per_char": 0.7364403605461121, "incorrect_loss_per_char": 0.6821489532788595, "correct_loss_per_token": 1.4728807210922241, "incorrect_loss_per_token": 1.364297906557719, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.33806574344635, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.33806574344635, "logits_per_char": -0.669032871723175, "num_chars": 2}, {"sum_logits": -1.3735774755477905, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.3735774755477905, "logits_per_char": -0.6867887377738953, "num_chars": 2}, {"sum_logits": -1.3812505006790161, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.3812505006790161, "logits_per_char": -0.6906252503395081, "num_chars": 2}, {"sum_logits": -1.4728807210922241, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4728807210922241, "logits_per_char": -0.7364403605461121, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 504, "native_id": "Mercury_7007683", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2752480506896973, "incorrect_loss_raw": 1.4356077909469604, "correct_loss_per_char": 0.6376240253448486, "incorrect_loss_per_char": 0.7178038954734802, "correct_loss_per_token": 1.2752480506896973, "incorrect_loss_per_token": 1.4356077909469604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2752480506896973, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.2752480506896973, "logits_per_char": -0.6376240253448486, "num_chars": 2}, {"sum_logits": -1.3646678924560547, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3646678924560547, "logits_per_char": -0.6823339462280273, "num_chars": 2}, {"sum_logits": -1.4227101802825928, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4227101802825928, "logits_per_char": -0.7113550901412964, "num_chars": 2}, {"sum_logits": -1.5194453001022339, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.5194453001022339, "logits_per_char": -0.7597226500511169, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 505, "native_id": "MDSA_2011_8_33", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4032392501831055, "incorrect_loss_raw": 1.386338750521342, "correct_loss_per_char": 0.7016196250915527, "incorrect_loss_per_char": 0.693169375260671, "correct_loss_per_token": 1.4032392501831055, "incorrect_loss_per_token": 1.386338750521342, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4032392501831055, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4032392501831055, "logits_per_char": -0.7016196250915527, "num_chars": 2}, {"sum_logits": -1.3477611541748047, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.3477611541748047, "logits_per_char": -0.6738805770874023, "num_chars": 2}, {"sum_logits": -1.453288197517395, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.453288197517395, "logits_per_char": -0.7266440987586975, "num_chars": 2}, {"sum_logits": -1.3579668998718262, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3579668998718262, "logits_per_char": -0.6789834499359131, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 506, "native_id": "NYSEDREGENTS_2015_8_31", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.326101303100586, "incorrect_loss_raw": 1.414021611213684, "correct_loss_per_char": 0.663050651550293, "incorrect_loss_per_char": 0.707010805606842, "correct_loss_per_token": 1.326101303100586, "incorrect_loss_per_token": 1.414021611213684, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.391123652458191, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.391123652458191, "logits_per_char": -0.6955618262290955, "num_chars": 2}, {"sum_logits": -1.4997543096542358, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.4997543096542358, "logits_per_char": -0.7498771548271179, "num_chars": 2}, {"sum_logits": -1.3511868715286255, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3511868715286255, "logits_per_char": -0.6755934357643127, "num_chars": 2}, {"sum_logits": -1.326101303100586, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.326101303100586, "logits_per_char": -0.663050651550293, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 507, "native_id": "Mercury_SC_413637", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4022960662841797, "incorrect_loss_raw": 1.3889022668202717, "correct_loss_per_char": 0.7011480331420898, "incorrect_loss_per_char": 0.6944511334101359, "correct_loss_per_token": 1.4022960662841797, "incorrect_loss_per_token": 1.3889022668202717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.455978512763977, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.455978512763977, "logits_per_char": -0.7279892563819885, "num_chars": 2}, {"sum_logits": -1.3076996803283691, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.3076996803283691, "logits_per_char": -0.6538498401641846, "num_chars": 2}, {"sum_logits": -1.4030286073684692, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4030286073684692, "logits_per_char": -0.7015143036842346, "num_chars": 2}, {"sum_logits": -1.4022960662841797, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4022960662841797, "logits_per_char": -0.7011480331420898, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 508, "native_id": "Mercury_404153", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3029320240020752, "incorrect_loss_raw": 1.431162158648173, "correct_loss_per_char": 0.6514660120010376, "incorrect_loss_per_char": 0.7155810793240865, "correct_loss_per_token": 1.3029320240020752, "incorrect_loss_per_token": 1.431162158648173, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3176937103271484, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3176937103271484, "logits_per_char": -0.6588468551635742, "num_chars": 2}, {"sum_logits": -1.4095885753631592, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4095885753631592, "logits_per_char": -0.7047942876815796, "num_chars": 2}, {"sum_logits": -1.5662041902542114, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.5662041902542114, "logits_per_char": -0.7831020951271057, "num_chars": 2}, {"sum_logits": -1.3029320240020752, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.3029320240020752, "logits_per_char": -0.6514660120010376, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 509, "native_id": "VASoL_2009_5_36", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3172094821929932, "incorrect_loss_raw": 1.4179214239120483, "correct_loss_per_char": 0.6586047410964966, "incorrect_loss_per_char": 0.7089607119560242, "correct_loss_per_token": 1.3172094821929932, "incorrect_loss_per_token": 1.4179214239120483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4356741905212402, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4356741905212402, "logits_per_char": -0.7178370952606201, "num_chars": 2}, {"sum_logits": -1.3918589353561401, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3918589353561401, "logits_per_char": -0.6959294676780701, "num_chars": 2}, {"sum_logits": -1.4262311458587646, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4262311458587646, "logits_per_char": -0.7131155729293823, "num_chars": 2}, {"sum_logits": -1.3172094821929932, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.3172094821929932, "logits_per_char": -0.6586047410964966, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 510, "native_id": "Mercury_7115290", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4304555654525757, "incorrect_loss_raw": 1.3785603443781536, "correct_loss_per_char": 0.7152277827262878, "incorrect_loss_per_char": 0.6892801721890768, "correct_loss_per_token": 1.4304555654525757, "incorrect_loss_per_token": 1.3785603443781536, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4461251497268677, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4461251497268677, "logits_per_char": -0.7230625748634338, "num_chars": 2}, {"sum_logits": -1.3714425563812256, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3714425563812256, "logits_per_char": -0.6857212781906128, "num_chars": 2}, {"sum_logits": -1.4304555654525757, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4304555654525757, "logits_per_char": -0.7152277827262878, "num_chars": 2}, {"sum_logits": -1.3181133270263672, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.3181133270263672, "logits_per_char": -0.6590566635131836, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 511, "native_id": "NCEOGA_2013_5_24", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3870434761047363, "incorrect_loss_raw": 1.4006979862848918, "correct_loss_per_char": 0.6935217380523682, "incorrect_loss_per_char": 0.7003489931424459, "correct_loss_per_token": 1.3870434761047363, "incorrect_loss_per_token": 1.4006979862848918, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5948771238327026, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.5948771238327026, "logits_per_char": -0.7974385619163513, "num_chars": 2}, {"sum_logits": -1.3669228553771973, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.3669228553771973, "logits_per_char": -0.6834614276885986, "num_chars": 2}, {"sum_logits": -1.3870434761047363, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.3870434761047363, "logits_per_char": -0.6935217380523682, "num_chars": 2}, {"sum_logits": -1.2402939796447754, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.2402939796447754, "logits_per_char": -0.6201469898223877, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 512, "native_id": "LEAP__4_10224", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4308111667633057, "incorrect_loss_raw": 1.4026156266530354, "correct_loss_per_char": 0.7154055833816528, "incorrect_loss_per_char": 0.7013078133265177, "correct_loss_per_token": 1.4308111667633057, "incorrect_loss_per_token": 1.4026156266530354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5664699077606201, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.5664699077606201, "logits_per_char": -0.7832349538803101, "num_chars": 2}, {"sum_logits": -1.4308111667633057, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4308111667633057, "logits_per_char": -0.7154055833816528, "num_chars": 2}, {"sum_logits": -1.555383563041687, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.555383563041687, "logits_per_char": -0.7776917815208435, "num_chars": 2}, {"sum_logits": -1.0859934091567993, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.0859934091567993, "logits_per_char": -0.5429967045783997, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 513, "native_id": "Mercury_7223423", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5733710527420044, "incorrect_loss_raw": 1.3378899892171223, "correct_loss_per_char": 0.7866855263710022, "incorrect_loss_per_char": 0.6689449946085612, "correct_loss_per_token": 1.5733710527420044, "incorrect_loss_per_token": 1.3378899892171223, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2649887800216675, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.2649887800216675, "logits_per_char": -0.6324943900108337, "num_chars": 2}, {"sum_logits": -1.401695966720581, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.401695966720581, "logits_per_char": -0.7008479833602905, "num_chars": 2}, {"sum_logits": -1.3469852209091187, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3469852209091187, "logits_per_char": -0.6734926104545593, "num_chars": 2}, {"sum_logits": -1.5733710527420044, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.5733710527420044, "logits_per_char": -0.7866855263710022, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 514, "native_id": "Mercury_7173880", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3532967567443848, "incorrect_loss_raw": 1.4054774443308513, "correct_loss_per_char": 0.6766483783721924, "incorrect_loss_per_char": 0.7027387221654257, "correct_loss_per_token": 1.3532967567443848, "incorrect_loss_per_token": 1.4054774443308513, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4946660995483398, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4946660995483398, "logits_per_char": -0.7473330497741699, "num_chars": 2}, {"sum_logits": -1.337393045425415, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.337393045425415, "logits_per_char": -0.6686965227127075, "num_chars": 2}, {"sum_logits": -1.3843731880187988, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3843731880187988, "logits_per_char": -0.6921865940093994, "num_chars": 2}, {"sum_logits": -1.3532967567443848, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3532967567443848, "logits_per_char": -0.6766483783721924, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 515, "native_id": "ACTAAP_2008_5_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.247565746307373, "incorrect_loss_raw": 1.4442405700683594, "correct_loss_per_char": 0.6237828731536865, "incorrect_loss_per_char": 0.7221202850341797, "correct_loss_per_token": 1.247565746307373, "incorrect_loss_per_token": 1.4442405700683594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.247565746307373, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.247565746307373, "logits_per_char": -0.6237828731536865, "num_chars": 2}, {"sum_logits": -1.4063494205474854, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4063494205474854, "logits_per_char": -0.7031747102737427, "num_chars": 2}, {"sum_logits": -1.4455856084823608, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4455856084823608, "logits_per_char": -0.7227928042411804, "num_chars": 2}, {"sum_logits": -1.480786681175232, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.480786681175232, "logits_per_char": -0.740393340587616, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 516, "native_id": "Mercury_SC_403014", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3981019258499146, "incorrect_loss_raw": 1.3897020816802979, "correct_loss_per_char": 0.6990509629249573, "incorrect_loss_per_char": 0.6948510408401489, "correct_loss_per_token": 1.3981019258499146, "incorrect_loss_per_token": 1.3897020816802979, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3474531173706055, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.3474531173706055, "logits_per_char": -0.6737265586853027, "num_chars": 2}, {"sum_logits": -1.365609049797058, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.365609049797058, "logits_per_char": -0.682804524898529, "num_chars": 2}, {"sum_logits": -1.3981019258499146, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3981019258499146, "logits_per_char": -0.6990509629249573, "num_chars": 2}, {"sum_logits": -1.45604407787323, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.45604407787323, "logits_per_char": -0.728022038936615, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 517, "native_id": "Mercury_SC_400854", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5179598331451416, "incorrect_loss_raw": 1.352561116218567, "correct_loss_per_char": 0.7589799165725708, "incorrect_loss_per_char": 0.6762805581092834, "correct_loss_per_token": 1.5179598331451416, "incorrect_loss_per_token": 1.352561116218567, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3849304914474487, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3849304914474487, "logits_per_char": -0.6924652457237244, "num_chars": 2}, {"sum_logits": -1.39890456199646, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.39890456199646, "logits_per_char": -0.69945228099823, "num_chars": 2}, {"sum_logits": -1.273848295211792, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.273848295211792, "logits_per_char": -0.636924147605896, "num_chars": 2}, {"sum_logits": -1.5179598331451416, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.5179598331451416, "logits_per_char": -0.7589799165725708, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 518, "native_id": "TAKS_2009_8_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2553033828735352, "incorrect_loss_raw": 1.4431736866633098, "correct_loss_per_char": 0.6276516914367676, "incorrect_loss_per_char": 0.7215868433316549, "correct_loss_per_token": 1.2553033828735352, "incorrect_loss_per_token": 1.4431736866633098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3583766222000122, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3583766222000122, "logits_per_char": -0.6791883111000061, "num_chars": 2}, {"sum_logits": -1.2553033828735352, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.2553033828735352, "logits_per_char": -0.6276516914367676, "num_chars": 2}, {"sum_logits": -1.4474999904632568, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4474999904632568, "logits_per_char": -0.7237499952316284, "num_chars": 2}, {"sum_logits": -1.5236444473266602, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.5236444473266602, "logits_per_char": -0.7618222236633301, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 519, "native_id": "Mercury_7075128", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.492220401763916, "incorrect_loss_raw": 1.3616748253504436, "correct_loss_per_char": 0.746110200881958, "incorrect_loss_per_char": 0.6808374126752218, "correct_loss_per_token": 1.492220401763916, "incorrect_loss_per_token": 1.3616748253504436, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2713595628738403, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.2713595628738403, "logits_per_char": -0.6356797814369202, "num_chars": 2}, {"sum_logits": -1.464272141456604, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.464272141456604, "logits_per_char": -0.732136070728302, "num_chars": 2}, {"sum_logits": -1.492220401763916, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.492220401763916, "logits_per_char": -0.746110200881958, "num_chars": 2}, {"sum_logits": -1.3493927717208862, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3493927717208862, "logits_per_char": -0.6746963858604431, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 520, "native_id": "Mercury_SC_405783", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3567726612091064, "incorrect_loss_raw": 1.404354453086853, "correct_loss_per_char": 0.6783863306045532, "incorrect_loss_per_char": 0.7021772265434265, "correct_loss_per_token": 1.3567726612091064, "incorrect_loss_per_token": 1.404354453086853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5027517080307007, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.5027517080307007, "logits_per_char": -0.7513758540153503, "num_chars": 2}, {"sum_logits": -1.347643256187439, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.347643256187439, "logits_per_char": -0.6738216280937195, "num_chars": 2}, {"sum_logits": -1.3567726612091064, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3567726612091064, "logits_per_char": -0.6783863306045532, "num_chars": 2}, {"sum_logits": -1.3626683950424194, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3626683950424194, "logits_per_char": -0.6813341975212097, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 521, "native_id": "Mercury_SC_402054", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1832244396209717, "incorrect_loss_raw": 1.471404790878296, "correct_loss_per_char": 0.5916122198104858, "incorrect_loss_per_char": 0.735702395439148, "correct_loss_per_token": 1.1832244396209717, "incorrect_loss_per_token": 1.471404790878296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.495728850364685, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.495728850364685, "logits_per_char": -0.7478644251823425, "num_chars": 2}, {"sum_logits": -1.5305057764053345, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.5305057764053345, "logits_per_char": -0.7652528882026672, "num_chars": 2}, {"sum_logits": -1.3879797458648682, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3879797458648682, "logits_per_char": -0.6939898729324341, "num_chars": 2}, {"sum_logits": -1.1832244396209717, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.1832244396209717, "logits_per_char": -0.5916122198104858, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 522, "native_id": "NCEOGA_2013_5_14", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2254242897033691, "incorrect_loss_raw": 1.4535708824793498, "correct_loss_per_char": 0.6127121448516846, "incorrect_loss_per_char": 0.7267854412396749, "correct_loss_per_token": 1.2254242897033691, "incorrect_loss_per_token": 1.4535708824793498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5211753845214844, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.5211753845214844, "logits_per_char": -0.7605876922607422, "num_chars": 2}, {"sum_logits": -1.4416821002960205, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4416821002960205, "logits_per_char": -0.7208410501480103, "num_chars": 2}, {"sum_logits": -1.3978551626205444, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3978551626205444, "logits_per_char": -0.6989275813102722, "num_chars": 2}, {"sum_logits": -1.2254242897033691, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.2254242897033691, "logits_per_char": -0.6127121448516846, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 523, "native_id": "Mercury_7090755", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.440238118171692, "incorrect_loss_raw": 1.37772802511851, "correct_loss_per_char": 0.720119059085846, "incorrect_loss_per_char": 0.688864012559255, "correct_loss_per_token": 1.440238118171692, "incorrect_loss_per_token": 1.37772802511851, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.440238118171692, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.440238118171692, "logits_per_char": -0.720119059085846, "num_chars": 2}, {"sum_logits": -1.3205958604812622, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3205958604812622, "logits_per_char": -0.6602979302406311, "num_chars": 2}, {"sum_logits": -1.3012527227401733, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3012527227401733, "logits_per_char": -0.6506263613700867, "num_chars": 2}, {"sum_logits": -1.5113354921340942, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.5113354921340942, "logits_per_char": -0.7556677460670471, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 524, "native_id": "NYSEDREGENTS_2012_4_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3380179405212402, "incorrect_loss_raw": 1.410110870997111, "correct_loss_per_char": 0.6690089702606201, "incorrect_loss_per_char": 0.7050554354985555, "correct_loss_per_token": 1.3380179405212402, "incorrect_loss_per_token": 1.410110870997111, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3380179405212402, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3380179405212402, "logits_per_char": -0.6690089702606201, "num_chars": 2}, {"sum_logits": -1.474630355834961, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.474630355834961, "logits_per_char": -0.7373151779174805, "num_chars": 2}, {"sum_logits": -1.426124095916748, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.426124095916748, "logits_per_char": -0.713062047958374, "num_chars": 2}, {"sum_logits": -1.329578161239624, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.329578161239624, "logits_per_char": -0.664789080619812, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 525, "native_id": "NYSEDREGENTS_2013_4_14", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.399868369102478, "incorrect_loss_raw": 1.3896132707595825, "correct_loss_per_char": 0.699934184551239, "incorrect_loss_per_char": 0.6948066353797913, "correct_loss_per_token": 1.399868369102478, "incorrect_loss_per_token": 1.3896132707595825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3168495893478394, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3168495893478394, "logits_per_char": -0.6584247946739197, "num_chars": 2}, {"sum_logits": -1.399868369102478, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.399868369102478, "logits_per_char": -0.699934184551239, "num_chars": 2}, {"sum_logits": -1.4576852321624756, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4576852321624756, "logits_per_char": -0.7288426160812378, "num_chars": 2}, {"sum_logits": -1.3943049907684326, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3943049907684326, "logits_per_char": -0.6971524953842163, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 526, "native_id": "Mercury_LBS10817", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4696000814437866, "incorrect_loss_raw": 1.3660510778427124, "correct_loss_per_char": 0.7348000407218933, "incorrect_loss_per_char": 0.6830255389213562, "correct_loss_per_token": 1.4696000814437866, "incorrect_loss_per_token": 1.3660510778427124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3350201845169067, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3350201845169067, "logits_per_char": -0.6675100922584534, "num_chars": 2}, {"sum_logits": -1.382769227027893, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.382769227027893, "logits_per_char": -0.6913846135139465, "num_chars": 2}, {"sum_logits": -1.4696000814437866, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4696000814437866, "logits_per_char": -0.7348000407218933, "num_chars": 2}, {"sum_logits": -1.3803638219833374, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3803638219833374, "logits_per_char": -0.6901819109916687, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 527, "native_id": "Mercury_SC_405856", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3528380393981934, "incorrect_loss_raw": 1.402835726737976, "correct_loss_per_char": 0.6764190196990967, "incorrect_loss_per_char": 0.701417863368988, "correct_loss_per_token": 1.3528380393981934, "incorrect_loss_per_token": 1.402835726737976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3528380393981934, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3528380393981934, "logits_per_char": -0.6764190196990967, "num_chars": 2}, {"sum_logits": -1.4273089170455933, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4273089170455933, "logits_per_char": -0.7136544585227966, "num_chars": 2}, {"sum_logits": -1.4034010171890259, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4034010171890259, "logits_per_char": -0.7017005085945129, "num_chars": 2}, {"sum_logits": -1.377797245979309, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.377797245979309, "logits_per_char": -0.6888986229896545, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 528, "native_id": "Mercury_407053", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.625554084777832, "incorrect_loss_raw": 1.3528145949045818, "correct_loss_per_char": 0.812777042388916, "incorrect_loss_per_char": 0.6764072974522909, "correct_loss_per_token": 1.625554084777832, "incorrect_loss_per_token": 1.3528145949045818, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0616753101348877, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.0616753101348877, "logits_per_char": -0.5308376550674438, "num_chars": 2}, {"sum_logits": -1.429856777191162, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.429856777191162, "logits_per_char": -0.714928388595581, "num_chars": 2}, {"sum_logits": -1.625554084777832, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.625554084777832, "logits_per_char": -0.812777042388916, "num_chars": 2}, {"sum_logits": -1.5669116973876953, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.5669116973876953, "logits_per_char": -0.7834558486938477, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 529, "native_id": "Mercury_SC_414130", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4173460006713867, "incorrect_loss_raw": 1.3849925597508748, "correct_loss_per_char": 0.7086730003356934, "incorrect_loss_per_char": 0.6924962798754374, "correct_loss_per_token": 1.4173460006713867, "incorrect_loss_per_token": 1.3849925597508748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4361634254455566, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4361634254455566, "logits_per_char": -0.7180817127227783, "num_chars": 2}, {"sum_logits": -1.4173460006713867, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4173460006713867, "logits_per_char": -0.7086730003356934, "num_chars": 2}, {"sum_logits": -1.4610595703125, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4610595703125, "logits_per_char": -0.73052978515625, "num_chars": 2}, {"sum_logits": -1.2577546834945679, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.2577546834945679, "logits_per_char": -0.6288773417472839, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 530, "native_id": "VASoL_2010_3_28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4181488752365112, "incorrect_loss_raw": 1.3842331171035767, "correct_loss_per_char": 0.7090744376182556, "incorrect_loss_per_char": 0.6921165585517883, "correct_loss_per_token": 1.4181488752365112, "incorrect_loss_per_token": 1.3842331171035767, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3036749362945557, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.3036749362945557, "logits_per_char": -0.6518374681472778, "num_chars": 2}, {"sum_logits": -1.454543948173523, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.454543948173523, "logits_per_char": -0.7272719740867615, "num_chars": 2}, {"sum_logits": -1.3944804668426514, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3944804668426514, "logits_per_char": -0.6972402334213257, "num_chars": 2}, {"sum_logits": -1.4181488752365112, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4181488752365112, "logits_per_char": -0.7090744376182556, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 531, "native_id": "Mercury_7271215", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4131450653076172, "incorrect_loss_raw": 1.3990357716878254, "correct_loss_per_char": 0.7065725326538086, "incorrect_loss_per_char": 0.6995178858439127, "correct_loss_per_token": 1.4131450653076172, "incorrect_loss_per_token": 1.3990357716878254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6551399230957031, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.6551399230957031, "logits_per_char": -0.8275699615478516, "num_chars": 2}, {"sum_logits": -1.4131450653076172, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4131450653076172, "logits_per_char": -0.7065725326538086, "num_chars": 2}, {"sum_logits": -1.2968131303787231, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.2968131303787231, "logits_per_char": -0.6484065651893616, "num_chars": 2}, {"sum_logits": -1.2451542615890503, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.2451542615890503, "logits_per_char": -0.6225771307945251, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 532, "native_id": "TIMSS_2003_4_pg87", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4076955318450928, "incorrect_loss_raw": 1.385489026705424, "correct_loss_per_char": 0.7038477659225464, "incorrect_loss_per_char": 0.692744513352712, "correct_loss_per_token": 1.4076955318450928, "incorrect_loss_per_token": 1.385489026705424, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4256912469863892, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4256912469863892, "logits_per_char": -0.7128456234931946, "num_chars": 2}, {"sum_logits": -1.4072951078414917, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4072951078414917, "logits_per_char": -0.7036475539207458, "num_chars": 2}, {"sum_logits": -1.4076955318450928, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4076955318450928, "logits_per_char": -0.7038477659225464, "num_chars": 2}, {"sum_logits": -1.3234807252883911, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.3234807252883911, "logits_per_char": -0.6617403626441956, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 533, "native_id": "Mercury_SC_408628", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4859378337860107, "incorrect_loss_raw": 1.3752131064732869, "correct_loss_per_char": 0.7429689168930054, "incorrect_loss_per_char": 0.6876065532366434, "correct_loss_per_token": 1.4859378337860107, "incorrect_loss_per_token": 1.3752131064732869, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4672071933746338, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4672071933746338, "logits_per_char": -0.7336035966873169, "num_chars": 2}, {"sum_logits": -1.5127744674682617, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5127744674682617, "logits_per_char": -0.7563872337341309, "num_chars": 2}, {"sum_logits": -1.4859378337860107, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4859378337860107, "logits_per_char": -0.7429689168930054, "num_chars": 2}, {"sum_logits": -1.1456576585769653, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.1456576585769653, "logits_per_char": -0.5728288292884827, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 534, "native_id": "ACTAAP_2009_5_4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2853729724884033, "incorrect_loss_raw": 1.4319665431976318, "correct_loss_per_char": 0.6426864862442017, "incorrect_loss_per_char": 0.7159832715988159, "correct_loss_per_token": 1.2853729724884033, "incorrect_loss_per_token": 1.4319665431976318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.533721923828125, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.533721923828125, "logits_per_char": -0.7668609619140625, "num_chars": 2}, {"sum_logits": -1.3657766580581665, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3657766580581665, "logits_per_char": -0.6828883290290833, "num_chars": 2}, {"sum_logits": -1.396401047706604, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.396401047706604, "logits_per_char": -0.698200523853302, "num_chars": 2}, {"sum_logits": -1.2853729724884033, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.2853729724884033, "logits_per_char": -0.6426864862442017, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 535, "native_id": "Mercury_7205923", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5126070976257324, "incorrect_loss_raw": 1.3570228417714436, "correct_loss_per_char": 0.7563035488128662, "incorrect_loss_per_char": 0.6785114208857218, "correct_loss_per_token": 1.5126070976257324, "incorrect_loss_per_token": 1.3570228417714436, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5126070976257324, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5126070976257324, "logits_per_char": -0.7563035488128662, "num_chars": 2}, {"sum_logits": -1.392427921295166, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.392427921295166, "logits_per_char": -0.696213960647583, "num_chars": 2}, {"sum_logits": -1.4349145889282227, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4349145889282227, "logits_per_char": -0.7174572944641113, "num_chars": 2}, {"sum_logits": -1.2437260150909424, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2437260150909424, "logits_per_char": -0.6218630075454712, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 536, "native_id": "Mercury_7171955", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5390958786010742, "incorrect_loss_raw": 1.347665786743164, "correct_loss_per_char": 0.7695479393005371, "incorrect_loss_per_char": 0.673832893371582, "correct_loss_per_token": 1.5390958786010742, "incorrect_loss_per_token": 1.347665786743164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5390958786010742, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5390958786010742, "logits_per_char": -0.7695479393005371, "num_chars": 2}, {"sum_logits": -1.4078831672668457, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4078831672668457, "logits_per_char": -0.7039415836334229, "num_chars": 2}, {"sum_logits": -1.3969441652297974, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3969441652297974, "logits_per_char": -0.6984720826148987, "num_chars": 2}, {"sum_logits": -1.2381700277328491, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.2381700277328491, "logits_per_char": -0.6190850138664246, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 537, "native_id": "NYSEDREGENTS_2008_8_28", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4274060726165771, "incorrect_loss_raw": 1.3807998498280842, "correct_loss_per_char": 0.7137030363082886, "incorrect_loss_per_char": 0.6903999249140421, "correct_loss_per_token": 1.4274060726165771, "incorrect_loss_per_token": 1.3807998498280842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4305658340454102, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4305658340454102, "logits_per_char": -0.7152829170227051, "num_chars": 2}, {"sum_logits": -1.3845442533493042, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3845442533493042, "logits_per_char": -0.6922721266746521, "num_chars": 2}, {"sum_logits": -1.4274060726165771, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4274060726165771, "logits_per_char": -0.7137030363082886, "num_chars": 2}, {"sum_logits": -1.3272894620895386, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.3272894620895386, "logits_per_char": -0.6636447310447693, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 538, "native_id": "TIMSS_2011_4_pg15", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5330966711044312, "incorrect_loss_raw": 1.353073239326477, "correct_loss_per_char": 0.7665483355522156, "incorrect_loss_per_char": 0.6765366196632385, "correct_loss_per_token": 1.5330966711044312, "incorrect_loss_per_token": 1.353073239326477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2765183448791504, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.2765183448791504, "logits_per_char": -0.6382591724395752, "num_chars": 2}, {"sum_logits": -1.2841628789901733, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.2841628789901733, "logits_per_char": -0.6420814394950867, "num_chars": 2}, {"sum_logits": -1.4985384941101074, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4985384941101074, "logits_per_char": -0.7492692470550537, "num_chars": 2}, {"sum_logits": -1.5330966711044312, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.5330966711044312, "logits_per_char": -0.7665483355522156, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 539, "native_id": "Mercury_SC_409026", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3841685056686401, "incorrect_loss_raw": 1.3976588646570842, "correct_loss_per_char": 0.6920842528343201, "incorrect_loss_per_char": 0.6988294323285421, "correct_loss_per_token": 1.3841685056686401, "incorrect_loss_per_token": 1.3976588646570842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4865208864212036, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4865208864212036, "logits_per_char": -0.7432604432106018, "num_chars": 2}, {"sum_logits": -1.4570562839508057, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4570562839508057, "logits_per_char": -0.7285281419754028, "num_chars": 2}, {"sum_logits": -1.3841685056686401, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3841685056686401, "logits_per_char": -0.6920842528343201, "num_chars": 2}, {"sum_logits": -1.2493994235992432, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2493994235992432, "logits_per_char": -0.6246997117996216, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 540, "native_id": "Mercury_7082653", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.367465853691101, "incorrect_loss_raw": 1.3991299867630005, "correct_loss_per_char": 0.6837329268455505, "incorrect_loss_per_char": 0.6995649933815002, "correct_loss_per_token": 1.367465853691101, "incorrect_loss_per_token": 1.3991299867630005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4836158752441406, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4836158752441406, "logits_per_char": -0.7418079376220703, "num_chars": 2}, {"sum_logits": -1.367465853691101, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.367465853691101, "logits_per_char": -0.6837329268455505, "num_chars": 2}, {"sum_logits": -1.3617717027664185, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3617717027664185, "logits_per_char": -0.6808858513832092, "num_chars": 2}, {"sum_logits": -1.3520023822784424, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3520023822784424, "logits_per_char": -0.6760011911392212, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 541, "native_id": "NYSEDREGENTS_2013_4_8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3124592304229736, "incorrect_loss_raw": 1.4201333125432332, "correct_loss_per_char": 0.6562296152114868, "incorrect_loss_per_char": 0.7100666562716166, "correct_loss_per_token": 1.3124592304229736, "incorrect_loss_per_token": 1.4201333125432332, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4330438375473022, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4330438375473022, "logits_per_char": -0.7165219187736511, "num_chars": 2}, {"sum_logits": -1.3309800624847412, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3309800624847412, "logits_per_char": -0.6654900312423706, "num_chars": 2}, {"sum_logits": -1.4963760375976562, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4963760375976562, "logits_per_char": -0.7481880187988281, "num_chars": 2}, {"sum_logits": -1.3124592304229736, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.3124592304229736, "logits_per_char": -0.6562296152114868, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 542, "native_id": "Mercury_SC_415535", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3453792333602905, "incorrect_loss_raw": 1.4085615873336792, "correct_loss_per_char": 0.6726896166801453, "incorrect_loss_per_char": 0.7042807936668396, "correct_loss_per_token": 1.3453792333602905, "incorrect_loss_per_token": 1.4085615873336792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.414693832397461, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.414693832397461, "logits_per_char": -0.7073469161987305, "num_chars": 2}, {"sum_logits": -1.3453792333602905, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3453792333602905, "logits_per_char": -0.6726896166801453, "num_chars": 2}, {"sum_logits": -1.3179452419281006, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.3179452419281006, "logits_per_char": -0.6589726209640503, "num_chars": 2}, {"sum_logits": -1.493045687675476, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.493045687675476, "logits_per_char": -0.746522843837738, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 543, "native_id": "Mercury_SC_400851", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.441907286643982, "incorrect_loss_raw": 1.3744560082753499, "correct_loss_per_char": 0.720953643321991, "incorrect_loss_per_char": 0.6872280041376749, "correct_loss_per_token": 1.441907286643982, "incorrect_loss_per_token": 1.3744560082753499, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3371046781539917, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3371046781539917, "logits_per_char": -0.6685523390769958, "num_chars": 2}, {"sum_logits": -1.3924520015716553, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3924520015716553, "logits_per_char": -0.6962260007858276, "num_chars": 2}, {"sum_logits": -1.3938113451004028, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3938113451004028, "logits_per_char": -0.6969056725502014, "num_chars": 2}, {"sum_logits": -1.441907286643982, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.441907286643982, "logits_per_char": -0.720953643321991, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 544, "native_id": "Mercury_SC_416171", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3606843948364258, "incorrect_loss_raw": 1.4012631972630818, "correct_loss_per_char": 0.6803421974182129, "incorrect_loss_per_char": 0.7006315986315409, "correct_loss_per_token": 1.3606843948364258, "incorrect_loss_per_token": 1.4012631972630818, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4158464670181274, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4158464670181274, "logits_per_char": -0.7079232335090637, "num_chars": 2}, {"sum_logits": -1.3606843948364258, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.3606843948364258, "logits_per_char": -0.6803421974182129, "num_chars": 2}, {"sum_logits": -1.3630549907684326, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3630549907684326, "logits_per_char": -0.6815274953842163, "num_chars": 2}, {"sum_logits": -1.4248881340026855, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4248881340026855, "logits_per_char": -0.7124440670013428, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 545, "native_id": "TIMSS_2003_4_pg35", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6590853929519653, "incorrect_loss_raw": 1.6129080057144165, "correct_loss_per_char": 0.8295426964759827, "incorrect_loss_per_char": 0.8064540028572083, "correct_loss_per_token": 1.6590853929519653, "incorrect_loss_per_token": 1.6129080057144165, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.802024006843567, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.802024006843567, "logits_per_char": -0.9010120034217834, "num_chars": 2}, {"sum_logits": -1.6590853929519653, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.6590853929519653, "logits_per_char": -0.8295426964759827, "num_chars": 2}, {"sum_logits": -1.489413857460022, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.489413857460022, "logits_per_char": -0.744706928730011, "num_chars": 2}, {"sum_logits": -1.4877911806106567, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.4877911806106567, "logits_per_char": -0.7438955903053284, "num_chars": 2}, {"sum_logits": -1.6724029779434204, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.6724029779434204, "logits_per_char": -0.8362014889717102, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 546, "native_id": "NYSEDREGENTS_2013_4_10", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3006330728530884, "incorrect_loss_raw": 1.4219303925832112, "correct_loss_per_char": 0.6503165364265442, "incorrect_loss_per_char": 0.7109651962916056, "correct_loss_per_token": 1.3006330728530884, "incorrect_loss_per_token": 1.4219303925832112, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3006330728530884, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.3006330728530884, "logits_per_char": -0.6503165364265442, "num_chars": 2}, {"sum_logits": -1.371595859527588, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.371595859527588, "logits_per_char": -0.685797929763794, "num_chars": 2}, {"sum_logits": -1.4205642938613892, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4205642938613892, "logits_per_char": -0.7102821469306946, "num_chars": 2}, {"sum_logits": -1.4736310243606567, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4736310243606567, "logits_per_char": -0.7368155121803284, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 547, "native_id": "MSA_2012_5_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.419067144393921, "incorrect_loss_raw": 1.38027818997701, "correct_loss_per_char": 0.7095335721969604, "incorrect_loss_per_char": 0.690139094988505, "correct_loss_per_token": 1.419067144393921, "incorrect_loss_per_token": 1.38027818997701, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3796041011810303, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3796041011810303, "logits_per_char": -0.6898020505905151, "num_chars": 2}, {"sum_logits": -1.419067144393921, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.419067144393921, "logits_per_char": -0.7095335721969604, "num_chars": 2}, {"sum_logits": -1.3967950344085693, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3967950344085693, "logits_per_char": -0.6983975172042847, "num_chars": 2}, {"sum_logits": -1.3644354343414307, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.3644354343414307, "logits_per_char": -0.6822177171707153, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 548, "native_id": "Mercury_SC_405020", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2132515907287598, "incorrect_loss_raw": 1.4254599412282307, "correct_loss_per_char": 1.1066257953643799, "incorrect_loss_per_char": 0.7127299706141154, "correct_loss_per_token": 2.2132515907287598, "incorrect_loss_per_token": 1.4254599412282307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0984165668487549, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.0984165668487549, "logits_per_char": -0.5492082834243774, "num_chars": 2}, {"sum_logits": -1.4131584167480469, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4131584167480469, "logits_per_char": -0.7065792083740234, "num_chars": 2}, {"sum_logits": -1.7648048400878906, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.7648048400878906, "logits_per_char": -0.8824024200439453, "num_chars": 2}, {"sum_logits": -2.2132515907287598, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -2.2132515907287598, "logits_per_char": -1.1066257953643799, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 549, "native_id": "Mercury_7009713", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.382459044456482, "incorrect_loss_raw": 1.3939451376597087, "correct_loss_per_char": 0.691229522228241, "incorrect_loss_per_char": 0.6969725688298544, "correct_loss_per_token": 1.382459044456482, "incorrect_loss_per_token": 1.3939451376597087, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.382459044456482, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.382459044456482, "logits_per_char": -0.691229522228241, "num_chars": 2}, {"sum_logits": -1.4205232858657837, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4205232858657837, "logits_per_char": -0.7102616429328918, "num_chars": 2}, {"sum_logits": -1.4054762125015259, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4054762125015259, "logits_per_char": -0.7027381062507629, "num_chars": 2}, {"sum_logits": -1.3558359146118164, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.3558359146118164, "logits_per_char": -0.6779179573059082, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 550, "native_id": "Mercury_SC_LBS11012", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3116916418075562, "incorrect_loss_raw": 1.4222217798233032, "correct_loss_per_char": 0.6558458209037781, "incorrect_loss_per_char": 0.7111108899116516, "correct_loss_per_token": 1.3116916418075562, "incorrect_loss_per_token": 1.4222217798233032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5564085245132446, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.5564085245132446, "logits_per_char": -0.7782042622566223, "num_chars": 2}, {"sum_logits": -1.36505925655365, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.36505925655365, "logits_per_char": -0.682529628276825, "num_chars": 2}, {"sum_logits": -1.3451975584030151, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3451975584030151, "logits_per_char": -0.6725987792015076, "num_chars": 2}, {"sum_logits": -1.3116916418075562, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3116916418075562, "logits_per_char": -0.6558458209037781, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 551, "native_id": "Mercury_SC_401269", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3793562650680542, "incorrect_loss_raw": 1.3979460795720418, "correct_loss_per_char": 0.6896781325340271, "incorrect_loss_per_char": 0.6989730397860209, "correct_loss_per_token": 1.3793562650680542, "incorrect_loss_per_token": 1.3979460795720418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2761627435684204, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.2761627435684204, "logits_per_char": -0.6380813717842102, "num_chars": 2}, {"sum_logits": -1.3793562650680542, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3793562650680542, "logits_per_char": -0.6896781325340271, "num_chars": 2}, {"sum_logits": -1.444622278213501, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.444622278213501, "logits_per_char": -0.7223111391067505, "num_chars": 2}, {"sum_logits": -1.473053216934204, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.473053216934204, "logits_per_char": -0.736526608467102, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 552, "native_id": "Mercury_SC_401296", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3838915824890137, "incorrect_loss_raw": 1.395993749300639, "correct_loss_per_char": 0.6919457912445068, "incorrect_loss_per_char": 0.6979968746503195, "correct_loss_per_token": 1.3838915824890137, "incorrect_loss_per_token": 1.395993749300639, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3838915824890137, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3838915824890137, "logits_per_char": -0.6919457912445068, "num_chars": 2}, {"sum_logits": -1.4110982418060303, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4110982418060303, "logits_per_char": -0.7055491209030151, "num_chars": 2}, {"sum_logits": -1.4724621772766113, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4724621772766113, "logits_per_char": -0.7362310886383057, "num_chars": 2}, {"sum_logits": -1.304420828819275, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.304420828819275, "logits_per_char": -0.6522104144096375, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 553, "native_id": "Mercury_SC_LBS10940", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3446497917175293, "incorrect_loss_raw": 1.4085644086201985, "correct_loss_per_char": 0.6723248958587646, "incorrect_loss_per_char": 0.7042822043100992, "correct_loss_per_token": 1.3446497917175293, "incorrect_loss_per_token": 1.4085644086201985, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4068886041641235, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.4068886041641235, "logits_per_char": -0.7034443020820618, "num_chars": 2}, {"sum_logits": -1.447757363319397, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.447757363319397, "logits_per_char": -0.7238786816596985, "num_chars": 2}, {"sum_logits": -1.3446497917175293, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.3446497917175293, "logits_per_char": -0.6723248958587646, "num_chars": 2}, {"sum_logits": -1.3710472583770752, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3710472583770752, "logits_per_char": -0.6855236291885376, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 554, "native_id": "Mercury_181545", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3929394483566284, "incorrect_loss_raw": 1.391351580619812, "correct_loss_per_char": 0.6964697241783142, "incorrect_loss_per_char": 0.695675790309906, "correct_loss_per_token": 1.3929394483566284, "incorrect_loss_per_token": 1.391351580619812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4604390859603882, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4604390859603882, "logits_per_char": -0.7302195429801941, "num_chars": 2}, {"sum_logits": -1.408308744430542, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.408308744430542, "logits_per_char": -0.704154372215271, "num_chars": 2}, {"sum_logits": -1.3929394483566284, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3929394483566284, "logits_per_char": -0.6964697241783142, "num_chars": 2}, {"sum_logits": -1.3053069114685059, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.3053069114685059, "logits_per_char": -0.6526534557342529, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 555, "native_id": "TIMSS_2007_8_pg4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4861644506454468, "incorrect_loss_raw": 1.3692956765492756, "correct_loss_per_char": 0.7430822253227234, "incorrect_loss_per_char": 0.6846478382746378, "correct_loss_per_token": 1.4861644506454468, "incorrect_loss_per_token": 1.3692956765492756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4755266904830933, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4755266904830933, "logits_per_char": -0.7377633452415466, "num_chars": 2}, {"sum_logits": -1.2965558767318726, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.2965558767318726, "logits_per_char": -0.6482779383659363, "num_chars": 2}, {"sum_logits": -1.3358044624328613, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3358044624328613, "logits_per_char": -0.6679022312164307, "num_chars": 2}, {"sum_logits": -1.4861644506454468, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4861644506454468, "logits_per_char": -0.7430822253227234, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 556, "native_id": "NYSEDREGENTS_2015_4_27", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2380869388580322, "incorrect_loss_raw": 1.45477290948232, "correct_loss_per_char": 0.6190434694290161, "incorrect_loss_per_char": 0.72738645474116, "correct_loss_per_token": 1.2380869388580322, "incorrect_loss_per_token": 1.45477290948232, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2751485109329224, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.2751485109329224, "logits_per_char": -0.6375742554664612, "num_chars": 2}, {"sum_logits": -1.2380869388580322, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.2380869388580322, "logits_per_char": -0.6190434694290161, "num_chars": 2}, {"sum_logits": -1.5411657094955444, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.5411657094955444, "logits_per_char": -0.7705828547477722, "num_chars": 2}, {"sum_logits": -1.5480045080184937, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.5480045080184937, "logits_per_char": -0.7740022540092468, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 557, "native_id": "Mercury_7267575", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4312328100204468, "incorrect_loss_raw": 1.377323865890503, "correct_loss_per_char": 0.7156164050102234, "incorrect_loss_per_char": 0.6886619329452515, "correct_loss_per_token": 1.4312328100204468, "incorrect_loss_per_token": 1.377323865890503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3828296661376953, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3828296661376953, "logits_per_char": -0.6914148330688477, "num_chars": 2}, {"sum_logits": -1.3727632761001587, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3727632761001587, "logits_per_char": -0.6863816380500793, "num_chars": 2}, {"sum_logits": -1.4312328100204468, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4312328100204468, "logits_per_char": -0.7156164050102234, "num_chars": 2}, {"sum_logits": -1.3763786554336548, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3763786554336548, "logits_per_char": -0.6881893277168274, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 558, "native_id": "Mercury_190190", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4280025959014893, "incorrect_loss_raw": 1.379364053408305, "correct_loss_per_char": 0.7140012979507446, "incorrect_loss_per_char": 0.6896820267041525, "correct_loss_per_token": 1.4280025959014893, "incorrect_loss_per_token": 1.379364053408305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3729686737060547, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3729686737060547, "logits_per_char": -0.6864843368530273, "num_chars": 2}, {"sum_logits": -1.3577412366867065, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3577412366867065, "logits_per_char": -0.6788706183433533, "num_chars": 2}, {"sum_logits": -1.4073822498321533, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4073822498321533, "logits_per_char": -0.7036911249160767, "num_chars": 2}, {"sum_logits": -1.4280025959014893, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4280025959014893, "logits_per_char": -0.7140012979507446, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 559, "native_id": "NYSEDREGENTS_2008_4_10", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.427632212638855, "incorrect_loss_raw": 1.3805982271830242, "correct_loss_per_char": 0.7138161063194275, "incorrect_loss_per_char": 0.6902991135915121, "correct_loss_per_token": 1.427632212638855, "incorrect_loss_per_token": 1.3805982271830242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.427632212638855, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.427632212638855, "logits_per_char": -0.7138161063194275, "num_chars": 2}, {"sum_logits": -1.2800503969192505, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.2800503969192505, "logits_per_char": -0.6400251984596252, "num_chars": 2}, {"sum_logits": -1.402190089225769, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.402190089225769, "logits_per_char": -0.7010950446128845, "num_chars": 2}, {"sum_logits": -1.4595541954040527, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4595541954040527, "logits_per_char": -0.7297770977020264, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 560, "native_id": "Mercury_7210193", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3923276662826538, "incorrect_loss_raw": 1.392317533493042, "correct_loss_per_char": 0.6961638331413269, "incorrect_loss_per_char": 0.696158766746521, "correct_loss_per_token": 1.3923276662826538, "incorrect_loss_per_token": 1.392317533493042, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3923276662826538, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3923276662826538, "logits_per_char": -0.6961638331413269, "num_chars": 2}, {"sum_logits": -1.4537559747695923, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4537559747695923, "logits_per_char": -0.7268779873847961, "num_chars": 2}, {"sum_logits": -1.3966089487075806, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3966089487075806, "logits_per_char": -0.6983044743537903, "num_chars": 2}, {"sum_logits": -1.3265876770019531, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.3265876770019531, "logits_per_char": -0.6632938385009766, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 561, "native_id": "Mercury_SC_405999", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.49705970287323, "incorrect_loss_raw": 1.3608814875284831, "correct_loss_per_char": 0.748529851436615, "incorrect_loss_per_char": 0.6804407437642416, "correct_loss_per_token": 1.49705970287323, "incorrect_loss_per_token": 1.3608814875284831, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.49705970287323, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.49705970287323, "logits_per_char": -0.748529851436615, "num_chars": 2}, {"sum_logits": -1.3814103603363037, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3814103603363037, "logits_per_char": -0.6907051801681519, "num_chars": 2}, {"sum_logits": -1.4510281085968018, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4510281085968018, "logits_per_char": -0.7255140542984009, "num_chars": 2}, {"sum_logits": -1.2502059936523438, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2502059936523438, "logits_per_char": -0.6251029968261719, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 562, "native_id": "Mercury_SC_400603", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2996609210968018, "incorrect_loss_raw": 1.4223726590474446, "correct_loss_per_char": 0.6498304605484009, "incorrect_loss_per_char": 0.7111863295237223, "correct_loss_per_token": 1.2996609210968018, "incorrect_loss_per_token": 1.4223726590474446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2996609210968018, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.2996609210968018, "logits_per_char": -0.6498304605484009, "num_chars": 2}, {"sum_logits": -1.4624276161193848, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4624276161193848, "logits_per_char": -0.7312138080596924, "num_chars": 2}, {"sum_logits": -1.3386610746383667, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3386610746383667, "logits_per_char": -0.6693305373191833, "num_chars": 2}, {"sum_logits": -1.4660292863845825, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4660292863845825, "logits_per_char": -0.7330146431922913, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 563, "native_id": "Mercury_SC_LBS10618", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4084291458129883, "incorrect_loss_raw": 1.3893656730651855, "correct_loss_per_char": 0.7042145729064941, "incorrect_loss_per_char": 0.6946828365325928, "correct_loss_per_token": 1.4084291458129883, "incorrect_loss_per_token": 1.3893656730651855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4084291458129883, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4084291458129883, "logits_per_char": -0.7042145729064941, "num_chars": 2}, {"sum_logits": -1.2983911037445068, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2983911037445068, "logits_per_char": -0.6491955518722534, "num_chars": 2}, {"sum_logits": -1.387115240097046, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.387115240097046, "logits_per_char": -0.693557620048523, "num_chars": 2}, {"sum_logits": -1.482590675354004, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.482590675354004, "logits_per_char": -0.741295337677002, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 564, "native_id": "Mercury_7056543", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5032707452774048, "incorrect_loss_raw": 1.3572897513707478, "correct_loss_per_char": 0.7516353726387024, "incorrect_loss_per_char": 0.6786448756853739, "correct_loss_per_token": 1.5032707452774048, "incorrect_loss_per_token": 1.3572897513707478, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4093838930130005, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4093838930130005, "logits_per_char": -0.7046919465065002, "num_chars": 2}, {"sum_logits": -1.2861335277557373, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.2861335277557373, "logits_per_char": -0.6430667638778687, "num_chars": 2}, {"sum_logits": -1.3763518333435059, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3763518333435059, "logits_per_char": -0.6881759166717529, "num_chars": 2}, {"sum_logits": -1.5032707452774048, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.5032707452774048, "logits_per_char": -0.7516353726387024, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 565, "native_id": "Mercury_7164920", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3072588443756104, "incorrect_loss_raw": 1.4218274752298992, "correct_loss_per_char": 0.6536294221878052, "incorrect_loss_per_char": 0.7109137376149496, "correct_loss_per_token": 1.3072588443756104, "incorrect_loss_per_token": 1.4218274752298992, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4238790273666382, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4238790273666382, "logits_per_char": -0.7119395136833191, "num_chars": 2}, {"sum_logits": -1.3556203842163086, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3556203842163086, "logits_per_char": -0.6778101921081543, "num_chars": 2}, {"sum_logits": -1.4859830141067505, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4859830141067505, "logits_per_char": -0.7429915070533752, "num_chars": 2}, {"sum_logits": -1.3072588443756104, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.3072588443756104, "logits_per_char": -0.6536294221878052, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 566, "native_id": "Mercury_SC_LBS10949", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4933315515518188, "incorrect_loss_raw": 1.3584121068318684, "correct_loss_per_char": 0.7466657757759094, "incorrect_loss_per_char": 0.6792060534159342, "correct_loss_per_token": 1.4933315515518188, "incorrect_loss_per_token": 1.3584121068318684, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3048354387283325, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3048354387283325, "logits_per_char": -0.6524177193641663, "num_chars": 2}, {"sum_logits": -1.3828229904174805, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3828229904174805, "logits_per_char": -0.6914114952087402, "num_chars": 2}, {"sum_logits": -1.4933315515518188, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4933315515518188, "logits_per_char": -0.7466657757759094, "num_chars": 2}, {"sum_logits": -1.3875778913497925, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3875778913497925, "logits_per_char": -0.6937889456748962, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 567, "native_id": "Mercury_7201268", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.266071081161499, "incorrect_loss_raw": 1.4393277963002522, "correct_loss_per_char": 0.6330355405807495, "incorrect_loss_per_char": 0.7196638981501261, "correct_loss_per_token": 1.266071081161499, "incorrect_loss_per_token": 1.4393277963002522, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.531916618347168, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.531916618347168, "logits_per_char": -0.765958309173584, "num_chars": 2}, {"sum_logits": -1.266071081161499, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.266071081161499, "logits_per_char": -0.6330355405807495, "num_chars": 2}, {"sum_logits": -1.4666550159454346, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4666550159454346, "logits_per_char": -0.7333275079727173, "num_chars": 2}, {"sum_logits": -1.3194117546081543, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3194117546081543, "logits_per_char": -0.6597058773040771, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 568, "native_id": "MSA_2012_5_28", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4285911321640015, "incorrect_loss_raw": 1.380086898803711, "correct_loss_per_char": 0.7142955660820007, "incorrect_loss_per_char": 0.6900434494018555, "correct_loss_per_token": 1.4285911321640015, "incorrect_loss_per_token": 1.380086898803711, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4285911321640015, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4285911321640015, "logits_per_char": -0.7142955660820007, "num_chars": 2}, {"sum_logits": -1.295778751373291, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.295778751373291, "logits_per_char": -0.6478893756866455, "num_chars": 2}, {"sum_logits": -1.4309850931167603, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4309850931167603, "logits_per_char": -0.7154925465583801, "num_chars": 2}, {"sum_logits": -1.4134968519210815, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4134968519210815, "logits_per_char": -0.7067484259605408, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 569, "native_id": "VASoL_2009_3_7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5345722436904907, "incorrect_loss_raw": 1.3474748929341633, "correct_loss_per_char": 0.7672861218452454, "incorrect_loss_per_char": 0.6737374464670817, "correct_loss_per_token": 1.5345722436904907, "incorrect_loss_per_token": 1.3474748929341633, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5345722436904907, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.5345722436904907, "logits_per_char": -0.7672861218452454, "num_chars": 2}, {"sum_logits": -1.3729122877120972, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3729122877120972, "logits_per_char": -0.6864561438560486, "num_chars": 2}, {"sum_logits": -1.382155179977417, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.382155179977417, "logits_per_char": -0.6910775899887085, "num_chars": 2}, {"sum_logits": -1.287357211112976, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.287357211112976, "logits_per_char": -0.643678605556488, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 570, "native_id": "VASoL_2009_3_21", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.363775372505188, "incorrect_loss_raw": 1.40337077776591, "correct_loss_per_char": 0.681887686252594, "incorrect_loss_per_char": 0.701685388882955, "correct_loss_per_token": 1.363775372505188, "incorrect_loss_per_token": 1.40337077776591, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3949388265609741, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3949388265609741, "logits_per_char": -0.6974694132804871, "num_chars": 2}, {"sum_logits": -1.3868248462677002, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3868248462677002, "logits_per_char": -0.6934124231338501, "num_chars": 2}, {"sum_logits": -1.363775372505188, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.363775372505188, "logits_per_char": -0.681887686252594, "num_chars": 2}, {"sum_logits": -1.4283486604690552, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4283486604690552, "logits_per_char": -0.7141743302345276, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 571, "native_id": "Mercury_7013230", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3396193981170654, "incorrect_loss_raw": 1.4083675543467205, "correct_loss_per_char": 0.6698096990585327, "incorrect_loss_per_char": 0.7041837771733602, "correct_loss_per_token": 1.3396193981170654, "incorrect_loss_per_token": 1.4083675543467205, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3396193981170654, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.3396193981170654, "logits_per_char": -0.6698096990585327, "num_chars": 2}, {"sum_logits": -1.3513292074203491, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3513292074203491, "logits_per_char": -0.6756646037101746, "num_chars": 2}, {"sum_logits": -1.4630966186523438, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4630966186523438, "logits_per_char": -0.7315483093261719, "num_chars": 2}, {"sum_logits": -1.4106768369674683, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4106768369674683, "logits_per_char": -0.7053384184837341, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 572, "native_id": "VASoL_2009_5_3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.282723307609558, "incorrect_loss_raw": 1.4335800409317017, "correct_loss_per_char": 0.641361653804779, "incorrect_loss_per_char": 0.7167900204658508, "correct_loss_per_token": 1.282723307609558, "incorrect_loss_per_token": 1.4335800409317017, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.282723307609558, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.282723307609558, "logits_per_char": -0.641361653804779, "num_chars": 2}, {"sum_logits": -1.2998276948928833, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.2998276948928833, "logits_per_char": -0.6499138474464417, "num_chars": 2}, {"sum_logits": -1.5341253280639648, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.5341253280639648, "logits_per_char": -0.7670626640319824, "num_chars": 2}, {"sum_logits": -1.4667870998382568, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4667870998382568, "logits_per_char": -0.7333935499191284, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 573, "native_id": "Mercury_SC_406703", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4781461954116821, "incorrect_loss_raw": 1.364188591639201, "correct_loss_per_char": 0.7390730977058411, "incorrect_loss_per_char": 0.6820942958196005, "correct_loss_per_token": 1.4781461954116821, "incorrect_loss_per_token": 1.364188591639201, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3069756031036377, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3069756031036377, "logits_per_char": -0.6534878015518188, "num_chars": 2}, {"sum_logits": -1.3590725660324097, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3590725660324097, "logits_per_char": -0.6795362830162048, "num_chars": 2}, {"sum_logits": -1.4781461954116821, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4781461954116821, "logits_per_char": -0.7390730977058411, "num_chars": 2}, {"sum_logits": -1.4265176057815552, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4265176057815552, "logits_per_char": -0.7132588028907776, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 574, "native_id": "Mercury_7001348", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4840151071548462, "incorrect_loss_raw": 1.3657562335332234, "correct_loss_per_char": 0.7420075535774231, "incorrect_loss_per_char": 0.6828781167666117, "correct_loss_per_token": 1.4840151071548462, "incorrect_loss_per_token": 1.3657562335332234, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4840151071548462, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4840151071548462, "logits_per_char": -0.7420075535774231, "num_chars": 2}, {"sum_logits": -1.4645997285842896, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4645997285842896, "logits_per_char": -0.7322998642921448, "num_chars": 2}, {"sum_logits": -1.3769769668579102, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3769769668579102, "logits_per_char": -0.6884884834289551, "num_chars": 2}, {"sum_logits": -1.2556920051574707, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2556920051574707, "logits_per_char": -0.6278460025787354, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 575, "native_id": "MCAS_2012_8_23650", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3795181512832642, "incorrect_loss_raw": 1.398001790046692, "correct_loss_per_char": 0.6897590756416321, "incorrect_loss_per_char": 0.699000895023346, "correct_loss_per_token": 1.3795181512832642, "incorrect_loss_per_token": 1.398001790046692, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5154129266738892, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.5154129266738892, "logits_per_char": -0.7577064633369446, "num_chars": 2}, {"sum_logits": -1.3795181512832642, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3795181512832642, "logits_per_char": -0.6897590756416321, "num_chars": 2}, {"sum_logits": -1.3813221454620361, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3813221454620361, "logits_per_char": -0.6906610727310181, "num_chars": 2}, {"sum_logits": -1.2972702980041504, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.2972702980041504, "logits_per_char": -0.6486351490020752, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 576, "native_id": "Mercury_7218488", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1861305236816406, "incorrect_loss_raw": 1.4722753763198853, "correct_loss_per_char": 0.5930652618408203, "incorrect_loss_per_char": 0.7361376881599426, "correct_loss_per_token": 1.1861305236816406, "incorrect_loss_per_token": 1.4722753763198853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5697673559188843, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5697673559188843, "logits_per_char": -0.7848836779594421, "num_chars": 2}, {"sum_logits": -1.4612962007522583, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4612962007522583, "logits_per_char": -0.7306481003761292, "num_chars": 2}, {"sum_logits": -1.3857625722885132, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3857625722885132, "logits_per_char": -0.6928812861442566, "num_chars": 2}, {"sum_logits": -1.1861305236816406, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.1861305236816406, "logits_per_char": -0.5930652618408203, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 577, "native_id": "NYSEDREGENTS_2013_8_9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4196590185165405, "incorrect_loss_raw": 1.3817155361175537, "correct_loss_per_char": 0.7098295092582703, "incorrect_loss_per_char": 0.6908577680587769, "correct_loss_per_token": 1.4196590185165405, "incorrect_loss_per_token": 1.3817155361175537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4303845167160034, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4303845167160034, "logits_per_char": -0.7151922583580017, "num_chars": 2}, {"sum_logits": -1.4061847925186157, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4061847925186157, "logits_per_char": -0.7030923962593079, "num_chars": 2}, {"sum_logits": -1.4196590185165405, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4196590185165405, "logits_per_char": -0.7098295092582703, "num_chars": 2}, {"sum_logits": -1.308577299118042, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.308577299118042, "logits_per_char": -0.654288649559021, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 578, "native_id": "MCAS_2013_8_29434", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3117514848709106, "incorrect_loss_raw": 1.4215997060139973, "correct_loss_per_char": 0.6558757424354553, "incorrect_loss_per_char": 0.7107998530069987, "correct_loss_per_token": 1.3117514848709106, "incorrect_loss_per_token": 1.4215997060139973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.472802758216858, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.472802758216858, "logits_per_char": -0.736401379108429, "num_chars": 2}, {"sum_logits": -1.3117514848709106, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.3117514848709106, "logits_per_char": -0.6558757424354553, "num_chars": 2}, {"sum_logits": -1.4294239282608032, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4294239282608032, "logits_per_char": -0.7147119641304016, "num_chars": 2}, {"sum_logits": -1.362572431564331, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.362572431564331, "logits_per_char": -0.6812862157821655, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 579, "native_id": "LEAP_2002_8_10389", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0815215110778809, "incorrect_loss_raw": 1.5205446084340413, "correct_loss_per_char": 0.5407607555389404, "incorrect_loss_per_char": 0.7602723042170206, "correct_loss_per_token": 1.0815215110778809, "incorrect_loss_per_token": 1.5205446084340413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6214733123779297, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.6214733123779297, "logits_per_char": -0.8107366561889648, "num_chars": 2}, {"sum_logits": -1.4419291019439697, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4419291019439697, "logits_per_char": -0.7209645509719849, "num_chars": 2}, {"sum_logits": -1.4982314109802246, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4982314109802246, "logits_per_char": -0.7491157054901123, "num_chars": 2}, {"sum_logits": -1.0815215110778809, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.0815215110778809, "logits_per_char": -0.5407607555389404, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 580, "native_id": "NYSEDREGENTS_2010_4_20", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4367958307266235, "incorrect_loss_raw": 1.3756955464680989, "correct_loss_per_char": 0.7183979153633118, "incorrect_loss_per_char": 0.6878477732340494, "correct_loss_per_token": 1.4367958307266235, "incorrect_loss_per_token": 1.3756955464680989, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3653322458267212, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3653322458267212, "logits_per_char": -0.6826661229133606, "num_chars": 2}, {"sum_logits": -1.353597640991211, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.353597640991211, "logits_per_char": -0.6767988204956055, "num_chars": 2}, {"sum_logits": -1.4081567525863647, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4081567525863647, "logits_per_char": -0.7040783762931824, "num_chars": 2}, {"sum_logits": -1.4367958307266235, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4367958307266235, "logits_per_char": -0.7183979153633118, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 581, "native_id": "MSA_2012_5_22", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3359094858169556, "incorrect_loss_raw": 1.4131030639012654, "correct_loss_per_char": 0.6679547429084778, "incorrect_loss_per_char": 0.7065515319506327, "correct_loss_per_token": 1.3359094858169556, "incorrect_loss_per_token": 1.4131030639012654, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5233579874038696, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.5233579874038696, "logits_per_char": -0.7616789937019348, "num_chars": 2}, {"sum_logits": -1.3359094858169556, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3359094858169556, "logits_per_char": -0.6679547429084778, "num_chars": 2}, {"sum_logits": -1.3990180492401123, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3990180492401123, "logits_per_char": -0.6995090246200562, "num_chars": 2}, {"sum_logits": -1.3169331550598145, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3169331550598145, "logits_per_char": -0.6584665775299072, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 582, "native_id": "CSZ20228", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4165871143341064, "incorrect_loss_raw": 1.3845232725143433, "correct_loss_per_char": 0.7082935571670532, "incorrect_loss_per_char": 0.6922616362571716, "correct_loss_per_token": 1.4165871143341064, "incorrect_loss_per_token": 1.3845232725143433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.368965983390808, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.368965983390808, "logits_per_char": -0.684482991695404, "num_chars": 2}, {"sum_logits": -1.4475435018539429, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4475435018539429, "logits_per_char": -0.7237717509269714, "num_chars": 2}, {"sum_logits": -1.4165871143341064, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4165871143341064, "logits_per_char": -0.7082935571670532, "num_chars": 2}, {"sum_logits": -1.3370603322982788, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.3370603322982788, "logits_per_char": -0.6685301661491394, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 583, "native_id": "Mercury_7001715", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.422041654586792, "incorrect_loss_raw": 1.382274905840556, "correct_loss_per_char": 0.711020827293396, "incorrect_loss_per_char": 0.691137452920278, "correct_loss_per_token": 1.422041654586792, "incorrect_loss_per_token": 1.382274905840556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2743337154388428, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.2743337154388428, "logits_per_char": -0.6371668577194214, "num_chars": 2}, {"sum_logits": -1.4298955202102661, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4298955202102661, "logits_per_char": -0.7149477601051331, "num_chars": 2}, {"sum_logits": -1.4425954818725586, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4425954818725586, "logits_per_char": -0.7212977409362793, "num_chars": 2}, {"sum_logits": -1.422041654586792, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.422041654586792, "logits_per_char": -0.711020827293396, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 584, "native_id": "Mercury_7142748", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2997850179672241, "incorrect_loss_raw": 1.4236185153325398, "correct_loss_per_char": 0.6498925089836121, "incorrect_loss_per_char": 0.7118092576662699, "correct_loss_per_token": 1.2997850179672241, "incorrect_loss_per_token": 1.4236185153325398, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4502222537994385, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4502222537994385, "logits_per_char": -0.7251111268997192, "num_chars": 2}, {"sum_logits": -1.2997850179672241, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2997850179672241, "logits_per_char": -0.6498925089836121, "num_chars": 2}, {"sum_logits": -1.4039591550827026, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4039591550827026, "logits_per_char": -0.7019795775413513, "num_chars": 2}, {"sum_logits": -1.4166741371154785, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4166741371154785, "logits_per_char": -0.7083370685577393, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 585, "native_id": "Mercury_184328", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2550013065338135, "incorrect_loss_raw": 1.4474750757217407, "correct_loss_per_char": 0.6275006532669067, "incorrect_loss_per_char": 0.7237375378608704, "correct_loss_per_token": 1.2550013065338135, "incorrect_loss_per_token": 1.4474750757217407, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2550013065338135, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.2550013065338135, "logits_per_char": -0.6275006532669067, "num_chars": 2}, {"sum_logits": -1.3477331399917603, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3477331399917603, "logits_per_char": -0.6738665699958801, "num_chars": 2}, {"sum_logits": -1.3931523561477661, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3931523561477661, "logits_per_char": -0.6965761780738831, "num_chars": 2}, {"sum_logits": -1.6015397310256958, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.6015397310256958, "logits_per_char": -0.8007698655128479, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 586, "native_id": "Mercury_SC_LBS10177", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.358399748802185, "incorrect_loss_raw": 1.4021668036778767, "correct_loss_per_char": 0.6791998744010925, "incorrect_loss_per_char": 0.7010834018389384, "correct_loss_per_token": 1.358399748802185, "incorrect_loss_per_token": 1.4021668036778767, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4462095499038696, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4462095499038696, "logits_per_char": -0.7231047749519348, "num_chars": 2}, {"sum_logits": -1.4040435552597046, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4040435552597046, "logits_per_char": -0.7020217776298523, "num_chars": 2}, {"sum_logits": -1.3562473058700562, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3562473058700562, "logits_per_char": -0.6781236529350281, "num_chars": 2}, {"sum_logits": -1.358399748802185, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.358399748802185, "logits_per_char": -0.6791998744010925, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 587, "native_id": "Mercury_182945", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3911726474761963, "incorrect_loss_raw": 1.3946322997411091, "correct_loss_per_char": 0.6955863237380981, "incorrect_loss_per_char": 0.6973161498705546, "correct_loss_per_token": 1.3911726474761963, "incorrect_loss_per_token": 1.3946322997411091, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.335170865058899, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.335170865058899, "logits_per_char": -0.6675854325294495, "num_chars": 2}, {"sum_logits": -1.4572067260742188, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4572067260742188, "logits_per_char": -0.7286033630371094, "num_chars": 2}, {"sum_logits": -1.39151930809021, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.39151930809021, "logits_per_char": -0.695759654045105, "num_chars": 2}, {"sum_logits": -1.3911726474761963, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3911726474761963, "logits_per_char": -0.6955863237380981, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 588, "native_id": "Mercury_7221463", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.383368968963623, "incorrect_loss_raw": 1.395155906677246, "correct_loss_per_char": 0.6916844844818115, "incorrect_loss_per_char": 0.697577953338623, "correct_loss_per_token": 1.383368968963623, "incorrect_loss_per_token": 1.395155906677246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4547102451324463, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4547102451324463, "logits_per_char": -0.7273551225662231, "num_chars": 2}, {"sum_logits": -1.3083000183105469, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.3083000183105469, "logits_per_char": -0.6541500091552734, "num_chars": 2}, {"sum_logits": -1.4224574565887451, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4224574565887451, "logits_per_char": -0.7112287282943726, "num_chars": 2}, {"sum_logits": -1.383368968963623, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.383368968963623, "logits_per_char": -0.6916844844818115, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 589, "native_id": "Mercury_184240", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3158578872680664, "incorrect_loss_raw": 1.4245974222819011, "correct_loss_per_char": 0.6579289436340332, "incorrect_loss_per_char": 0.7122987111409506, "correct_loss_per_token": 1.3158578872680664, "incorrect_loss_per_token": 1.4245974222819011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3154860734939575, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.3154860734939575, "logits_per_char": -0.6577430367469788, "num_chars": 2}, {"sum_logits": -1.3158578872680664, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3158578872680664, "logits_per_char": -0.6579289436340332, "num_chars": 2}, {"sum_logits": -1.3643428087234497, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3643428087234497, "logits_per_char": -0.6821714043617249, "num_chars": 2}, {"sum_logits": -1.593963384628296, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.593963384628296, "logits_per_char": -0.796981692314148, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 590, "native_id": "Mercury_SC_LBS10606", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4143763780593872, "incorrect_loss_raw": 1.3847535451253254, "correct_loss_per_char": 0.7071881890296936, "incorrect_loss_per_char": 0.6923767725626627, "correct_loss_per_token": 1.4143763780593872, "incorrect_loss_per_token": 1.3847535451253254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3126778602600098, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.3126778602600098, "logits_per_char": -0.6563389301300049, "num_chars": 2}, {"sum_logits": -1.4736028909683228, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4736028909683228, "logits_per_char": -0.7368014454841614, "num_chars": 2}, {"sum_logits": -1.367979884147644, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.367979884147644, "logits_per_char": -0.683989942073822, "num_chars": 2}, {"sum_logits": -1.4143763780593872, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4143763780593872, "logits_per_char": -0.7071881890296936, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 591, "native_id": "Mercury_405141", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.480005145072937, "incorrect_loss_raw": 1.369196613629659, "correct_loss_per_char": 0.7400025725364685, "incorrect_loss_per_char": 0.6845983068148295, "correct_loss_per_token": 1.480005145072937, "incorrect_loss_per_token": 1.369196613629659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.474124550819397, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.474124550819397, "logits_per_char": -0.7370622754096985, "num_chars": 2}, {"sum_logits": -1.480005145072937, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.480005145072937, "logits_per_char": -0.7400025725364685, "num_chars": 2}, {"sum_logits": -1.4186434745788574, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4186434745788574, "logits_per_char": -0.7093217372894287, "num_chars": 2}, {"sum_logits": -1.2148218154907227, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.2148218154907227, "logits_per_char": -0.6074109077453613, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 592, "native_id": "Mercury_7024938", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.51329505443573, "incorrect_loss_raw": 1.3748548030853271, "correct_loss_per_char": 0.756647527217865, "incorrect_loss_per_char": 0.6874274015426636, "correct_loss_per_token": 1.51329505443573, "incorrect_loss_per_token": 1.3748548030853271, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1631017923355103, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.1631017923355103, "logits_per_char": -0.5815508961677551, "num_chars": 2}, {"sum_logits": -1.3661749362945557, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3661749362945557, "logits_per_char": -0.6830874681472778, "num_chars": 2}, {"sum_logits": -1.5952876806259155, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5952876806259155, "logits_per_char": -0.7976438403129578, "num_chars": 2}, {"sum_logits": -1.51329505443573, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.51329505443573, "logits_per_char": -0.756647527217865, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 593, "native_id": "Mercury_SC_400035", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4645992517471313, "incorrect_loss_raw": 1.3703945875167847, "correct_loss_per_char": 0.7322996258735657, "incorrect_loss_per_char": 0.6851972937583923, "correct_loss_per_token": 1.4645992517471313, "incorrect_loss_per_token": 1.3703945875167847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4008878469467163, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4008878469467163, "logits_per_char": -0.7004439234733582, "num_chars": 2}, {"sum_logits": -1.2607789039611816, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2607789039611816, "logits_per_char": -0.6303894519805908, "num_chars": 2}, {"sum_logits": -1.4645992517471313, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4645992517471313, "logits_per_char": -0.7322996258735657, "num_chars": 2}, {"sum_logits": -1.449517011642456, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.449517011642456, "logits_per_char": -0.724758505821228, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 594, "native_id": "Mercury_7228113", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3341041803359985, "incorrect_loss_raw": 1.421094576517741, "correct_loss_per_char": 0.6670520901679993, "incorrect_loss_per_char": 0.7105472882588705, "correct_loss_per_token": 1.3341041803359985, "incorrect_loss_per_token": 1.421094576517741, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.497012734413147, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.497012734413147, "logits_per_char": -0.7485063672065735, "num_chars": 2}, {"sum_logits": -1.3341041803359985, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3341041803359985, "logits_per_char": -0.6670520901679993, "num_chars": 2}, {"sum_logits": -1.5412776470184326, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.5412776470184326, "logits_per_char": -0.7706388235092163, "num_chars": 2}, {"sum_logits": -1.224993348121643, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.224993348121643, "logits_per_char": -0.6124966740608215, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 595, "native_id": "OHAT_2008_8_15", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3498504161834717, "incorrect_loss_raw": 1.4064093430836995, "correct_loss_per_char": 0.6749252080917358, "incorrect_loss_per_char": 0.7032046715418497, "correct_loss_per_token": 1.3498504161834717, "incorrect_loss_per_token": 1.4064093430836995, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4290655851364136, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4290655851364136, "logits_per_char": -0.7145327925682068, "num_chars": 2}, {"sum_logits": -1.3498504161834717, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3498504161834717, "logits_per_char": -0.6749252080917358, "num_chars": 2}, {"sum_logits": -1.4410839080810547, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4410839080810547, "logits_per_char": -0.7205419540405273, "num_chars": 2}, {"sum_logits": -1.3490785360336304, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3490785360336304, "logits_per_char": -0.6745392680168152, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 596, "native_id": "MCAS_2000_8_37", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5587040185928345, "incorrect_loss_raw": 1.3471810420354207, "correct_loss_per_char": 0.7793520092964172, "incorrect_loss_per_char": 0.6735905210177103, "correct_loss_per_token": 1.5587040185928345, "incorrect_loss_per_token": 1.3471810420354207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5587040185928345, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.5587040185928345, "logits_per_char": -0.7793520092964172, "num_chars": 2}, {"sum_logits": -1.4830005168914795, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4830005168914795, "logits_per_char": -0.7415002584457397, "num_chars": 2}, {"sum_logits": -1.299424648284912, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.299424648284912, "logits_per_char": -0.649712324142456, "num_chars": 2}, {"sum_logits": -1.2591179609298706, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.2591179609298706, "logits_per_char": -0.6295589804649353, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 597, "native_id": "Mercury_401396", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3289674520492554, "incorrect_loss_raw": 1.4263306458791096, "correct_loss_per_char": 0.6644837260246277, "incorrect_loss_per_char": 0.7131653229395548, "correct_loss_per_token": 1.3289674520492554, "incorrect_loss_per_token": 1.4263306458791096, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6721677780151367, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.6721677780151367, "logits_per_char": -0.8360838890075684, "num_chars": 2}, {"sum_logits": -1.3289674520492554, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3289674520492554, "logits_per_char": -0.6644837260246277, "num_chars": 2}, {"sum_logits": -1.3293192386627197, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3293192386627197, "logits_per_char": -0.6646596193313599, "num_chars": 2}, {"sum_logits": -1.2775049209594727, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2775049209594727, "logits_per_char": -0.6387524604797363, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 598, "native_id": "Mercury_7146178", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.375504732131958, "incorrect_loss_raw": 1.3987022638320923, "correct_loss_per_char": 0.687752366065979, "incorrect_loss_per_char": 0.6993511319160461, "correct_loss_per_token": 1.375504732131958, "incorrect_loss_per_token": 1.3987022638320923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4900281429290771, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4900281429290771, "logits_per_char": -0.7450140714645386, "num_chars": 2}, {"sum_logits": -1.299965500831604, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.299965500831604, "logits_per_char": -0.649982750415802, "num_chars": 2}, {"sum_logits": -1.375504732131958, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.375504732131958, "logits_per_char": -0.687752366065979, "num_chars": 2}, {"sum_logits": -1.4061131477355957, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4061131477355957, "logits_per_char": -0.7030565738677979, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 599, "native_id": "TIMSS_2011_4_pg7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4665392637252808, "incorrect_loss_raw": 1.3687739372253418, "correct_loss_per_char": 0.7332696318626404, "incorrect_loss_per_char": 0.6843869686126709, "correct_loss_per_token": 1.4665392637252808, "incorrect_loss_per_token": 1.3687739372253418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3158406019210815, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.3158406019210815, "logits_per_char": -0.6579203009605408, "num_chars": 2}, {"sum_logits": -1.3981620073318481, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3981620073318481, "logits_per_char": -0.6990810036659241, "num_chars": 2}, {"sum_logits": -1.3923192024230957, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3923192024230957, "logits_per_char": -0.6961596012115479, "num_chars": 2}, {"sum_logits": -1.4665392637252808, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4665392637252808, "logits_per_char": -0.7332696318626404, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 600, "native_id": "ACTAAP_2008_7_17", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3784065246582031, "incorrect_loss_raw": 1.3976596196492512, "correct_loss_per_char": 0.6892032623291016, "incorrect_loss_per_char": 0.6988298098246256, "correct_loss_per_token": 1.3784065246582031, "incorrect_loss_per_token": 1.3976596196492512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4684051275253296, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4684051275253296, "logits_per_char": -0.7342025637626648, "num_chars": 2}, {"sum_logits": -1.433467149734497, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.433467149734497, "logits_per_char": -0.7167335748672485, "num_chars": 2}, {"sum_logits": -1.2911065816879272, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.2911065816879272, "logits_per_char": -0.6455532908439636, "num_chars": 2}, {"sum_logits": -1.3784065246582031, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3784065246582031, "logits_per_char": -0.6892032623291016, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 601, "native_id": "ACTAAP_2009_7_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3790165185928345, "incorrect_loss_raw": 1.395953377087911, "correct_loss_per_char": 0.6895082592964172, "incorrect_loss_per_char": 0.6979766885439554, "correct_loss_per_token": 1.3790165185928345, "incorrect_loss_per_token": 1.395953377087911, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3790165185928345, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3790165185928345, "logits_per_char": -0.6895082592964172, "num_chars": 2}, {"sum_logits": -1.3629767894744873, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3629767894744873, "logits_per_char": -0.6814883947372437, "num_chars": 2}, {"sum_logits": -1.399727702140808, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.399727702140808, "logits_per_char": -0.699863851070404, "num_chars": 2}, {"sum_logits": -1.4251556396484375, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4251556396484375, "logits_per_char": -0.7125778198242188, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 602, "native_id": "Mercury_7004988", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4144481420516968, "incorrect_loss_raw": 1.389461358388265, "correct_loss_per_char": 0.7072240710258484, "incorrect_loss_per_char": 0.6947306791941324, "correct_loss_per_token": 1.4144481420516968, "incorrect_loss_per_token": 1.389461358388265, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4474388360977173, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4474388360977173, "logits_per_char": -0.7237194180488586, "num_chars": 2}, {"sum_logits": -1.481665015220642, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.481665015220642, "logits_per_char": -0.740832507610321, "num_chars": 2}, {"sum_logits": -1.2392802238464355, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.2392802238464355, "logits_per_char": -0.6196401119232178, "num_chars": 2}, {"sum_logits": -1.4144481420516968, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4144481420516968, "logits_per_char": -0.7072240710258484, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 603, "native_id": "AIMS_2008_8_7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4508429765701294, "incorrect_loss_raw": 1.3717906475067139, "correct_loss_per_char": 0.7254214882850647, "incorrect_loss_per_char": 0.6858953237533569, "correct_loss_per_token": 1.4508429765701294, "incorrect_loss_per_token": 1.3717906475067139, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4508429765701294, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4508429765701294, "logits_per_char": -0.7254214882850647, "num_chars": 2}, {"sum_logits": -1.4055192470550537, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4055192470550537, "logits_per_char": -0.7027596235275269, "num_chars": 2}, {"sum_logits": -1.3882108926773071, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3882108926773071, "logits_per_char": -0.6941054463386536, "num_chars": 2}, {"sum_logits": -1.3216418027877808, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.3216418027877808, "logits_per_char": -0.6608209013938904, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 604, "native_id": "Mercury_416686", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.410644292831421, "incorrect_loss_raw": 1.3840306202570598, "correct_loss_per_char": 0.7053221464157104, "incorrect_loss_per_char": 0.6920153101285299, "correct_loss_per_token": 1.410644292831421, "incorrect_loss_per_token": 1.3840306202570598, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.388861060142517, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.388861060142517, "logits_per_char": -0.6944305300712585, "num_chars": 2}, {"sum_logits": -1.410644292831421, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.410644292831421, "logits_per_char": -0.7053221464157104, "num_chars": 2}, {"sum_logits": -1.4097539186477661, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4097539186477661, "logits_per_char": -0.7048769593238831, "num_chars": 2}, {"sum_logits": -1.353476881980896, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.353476881980896, "logits_per_char": -0.676738440990448, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 605, "native_id": "Mercury_180863", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.294087529182434, "incorrect_loss_raw": 1.4335750341415405, "correct_loss_per_char": 0.647043764591217, "incorrect_loss_per_char": 0.7167875170707703, "correct_loss_per_token": 1.294087529182434, "incorrect_loss_per_token": 1.4335750341415405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6217352151870728, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.6217352151870728, "logits_per_char": -0.8108676075935364, "num_chars": 2}, {"sum_logits": -1.3046623468399048, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3046623468399048, "logits_per_char": -0.6523311734199524, "num_chars": 2}, {"sum_logits": -1.374327540397644, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.374327540397644, "logits_per_char": -0.687163770198822, "num_chars": 2}, {"sum_logits": -1.294087529182434, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.294087529182434, "logits_per_char": -0.647043764591217, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 606, "native_id": "Mercury_409111", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.348669171333313, "incorrect_loss_raw": 1.405280590057373, "correct_loss_per_char": 0.6743345856666565, "incorrect_loss_per_char": 0.7026402950286865, "correct_loss_per_token": 1.348669171333313, "incorrect_loss_per_token": 1.405280590057373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.440372347831726, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.440372347831726, "logits_per_char": -0.720186173915863, "num_chars": 2}, {"sum_logits": -1.348669171333313, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.348669171333313, "logits_per_char": -0.6743345856666565, "num_chars": 2}, {"sum_logits": -1.4249836206436157, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4249836206436157, "logits_per_char": -0.7124918103218079, "num_chars": 2}, {"sum_logits": -1.3504858016967773, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3504858016967773, "logits_per_char": -0.6752429008483887, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 607, "native_id": "Mercury_7081550", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3425612449645996, "incorrect_loss_raw": 1.4214354356129963, "correct_loss_per_char": 0.6712806224822998, "incorrect_loss_per_char": 0.7107177178064982, "correct_loss_per_token": 1.3425612449645996, "incorrect_loss_per_token": 1.4214354356129963, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6070414781570435, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.6070414781570435, "logits_per_char": -0.8035207390785217, "num_chars": 2}, {"sum_logits": -1.3425612449645996, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3425612449645996, "logits_per_char": -0.6712806224822998, "num_chars": 2}, {"sum_logits": -1.453566074371338, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.453566074371338, "logits_per_char": -0.726783037185669, "num_chars": 2}, {"sum_logits": -1.203698754310608, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.203698754310608, "logits_per_char": -0.601849377155304, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 608, "native_id": "NYSEDREGENTS_2008_8_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3907101154327393, "incorrect_loss_raw": 1.3926713069279988, "correct_loss_per_char": 0.6953550577163696, "incorrect_loss_per_char": 0.6963356534639994, "correct_loss_per_token": 1.3907101154327393, "incorrect_loss_per_token": 1.3926713069279988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3698543310165405, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3698543310165405, "logits_per_char": -0.6849271655082703, "num_chars": 2}, {"sum_logits": -1.3907101154327393, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3907101154327393, "logits_per_char": -0.6953550577163696, "num_chars": 2}, {"sum_logits": -1.4904329776763916, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4904329776763916, "logits_per_char": -0.7452164888381958, "num_chars": 2}, {"sum_logits": -1.3177266120910645, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.3177266120910645, "logits_per_char": -0.6588633060455322, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 609, "native_id": "Mercury_SC_LBS10946", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3878751993179321, "incorrect_loss_raw": 1.394909143447876, "correct_loss_per_char": 0.6939375996589661, "incorrect_loss_per_char": 0.697454571723938, "correct_loss_per_token": 1.3878751993179321, "incorrect_loss_per_token": 1.394909143447876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3878751993179321, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3878751993179321, "logits_per_char": -0.6939375996589661, "num_chars": 2}, {"sum_logits": -1.5023218393325806, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5023218393325806, "logits_per_char": -0.7511609196662903, "num_chars": 2}, {"sum_logits": -1.3270881175994873, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.3270881175994873, "logits_per_char": -0.6635440587997437, "num_chars": 2}, {"sum_logits": -1.35531747341156, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.35531747341156, "logits_per_char": -0.67765873670578, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 610, "native_id": "Mercury_7085418", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4756566286087036, "incorrect_loss_raw": 1.3693631887435913, "correct_loss_per_char": 0.7378283143043518, "incorrect_loss_per_char": 0.6846815943717957, "correct_loss_per_token": 1.4756566286087036, "incorrect_loss_per_token": 1.3693631887435913, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4688996076583862, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4688996076583862, "logits_per_char": -0.7344498038291931, "num_chars": 2}, {"sum_logits": -1.4756566286087036, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4756566286087036, "logits_per_char": -0.7378283143043518, "num_chars": 2}, {"sum_logits": -1.2365756034851074, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.2365756034851074, "logits_per_char": -0.6182878017425537, "num_chars": 2}, {"sum_logits": -1.4026143550872803, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4026143550872803, "logits_per_char": -0.7013071775436401, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 611, "native_id": "MCAS_2000_8_32", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.222112774848938, "incorrect_loss_raw": 1.4526317914326985, "correct_loss_per_char": 0.611056387424469, "incorrect_loss_per_char": 0.7263158957163492, "correct_loss_per_token": 1.222112774848938, "incorrect_loss_per_token": 1.4526317914326985, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.222112774848938, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.222112774848938, "logits_per_char": -0.611056387424469, "num_chars": 2}, {"sum_logits": -1.449180245399475, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.449180245399475, "logits_per_char": -0.7245901226997375, "num_chars": 2}, {"sum_logits": -1.4644306898117065, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4644306898117065, "logits_per_char": -0.7322153449058533, "num_chars": 2}, {"sum_logits": -1.444284439086914, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.444284439086914, "logits_per_char": -0.722142219543457, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 612, "native_id": "ACTAAP_2011_5_6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4319219589233398, "incorrect_loss_raw": 1.377062161763509, "correct_loss_per_char": 0.7159609794616699, "incorrect_loss_per_char": 0.6885310808817545, "correct_loss_per_token": 1.4319219589233398, "incorrect_loss_per_token": 1.377062161763509, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3709666728973389, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3709666728973389, "logits_per_char": -0.6854833364486694, "num_chars": 2}, {"sum_logits": -1.4099425077438354, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4099425077438354, "logits_per_char": -0.7049712538719177, "num_chars": 2}, {"sum_logits": -1.4319219589233398, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4319219589233398, "logits_per_char": -0.7159609794616699, "num_chars": 2}, {"sum_logits": -1.350277304649353, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.350277304649353, "logits_per_char": -0.6751386523246765, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 613, "native_id": "Mercury_SC_400041", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4012911319732666, "incorrect_loss_raw": 1.3889840046564739, "correct_loss_per_char": 0.7006455659866333, "incorrect_loss_per_char": 0.6944920023282369, "correct_loss_per_token": 1.4012911319732666, "incorrect_loss_per_token": 1.3889840046564739, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3678135871887207, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3678135871887207, "logits_per_char": -0.6839067935943604, "num_chars": 2}, {"sum_logits": -1.4468833208084106, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4468833208084106, "logits_per_char": -0.7234416604042053, "num_chars": 2}, {"sum_logits": -1.4012911319732666, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4012911319732666, "logits_per_char": -0.7006455659866333, "num_chars": 2}, {"sum_logits": -1.35225510597229, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.35225510597229, "logits_per_char": -0.676127552986145, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 614, "native_id": "Mercury_7141733", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3179829120635986, "incorrect_loss_raw": 1.4159492651621501, "correct_loss_per_char": 0.6589914560317993, "incorrect_loss_per_char": 0.7079746325810751, "correct_loss_per_token": 1.3179829120635986, "incorrect_loss_per_token": 1.4159492651621501, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4552205801010132, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4552205801010132, "logits_per_char": -0.7276102900505066, "num_chars": 2}, {"sum_logits": -1.344556212425232, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.344556212425232, "logits_per_char": -0.672278106212616, "num_chars": 2}, {"sum_logits": -1.448071002960205, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.448071002960205, "logits_per_char": -0.7240355014801025, "num_chars": 2}, {"sum_logits": -1.3179829120635986, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3179829120635986, "logits_per_char": -0.6589914560317993, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 615, "native_id": "ACTAAP_2007_7_34", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3495359420776367, "incorrect_loss_raw": 1.4075014193852742, "correct_loss_per_char": 0.6747679710388184, "incorrect_loss_per_char": 0.7037507096926371, "correct_loss_per_token": 1.3495359420776367, "incorrect_loss_per_token": 1.4075014193852742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4811910390853882, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4811910390853882, "logits_per_char": -0.7405955195426941, "num_chars": 2}, {"sum_logits": -1.3872030973434448, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.3872030973434448, "logits_per_char": -0.6936015486717224, "num_chars": 2}, {"sum_logits": -1.3495359420776367, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.3495359420776367, "logits_per_char": -0.6747679710388184, "num_chars": 2}, {"sum_logits": -1.3541101217269897, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.3541101217269897, "logits_per_char": -0.6770550608634949, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 616, "native_id": "MCAS_2010_8_12003", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2851767539978027, "incorrect_loss_raw": 1.4357922871907551, "correct_loss_per_char": 0.6425883769989014, "incorrect_loss_per_char": 0.7178961435953776, "correct_loss_per_token": 1.2851767539978027, "incorrect_loss_per_token": 1.4357922871907551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.327057957649231, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.327057957649231, "logits_per_char": -0.6635289788246155, "num_chars": 2}, {"sum_logits": -1.2851767539978027, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.2851767539978027, "logits_per_char": -0.6425883769989014, "num_chars": 2}, {"sum_logits": -1.457653284072876, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.457653284072876, "logits_per_char": -0.728826642036438, "num_chars": 2}, {"sum_logits": -1.5226656198501587, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.5226656198501587, "logits_per_char": -0.7613328099250793, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 617, "native_id": "Mercury_SC_401221", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3886032104492188, "incorrect_loss_raw": 1.3906544049580891, "correct_loss_per_char": 0.6943016052246094, "incorrect_loss_per_char": 0.6953272024790446, "correct_loss_per_token": 1.3886032104492188, "incorrect_loss_per_token": 1.3906544049580891, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4308124780654907, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4308124780654907, "logits_per_char": -0.7154062390327454, "num_chars": 2}, {"sum_logits": -1.3368937969207764, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3368937969207764, "logits_per_char": -0.6684468984603882, "num_chars": 2}, {"sum_logits": -1.4042569398880005, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4042569398880005, "logits_per_char": -0.7021284699440002, "num_chars": 2}, {"sum_logits": -1.3886032104492188, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3886032104492188, "logits_per_char": -0.6943016052246094, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 618, "native_id": "ACTAAP_2014_5_6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4728761911392212, "incorrect_loss_raw": 1.3674611647923787, "correct_loss_per_char": 0.7364380955696106, "incorrect_loss_per_char": 0.6837305823961893, "correct_loss_per_token": 1.4728761911392212, "incorrect_loss_per_token": 1.3674611647923787, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4728761911392212, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4728761911392212, "logits_per_char": -0.7364380955696106, "num_chars": 2}, {"sum_logits": -1.249645471572876, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.249645471572876, "logits_per_char": -0.624822735786438, "num_chars": 2}, {"sum_logits": -1.4153193235397339, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4153193235397339, "logits_per_char": -0.7076596617698669, "num_chars": 2}, {"sum_logits": -1.4374186992645264, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4374186992645264, "logits_per_char": -0.7187093496322632, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 619, "native_id": "LEAP_2001_4_10240", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2959200143814087, "incorrect_loss_raw": 1.4246846437454224, "correct_loss_per_char": 0.6479600071907043, "incorrect_loss_per_char": 0.7123423218727112, "correct_loss_per_token": 1.2959200143814087, "incorrect_loss_per_token": 1.4246846437454224, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4581550359725952, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4581550359725952, "logits_per_char": -0.7290775179862976, "num_chars": 2}, {"sum_logits": -1.3911505937576294, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3911505937576294, "logits_per_char": -0.6955752968788147, "num_chars": 2}, {"sum_logits": -1.4247483015060425, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4247483015060425, "logits_per_char": -0.7123741507530212, "num_chars": 2}, {"sum_logits": -1.2959200143814087, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2959200143814087, "logits_per_char": -0.6479600071907043, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 620, "native_id": "Mercury_415686", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2648491859436035, "incorrect_loss_raw": 1.437772512435913, "correct_loss_per_char": 0.6324245929718018, "incorrect_loss_per_char": 0.7188862562179565, "correct_loss_per_token": 1.2648491859436035, "incorrect_loss_per_token": 1.437772512435913, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4215508699417114, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4215508699417114, "logits_per_char": -0.7107754349708557, "num_chars": 2}, {"sum_logits": -1.4934666156768799, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4934666156768799, "logits_per_char": -0.7467333078384399, "num_chars": 2}, {"sum_logits": -1.398300051689148, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.398300051689148, "logits_per_char": -0.699150025844574, "num_chars": 2}, {"sum_logits": -1.2648491859436035, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.2648491859436035, "logits_per_char": -0.6324245929718018, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 621, "native_id": "Mercury_SC_408620", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4108998775482178, "incorrect_loss_raw": 1.385383168856303, "correct_loss_per_char": 0.7054499387741089, "incorrect_loss_per_char": 0.6926915844281515, "correct_loss_per_token": 1.4108998775482178, "incorrect_loss_per_token": 1.385383168856303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4595069885253906, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4595069885253906, "logits_per_char": -0.7297534942626953, "num_chars": 2}, {"sum_logits": -1.2972522974014282, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.2972522974014282, "logits_per_char": -0.6486261487007141, "num_chars": 2}, {"sum_logits": -1.4108998775482178, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4108998775482178, "logits_per_char": -0.7054499387741089, "num_chars": 2}, {"sum_logits": -1.3993902206420898, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3993902206420898, "logits_per_char": -0.6996951103210449, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 622, "native_id": "Mercury_7094815", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4191783666610718, "incorrect_loss_raw": 1.395458181699117, "correct_loss_per_char": 0.7095891833305359, "incorrect_loss_per_char": 0.6977290908495585, "correct_loss_per_token": 1.4191783666610718, "incorrect_loss_per_token": 1.395458181699117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.620926022529602, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.620926022529602, "logits_per_char": -0.810463011264801, "num_chars": 2}, {"sum_logits": -1.4191783666610718, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4191783666610718, "logits_per_char": -0.7095891833305359, "num_chars": 2}, {"sum_logits": -1.350050687789917, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.350050687789917, "logits_per_char": -0.6750253438949585, "num_chars": 2}, {"sum_logits": -1.215397834777832, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.215397834777832, "logits_per_char": -0.607698917388916, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 623, "native_id": "VASoL_2011_5_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4053826332092285, "incorrect_loss_raw": 1.390079418818156, "correct_loss_per_char": 0.7026913166046143, "incorrect_loss_per_char": 0.695039709409078, "correct_loss_per_token": 1.4053826332092285, "incorrect_loss_per_token": 1.390079418818156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.326200246810913, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.326200246810913, "logits_per_char": -0.6631001234054565, "num_chars": 2}, {"sum_logits": -1.3498767614364624, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3498767614364624, "logits_per_char": -0.6749383807182312, "num_chars": 2}, {"sum_logits": -1.4053826332092285, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4053826332092285, "logits_per_char": -0.7026913166046143, "num_chars": 2}, {"sum_logits": -1.4941612482070923, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4941612482070923, "logits_per_char": -0.7470806241035461, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 624, "native_id": "NYSEDREGENTS_2012_8_14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.469185471534729, "incorrect_loss_raw": 1.3670178254445393, "correct_loss_per_char": 0.7345927357673645, "incorrect_loss_per_char": 0.6835089127222697, "correct_loss_per_token": 1.469185471534729, "incorrect_loss_per_token": 1.3670178254445393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.469185471534729, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.469185471534729, "logits_per_char": -0.7345927357673645, "num_chars": 2}, {"sum_logits": -1.3285233974456787, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.3285233974456787, "logits_per_char": -0.6642616987228394, "num_chars": 2}, {"sum_logits": -1.4041128158569336, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4041128158569336, "logits_per_char": -0.7020564079284668, "num_chars": 2}, {"sum_logits": -1.3684172630310059, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3684172630310059, "logits_per_char": -0.6842086315155029, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 625, "native_id": "VASoL_2008_3_21", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4605610370635986, "incorrect_loss_raw": 1.3716519673665364, "correct_loss_per_char": 0.7302805185317993, "incorrect_loss_per_char": 0.6858259836832682, "correct_loss_per_token": 1.4605610370635986, "incorrect_loss_per_token": 1.3716519673665364, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.283686637878418, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.283686637878418, "logits_per_char": -0.641843318939209, "num_chars": 2}, {"sum_logits": -1.3848613500595093, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3848613500595093, "logits_per_char": -0.6924306750297546, "num_chars": 2}, {"sum_logits": -1.4605610370635986, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4605610370635986, "logits_per_char": -0.7302805185317993, "num_chars": 2}, {"sum_logits": -1.4464079141616821, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4464079141616821, "logits_per_char": -0.7232039570808411, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 626, "native_id": "NYSEDREGENTS_2015_4_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3509382009506226, "incorrect_loss_raw": 1.4060999552408855, "correct_loss_per_char": 0.6754691004753113, "incorrect_loss_per_char": 0.7030499776204427, "correct_loss_per_token": 1.3509382009506226, "incorrect_loss_per_token": 1.4060999552408855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3776812553405762, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.3776812553405762, "logits_per_char": -0.6888406276702881, "num_chars": 2}, {"sum_logits": -1.4386433362960815, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4386433362960815, "logits_per_char": -0.7193216681480408, "num_chars": 2}, {"sum_logits": -1.4019752740859985, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4019752740859985, "logits_per_char": -0.7009876370429993, "num_chars": 2}, {"sum_logits": -1.3509382009506226, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.3509382009506226, "logits_per_char": -0.6754691004753113, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 627, "native_id": "Mercury_182403", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3892954587936401, "incorrect_loss_raw": 1.3972519636154175, "correct_loss_per_char": 0.6946477293968201, "incorrect_loss_per_char": 0.6986259818077087, "correct_loss_per_token": 1.3892954587936401, "incorrect_loss_per_token": 1.3972519636154175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.542944312095642, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.542944312095642, "logits_per_char": -0.771472156047821, "num_chars": 2}, {"sum_logits": -1.3892954587936401, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3892954587936401, "logits_per_char": -0.6946477293968201, "num_chars": 2}, {"sum_logits": -1.3752000331878662, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3752000331878662, "logits_per_char": -0.6876000165939331, "num_chars": 2}, {"sum_logits": -1.2736115455627441, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2736115455627441, "logits_per_char": -0.6368057727813721, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 628, "native_id": "Mercury_SC_416653", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3945454359054565, "incorrect_loss_raw": 1.3899306853612263, "correct_loss_per_char": 0.6972727179527283, "incorrect_loss_per_char": 0.6949653426806132, "correct_loss_per_token": 1.3945454359054565, "incorrect_loss_per_token": 1.3899306853612263, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3991971015930176, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3991971015930176, "logits_per_char": -0.6995985507965088, "num_chars": 2}, {"sum_logits": -1.4345344305038452, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4345344305038452, "logits_per_char": -0.7172672152519226, "num_chars": 2}, {"sum_logits": -1.3945454359054565, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3945454359054565, "logits_per_char": -0.6972727179527283, "num_chars": 2}, {"sum_logits": -1.3360605239868164, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.3360605239868164, "logits_per_char": -0.6680302619934082, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 629, "native_id": "NAEP_2000_8_S21+3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2991424798965454, "incorrect_loss_raw": 1.423526128133138, "correct_loss_per_char": 0.6495712399482727, "incorrect_loss_per_char": 0.711763064066569, "correct_loss_per_token": 1.2991424798965454, "incorrect_loss_per_token": 1.423526128133138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4133597612380981, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4133597612380981, "logits_per_char": -0.7066798806190491, "num_chars": 2}, {"sum_logits": -1.446199893951416, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.446199893951416, "logits_per_char": -0.723099946975708, "num_chars": 2}, {"sum_logits": -1.4110187292099, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4110187292099, "logits_per_char": -0.70550936460495, "num_chars": 2}, {"sum_logits": -1.2991424798965454, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.2991424798965454, "logits_per_char": -0.6495712399482727, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 630, "native_id": "NYSEDREGENTS_2008_8_26", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3468986749649048, "incorrect_loss_raw": 1.4059499104817708, "correct_loss_per_char": 0.6734493374824524, "incorrect_loss_per_char": 0.7029749552408854, "correct_loss_per_token": 1.3468986749649048, "incorrect_loss_per_token": 1.4059499104817708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3468986749649048, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3468986749649048, "logits_per_char": -0.6734493374824524, "num_chars": 2}, {"sum_logits": -1.3322490453720093, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3322490453720093, "logits_per_char": -0.6661245226860046, "num_chars": 2}, {"sum_logits": -1.434084415435791, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.434084415435791, "logits_per_char": -0.7170422077178955, "num_chars": 2}, {"sum_logits": -1.4515162706375122, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4515162706375122, "logits_per_char": -0.7257581353187561, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 631, "native_id": "Mercury_7223090", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3264085054397583, "incorrect_loss_raw": 1.413722316424052, "correct_loss_per_char": 0.6632042527198792, "incorrect_loss_per_char": 0.706861158212026, "correct_loss_per_token": 1.3264085054397583, "incorrect_loss_per_token": 1.413722316424052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4943689107894897, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4943689107894897, "logits_per_char": -0.7471844553947449, "num_chars": 2}, {"sum_logits": -1.3686330318450928, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3686330318450928, "logits_per_char": -0.6843165159225464, "num_chars": 2}, {"sum_logits": -1.3781650066375732, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3781650066375732, "logits_per_char": -0.6890825033187866, "num_chars": 2}, {"sum_logits": -1.3264085054397583, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.3264085054397583, "logits_per_char": -0.6632042527198792, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 632, "native_id": "NYSEDREGENTS_2015_4_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5546329021453857, "incorrect_loss_raw": 1.3427602847417195, "correct_loss_per_char": 0.7773164510726929, "incorrect_loss_per_char": 0.6713801423708597, "correct_loss_per_token": 1.5546329021453857, "incorrect_loss_per_token": 1.3427602847417195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3786697387695312, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3786697387695312, "logits_per_char": -0.6893348693847656, "num_chars": 2}, {"sum_logits": -1.2741676568984985, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.2741676568984985, "logits_per_char": -0.6370838284492493, "num_chars": 2}, {"sum_logits": -1.375443458557129, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.375443458557129, "logits_per_char": -0.6877217292785645, "num_chars": 2}, {"sum_logits": -1.5546329021453857, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.5546329021453857, "logits_per_char": -0.7773164510726929, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 633, "native_id": "MCAS_2006_8_21", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.324134349822998, "incorrect_loss_raw": 1.4185599088668823, "correct_loss_per_char": 0.662067174911499, "incorrect_loss_per_char": 0.7092799544334412, "correct_loss_per_token": 1.324134349822998, "incorrect_loss_per_token": 1.4185599088668823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.324134349822998, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.324134349822998, "logits_per_char": -0.662067174911499, "num_chars": 2}, {"sum_logits": -1.3067055940628052, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3067055940628052, "logits_per_char": -0.6533527970314026, "num_chars": 2}, {"sum_logits": -1.441292405128479, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.441292405128479, "logits_per_char": -0.7206462025642395, "num_chars": 2}, {"sum_logits": -1.5076817274093628, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.5076817274093628, "logits_per_char": -0.7538408637046814, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 634, "native_id": "Mercury_SC_400709", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2871206998825073, "incorrect_loss_raw": 1.4328030745188396, "correct_loss_per_char": 0.6435603499412537, "incorrect_loss_per_char": 0.7164015372594198, "correct_loss_per_token": 1.2871206998825073, "incorrect_loss_per_token": 1.4328030745188396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5391539335250854, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5391539335250854, "logits_per_char": -0.7695769667625427, "num_chars": 2}, {"sum_logits": -1.2871206998825073, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2871206998825073, "logits_per_char": -0.6435603499412537, "num_chars": 2}, {"sum_logits": -1.3262016773223877, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3262016773223877, "logits_per_char": -0.6631008386611938, "num_chars": 2}, {"sum_logits": -1.4330536127090454, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4330536127090454, "logits_per_char": -0.7165268063545227, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 635, "native_id": "NAEP_2005_8_S11+11", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4501011371612549, "incorrect_loss_raw": 1.3769755363464355, "correct_loss_per_char": 0.7250505685806274, "incorrect_loss_per_char": 0.6884877681732178, "correct_loss_per_token": 1.4501011371612549, "incorrect_loss_per_token": 1.3769755363464355, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4603362083435059, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4603362083435059, "logits_per_char": -0.7301681041717529, "num_chars": 2}, {"sum_logits": -1.4501011371612549, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4501011371612549, "logits_per_char": -0.7250505685806274, "num_chars": 2}, {"sum_logits": -1.4052695035934448, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4052695035934448, "logits_per_char": -0.7026347517967224, "num_chars": 2}, {"sum_logits": -1.265320897102356, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.265320897102356, "logits_per_char": -0.632660448551178, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 636, "native_id": "NYSEDREGENTS_2008_8_37", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3769840002059937, "incorrect_loss_raw": 1.4138950109481812, "correct_loss_per_char": 0.6884920001029968, "incorrect_loss_per_char": 0.7069475054740906, "correct_loss_per_token": 1.3769840002059937, "incorrect_loss_per_token": 1.4138950109481812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1686725616455078, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.1686725616455078, "logits_per_char": -0.5843362808227539, "num_chars": 2}, {"sum_logits": -1.3769840002059937, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.3769840002059937, "logits_per_char": -0.6884920001029968, "num_chars": 2}, {"sum_logits": -1.4528827667236328, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.4528827667236328, "logits_per_char": -0.7264413833618164, "num_chars": 2}, {"sum_logits": -1.6201297044754028, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6201297044754028, "logits_per_char": -0.8100648522377014, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 637, "native_id": "Mercury_402144", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3760480880737305, "incorrect_loss_raw": 1.3967549403508503, "correct_loss_per_char": 0.6880240440368652, "incorrect_loss_per_char": 0.6983774701754252, "correct_loss_per_token": 1.3760480880737305, "incorrect_loss_per_token": 1.3967549403508503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4490561485290527, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4490561485290527, "logits_per_char": -0.7245280742645264, "num_chars": 2}, {"sum_logits": -1.3199034929275513, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3199034929275513, "logits_per_char": -0.6599517464637756, "num_chars": 2}, {"sum_logits": -1.4213051795959473, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4213051795959473, "logits_per_char": -0.7106525897979736, "num_chars": 2}, {"sum_logits": -1.3760480880737305, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3760480880737305, "logits_per_char": -0.6880240440368652, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 638, "native_id": "Mercury_405875", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4623610973358154, "incorrect_loss_raw": 1.3836260636647542, "correct_loss_per_char": 0.7311805486679077, "incorrect_loss_per_char": 0.6918130318323771, "correct_loss_per_token": 1.4623610973358154, "incorrect_loss_per_token": 1.3836260636647542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5120222568511963, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.5120222568511963, "logits_per_char": -0.7560111284255981, "num_chars": 2}, {"sum_logits": -1.5064408779144287, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.5064408779144287, "logits_per_char": -0.7532204389572144, "num_chars": 2}, {"sum_logits": -1.4623610973358154, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4623610973358154, "logits_per_char": -0.7311805486679077, "num_chars": 2}, {"sum_logits": -1.1324150562286377, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.1324150562286377, "logits_per_char": -0.5662075281143188, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 639, "native_id": "MCAS_2005_9_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.372237205505371, "incorrect_loss_raw": 1.399967630704244, "correct_loss_per_char": 0.6861186027526855, "incorrect_loss_per_char": 0.699983815352122, "correct_loss_per_token": 1.372237205505371, "incorrect_loss_per_token": 1.399967630704244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4578983783721924, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4578983783721924, "logits_per_char": -0.7289491891860962, "num_chars": 2}, {"sum_logits": -1.4617258310317993, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4617258310317993, "logits_per_char": -0.7308629155158997, "num_chars": 2}, {"sum_logits": -1.372237205505371, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.372237205505371, "logits_per_char": -0.6861186027526855, "num_chars": 2}, {"sum_logits": -1.2802786827087402, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2802786827087402, "logits_per_char": -0.6401393413543701, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 640, "native_id": "ACTAAP_2015_5_8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4948241710662842, "incorrect_loss_raw": 1.3592952489852905, "correct_loss_per_char": 0.7474120855331421, "incorrect_loss_per_char": 0.6796476244926453, "correct_loss_per_token": 1.4948241710662842, "incorrect_loss_per_token": 1.3592952489852905, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4948241710662842, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4948241710662842, "logits_per_char": -0.7474120855331421, "num_chars": 2}, {"sum_logits": -1.322847843170166, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.322847843170166, "logits_per_char": -0.661423921585083, "num_chars": 2}, {"sum_logits": -1.4492582082748413, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4492582082748413, "logits_per_char": -0.7246291041374207, "num_chars": 2}, {"sum_logits": -1.3057796955108643, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.3057796955108643, "logits_per_char": -0.6528898477554321, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 641, "native_id": "CSZ_2008_5_CSZ10233", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.362607717514038, "incorrect_loss_raw": 1.4035359223683674, "correct_loss_per_char": 0.681303858757019, "incorrect_loss_per_char": 0.7017679611841837, "correct_loss_per_token": 1.362607717514038, "incorrect_loss_per_token": 1.4035359223683674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.480392336845398, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.480392336845398, "logits_per_char": -0.740196168422699, "num_chars": 2}, {"sum_logits": -1.2771446704864502, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2771446704864502, "logits_per_char": -0.6385723352432251, "num_chars": 2}, {"sum_logits": -1.4530707597732544, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4530707597732544, "logits_per_char": -0.7265353798866272, "num_chars": 2}, {"sum_logits": -1.362607717514038, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.362607717514038, "logits_per_char": -0.681303858757019, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 642, "native_id": "Mercury_SC_406626", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.428070068359375, "incorrect_loss_raw": 1.3817003170649211, "correct_loss_per_char": 0.7140350341796875, "incorrect_loss_per_char": 0.6908501585324606, "correct_loss_per_token": 1.428070068359375, "incorrect_loss_per_token": 1.3817003170649211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3042957782745361, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.3042957782745361, "logits_per_char": -0.6521478891372681, "num_chars": 2}, {"sum_logits": -1.3808115720748901, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3808115720748901, "logits_per_char": -0.6904057860374451, "num_chars": 2}, {"sum_logits": -1.428070068359375, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.428070068359375, "logits_per_char": -0.7140350341796875, "num_chars": 2}, {"sum_logits": -1.459993600845337, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.459993600845337, "logits_per_char": -0.7299968004226685, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 643, "native_id": "AKDE&ED_2008_8_5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.4812204837799072, "incorrect_loss_raw": 1.3846980333328247, "correct_loss_per_char": 1.2406102418899536, "incorrect_loss_per_char": 0.6923490166664124, "correct_loss_per_token": 2.4812204837799072, "incorrect_loss_per_token": 1.3846980333328247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0096821784973145, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.0096821784973145, "logits_per_char": -0.5048410892486572, "num_chars": 2}, {"sum_logits": -1.382348895072937, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.382348895072937, "logits_per_char": -0.6911744475364685, "num_chars": 2}, {"sum_logits": -1.7620630264282227, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.7620630264282227, "logits_per_char": -0.8810315132141113, "num_chars": 2}, {"sum_logits": -2.4812204837799072, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -2.4812204837799072, "logits_per_char": -1.2406102418899536, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 644, "native_id": "Mercury_7206623", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4173434972763062, "incorrect_loss_raw": 1.3944928248723347, "correct_loss_per_char": 0.7086717486381531, "incorrect_loss_per_char": 0.6972464124361674, "correct_loss_per_token": 1.4173434972763062, "incorrect_loss_per_token": 1.3944928248723347, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.625572681427002, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.625572681427002, "logits_per_char": -0.812786340713501, "num_chars": 2}, {"sum_logits": -1.2967479228973389, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.2967479228973389, "logits_per_char": -0.6483739614486694, "num_chars": 2}, {"sum_logits": -1.4173434972763062, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4173434972763062, "logits_per_char": -0.7086717486381531, "num_chars": 2}, {"sum_logits": -1.2611578702926636, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.2611578702926636, "logits_per_char": -0.6305789351463318, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 645, "native_id": "Mercury_7016328", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.098934531211853, "incorrect_loss_raw": 1.5156302849451702, "correct_loss_per_char": 0.5494672656059265, "incorrect_loss_per_char": 0.7578151424725851, "correct_loss_per_token": 1.098934531211853, "incorrect_loss_per_token": 1.5156302849451702, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4561700820922852, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4561700820922852, "logits_per_char": -0.7280850410461426, "num_chars": 2}, {"sum_logits": -1.6161668300628662, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.6161668300628662, "logits_per_char": -0.8080834150314331, "num_chars": 2}, {"sum_logits": -1.098934531211853, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.098934531211853, "logits_per_char": -0.5494672656059265, "num_chars": 2}, {"sum_logits": -1.4745539426803589, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4745539426803589, "logits_per_char": -0.7372769713401794, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 646, "native_id": "Mercury_7216860", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4441900253295898, "incorrect_loss_raw": 1.3763619263966878, "correct_loss_per_char": 0.7220950126647949, "incorrect_loss_per_char": 0.6881809631983439, "correct_loss_per_token": 1.4441900253295898, "incorrect_loss_per_token": 1.3763619263966878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4592725038528442, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.4592725038528442, "logits_per_char": -0.7296362519264221, "num_chars": 2}, {"sum_logits": -1.3917889595031738, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3917889595031738, "logits_per_char": -0.6958944797515869, "num_chars": 2}, {"sum_logits": -1.4441900253295898, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.4441900253295898, "logits_per_char": -0.7220950126647949, "num_chars": 2}, {"sum_logits": -1.2780243158340454, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.2780243158340454, "logits_per_char": -0.6390121579170227, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 647, "native_id": "NYSEDREGENTS_2013_4_20", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4359066486358643, "incorrect_loss_raw": 1.3775731722513835, "correct_loss_per_char": 0.7179533243179321, "incorrect_loss_per_char": 0.6887865861256918, "correct_loss_per_token": 1.4359066486358643, "incorrect_loss_per_token": 1.3775731722513835, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4646302461624146, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4646302461624146, "logits_per_char": -0.7323151230812073, "num_chars": 2}, {"sum_logits": -1.2823103666305542, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.2823103666305542, "logits_per_char": -0.6411551833152771, "num_chars": 2}, {"sum_logits": -1.4359066486358643, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4359066486358643, "logits_per_char": -0.7179533243179321, "num_chars": 2}, {"sum_logits": -1.3857789039611816, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3857789039611816, "logits_per_char": -0.6928894519805908, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 648, "native_id": "Mercury_SC_415412", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3945233821868896, "incorrect_loss_raw": 1.3899200757344563, "correct_loss_per_char": 0.6972616910934448, "incorrect_loss_per_char": 0.6949600378672282, "correct_loss_per_token": 1.3945233821868896, "incorrect_loss_per_token": 1.3899200757344563, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4075891971588135, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4075891971588135, "logits_per_char": -0.7037945985794067, "num_chars": 2}, {"sum_logits": -1.3669167757034302, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.3669167757034302, "logits_per_char": -0.6834583878517151, "num_chars": 2}, {"sum_logits": -1.3952542543411255, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3952542543411255, "logits_per_char": -0.6976271271705627, "num_chars": 2}, {"sum_logits": -1.3945233821868896, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3945233821868896, "logits_per_char": -0.6972616910934448, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 649, "native_id": "Mercury_7162488", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2426228523254395, "incorrect_loss_raw": 1.3946305116017659, "correct_loss_per_char": 1.1213114261627197, "incorrect_loss_per_char": 0.6973152558008829, "correct_loss_per_token": 2.2426228523254395, "incorrect_loss_per_token": 1.3946305116017659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9846519231796265, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -0.9846519231796265, "logits_per_char": -0.49232596158981323, "num_chars": 2}, {"sum_logits": -1.3235650062561035, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3235650062561035, "logits_per_char": -0.6617825031280518, "num_chars": 2}, {"sum_logits": -1.8756746053695679, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.8756746053695679, "logits_per_char": -0.9378373026847839, "num_chars": 2}, {"sum_logits": -2.2426228523254395, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -2.2426228523254395, "logits_per_char": -1.1213114261627197, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 650, "native_id": "Mercury_402634", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2465578317642212, "incorrect_loss_raw": 1.4479828675587971, "correct_loss_per_char": 0.6232789158821106, "incorrect_loss_per_char": 0.7239914337793986, "correct_loss_per_token": 1.2465578317642212, "incorrect_loss_per_token": 1.4479828675587971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4594786167144775, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4594786167144775, "logits_per_char": -0.7297393083572388, "num_chars": 2}, {"sum_logits": -1.5561646223068237, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.5561646223068237, "logits_per_char": -0.7780823111534119, "num_chars": 2}, {"sum_logits": -1.3283053636550903, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3283053636550903, "logits_per_char": -0.6641526818275452, "num_chars": 2}, {"sum_logits": -1.2465578317642212, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.2465578317642212, "logits_per_char": -0.6232789158821106, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 651, "native_id": "Mercury_7123445", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4233036041259766, "incorrect_loss_raw": 1.3801358938217163, "correct_loss_per_char": 0.7116518020629883, "incorrect_loss_per_char": 0.6900679469108582, "correct_loss_per_token": 1.4233036041259766, "incorrect_loss_per_token": 1.3801358938217163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4233036041259766, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4233036041259766, "logits_per_char": -0.7116518020629883, "num_chars": 2}, {"sum_logits": -1.4395222663879395, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4395222663879395, "logits_per_char": -0.7197611331939697, "num_chars": 2}, {"sum_logits": -1.3844074010849, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3844074010849, "logits_per_char": -0.69220370054245, "num_chars": 2}, {"sum_logits": -1.3164780139923096, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3164780139923096, "logits_per_char": -0.6582390069961548, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 652, "native_id": "Mercury_7094395", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.293330192565918, "incorrect_loss_raw": 1.4317353963851929, "correct_loss_per_char": 0.646665096282959, "incorrect_loss_per_char": 0.7158676981925964, "correct_loss_per_token": 1.293330192565918, "incorrect_loss_per_token": 1.4317353963851929, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5860642194747925, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.5860642194747925, "logits_per_char": -0.7930321097373962, "num_chars": 2}, {"sum_logits": -1.293330192565918, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.293330192565918, "logits_per_char": -0.646665096282959, "num_chars": 2}, {"sum_logits": -1.352167010307312, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.352167010307312, "logits_per_char": -0.676083505153656, "num_chars": 2}, {"sum_logits": -1.3569749593734741, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3569749593734741, "logits_per_char": -0.6784874796867371, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 653, "native_id": "Mercury_7248150", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5650713443756104, "incorrect_loss_raw": 1.3418275117874146, "correct_loss_per_char": 0.7825356721878052, "incorrect_loss_per_char": 0.6709137558937073, "correct_loss_per_token": 1.5650713443756104, "incorrect_loss_per_token": 1.3418275117874146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5650713443756104, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5650713443756104, "logits_per_char": -0.7825356721878052, "num_chars": 2}, {"sum_logits": -1.2805359363555908, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2805359363555908, "logits_per_char": -0.6402679681777954, "num_chars": 2}, {"sum_logits": -1.446022868156433, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.446022868156433, "logits_per_char": -0.7230114340782166, "num_chars": 2}, {"sum_logits": -1.2989237308502197, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.2989237308502197, "logits_per_char": -0.6494618654251099, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 654, "native_id": "Mercury_SC_401602", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4393051862716675, "incorrect_loss_raw": 1.3757807811101277, "correct_loss_per_char": 0.7196525931358337, "incorrect_loss_per_char": 0.6878903905550638, "correct_loss_per_token": 1.4393051862716675, "incorrect_loss_per_token": 1.3757807811101277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4393051862716675, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4393051862716675, "logits_per_char": -0.7196525931358337, "num_chars": 2}, {"sum_logits": -1.422670602798462, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.422670602798462, "logits_per_char": -0.711335301399231, "num_chars": 2}, {"sum_logits": -1.3480050563812256, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.3480050563812256, "logits_per_char": -0.6740025281906128, "num_chars": 2}, {"sum_logits": -1.3566666841506958, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3566666841506958, "logits_per_char": -0.6783333420753479, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 655, "native_id": "Mercury_SC_409574", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3328608274459839, "incorrect_loss_raw": 1.4149877627690632, "correct_loss_per_char": 0.6664304137229919, "incorrect_loss_per_char": 0.7074938813845316, "correct_loss_per_token": 1.3328608274459839, "incorrect_loss_per_token": 1.4149877627690632, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3633487224578857, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3633487224578857, "logits_per_char": -0.6816743612289429, "num_chars": 2}, {"sum_logits": -1.3328608274459839, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3328608274459839, "logits_per_char": -0.6664304137229919, "num_chars": 2}, {"sum_logits": -1.5436809062957764, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.5436809062957764, "logits_per_char": -0.7718404531478882, "num_chars": 2}, {"sum_logits": -1.3379336595535278, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3379336595535278, "logits_per_char": -0.6689668297767639, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 656, "native_id": "Mercury_SC_414356", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4113482236862183, "incorrect_loss_raw": 1.3885331551233928, "correct_loss_per_char": 0.7056741118431091, "incorrect_loss_per_char": 0.6942665775616964, "correct_loss_per_token": 1.4113482236862183, "incorrect_loss_per_token": 1.3885331551233928, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5277290344238281, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5277290344238281, "logits_per_char": -0.7638645172119141, "num_chars": 2}, {"sum_logits": -1.4113482236862183, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4113482236862183, "logits_per_char": -0.7056741118431091, "num_chars": 2}, {"sum_logits": -1.3194292783737183, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3194292783737183, "logits_per_char": -0.6597146391868591, "num_chars": 2}, {"sum_logits": -1.3184411525726318, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.3184411525726318, "logits_per_char": -0.6592205762863159, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 657, "native_id": "Mercury_7064698", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3047295808792114, "incorrect_loss_raw": 1.4197906653086345, "correct_loss_per_char": 0.6523647904396057, "incorrect_loss_per_char": 0.7098953326543173, "correct_loss_per_token": 1.3047295808792114, "incorrect_loss_per_token": 1.4197906653086345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3047295808792114, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3047295808792114, "logits_per_char": -0.6523647904396057, "num_chars": 2}, {"sum_logits": -1.3692448139190674, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3692448139190674, "logits_per_char": -0.6846224069595337, "num_chars": 2}, {"sum_logits": -1.4094611406326294, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4094611406326294, "logits_per_char": -0.7047305703163147, "num_chars": 2}, {"sum_logits": -1.4806660413742065, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4806660413742065, "logits_per_char": -0.7403330206871033, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 658, "native_id": "Mercury_7032690", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.382431983947754, "incorrect_loss_raw": 1.3940330346425374, "correct_loss_per_char": 0.691215991973877, "incorrect_loss_per_char": 0.6970165173212687, "correct_loss_per_token": 1.382431983947754, "incorrect_loss_per_token": 1.3940330346425374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.373721957206726, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.373721957206726, "logits_per_char": -0.686860978603363, "num_chars": 2}, {"sum_logits": -1.382431983947754, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.382431983947754, "logits_per_char": -0.691215991973877, "num_chars": 2}, {"sum_logits": -1.3712942600250244, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.3712942600250244, "logits_per_char": -0.6856471300125122, "num_chars": 2}, {"sum_logits": -1.4370828866958618, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4370828866958618, "logits_per_char": -0.7185414433479309, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 659, "native_id": "NYSEDREGENTS_2008_4_4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4917105436325073, "incorrect_loss_raw": 1.3600082794825237, "correct_loss_per_char": 0.7458552718162537, "incorrect_loss_per_char": 0.6800041397412618, "correct_loss_per_token": 1.4917105436325073, "incorrect_loss_per_token": 1.3600082794825237, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4917105436325073, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4917105436325073, "logits_per_char": -0.7458552718162537, "num_chars": 2}, {"sum_logits": -1.366857886314392, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.366857886314392, "logits_per_char": -0.683428943157196, "num_chars": 2}, {"sum_logits": -1.3286362886428833, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.3286362886428833, "logits_per_char": -0.6643181443214417, "num_chars": 2}, {"sum_logits": -1.3845306634902954, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.3845306634902954, "logits_per_char": -0.6922653317451477, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 660, "native_id": "Mercury_404096", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3971529006958008, "incorrect_loss_raw": 1.3943590720494587, "correct_loss_per_char": 0.6985764503479004, "incorrect_loss_per_char": 0.6971795360247294, "correct_loss_per_token": 1.3971529006958008, "incorrect_loss_per_token": 1.3943590720494587, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4144681692123413, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4144681692123413, "logits_per_char": -0.7072340846061707, "num_chars": 2}, {"sum_logits": -1.3971529006958008, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3971529006958008, "logits_per_char": -0.6985764503479004, "num_chars": 2}, {"sum_logits": -1.2538670301437378, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.2538670301437378, "logits_per_char": -0.6269335150718689, "num_chars": 2}, {"sum_logits": -1.5147420167922974, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.5147420167922974, "logits_per_char": -0.7573710083961487, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 661, "native_id": "Mercury_SC_408578", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3961325883865356, "incorrect_loss_raw": 1.388147234916687, "correct_loss_per_char": 0.6980662941932678, "incorrect_loss_per_char": 0.6940736174583435, "correct_loss_per_token": 1.3961325883865356, "incorrect_loss_per_token": 1.388147234916687, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.448915719985962, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.448915719985962, "logits_per_char": -0.724457859992981, "num_chars": 2}, {"sum_logits": -1.3694090843200684, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3694090843200684, "logits_per_char": -0.6847045421600342, "num_chars": 2}, {"sum_logits": -1.3961325883865356, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.3961325883865356, "logits_per_char": -0.6980662941932678, "num_chars": 2}, {"sum_logits": -1.3461169004440308, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.3461169004440308, "logits_per_char": -0.6730584502220154, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 662, "native_id": "Mercury_SC_405784", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4538720846176147, "incorrect_loss_raw": 1.3703882694244385, "correct_loss_per_char": 0.7269360423088074, "incorrect_loss_per_char": 0.6851941347122192, "correct_loss_per_token": 1.4538720846176147, "incorrect_loss_per_token": 1.3703882694244385, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3296334743499756, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.3296334743499756, "logits_per_char": -0.6648167371749878, "num_chars": 2}, {"sum_logits": -1.4538720846176147, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4538720846176147, "logits_per_char": -0.7269360423088074, "num_chars": 2}, {"sum_logits": -1.422663927078247, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.422663927078247, "logits_per_char": -0.7113319635391235, "num_chars": 2}, {"sum_logits": -1.3588674068450928, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3588674068450928, "logits_per_char": -0.6794337034225464, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 663, "native_id": "MCAS_2000_8_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3961025476455688, "incorrect_loss_raw": 1.3925520181655884, "correct_loss_per_char": 0.6980512738227844, "incorrect_loss_per_char": 0.6962760090827942, "correct_loss_per_token": 1.3961025476455688, "incorrect_loss_per_token": 1.3925520181655884, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5069833993911743, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5069833993911743, "logits_per_char": -0.7534916996955872, "num_chars": 2}, {"sum_logits": -1.3961025476455688, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3961025476455688, "logits_per_char": -0.6980512738227844, "num_chars": 2}, {"sum_logits": -1.4187328815460205, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4187328815460205, "logits_per_char": -0.7093664407730103, "num_chars": 2}, {"sum_logits": -1.2519397735595703, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2519397735595703, "logits_per_char": -0.6259698867797852, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 664, "native_id": "Mercury_SC_LBS10952", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2572765350341797, "incorrect_loss_raw": 1.4394545157750447, "correct_loss_per_char": 0.6286382675170898, "incorrect_loss_per_char": 0.7197272578875223, "correct_loss_per_token": 1.2572765350341797, "incorrect_loss_per_token": 1.4394545157750447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4450623989105225, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4450623989105225, "logits_per_char": -0.7225311994552612, "num_chars": 2}, {"sum_logits": -1.2572765350341797, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.2572765350341797, "logits_per_char": -0.6286382675170898, "num_chars": 2}, {"sum_logits": -1.4082890748977661, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4082890748977661, "logits_per_char": -0.7041445374488831, "num_chars": 2}, {"sum_logits": -1.4650120735168457, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4650120735168457, "logits_per_char": -0.7325060367584229, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 665, "native_id": "Mercury_7030783", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.286258578300476, "incorrect_loss_raw": 1.431065599123637, "correct_loss_per_char": 0.643129289150238, "incorrect_loss_per_char": 0.7155327995618185, "correct_loss_per_token": 1.286258578300476, "incorrect_loss_per_token": 1.431065599123637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.286258578300476, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.286258578300476, "logits_per_char": -0.643129289150238, "num_chars": 2}, {"sum_logits": -1.4128021001815796, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4128021001815796, "logits_per_char": -0.7064010500907898, "num_chars": 2}, {"sum_logits": -1.3534399271011353, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3534399271011353, "logits_per_char": -0.6767199635505676, "num_chars": 2}, {"sum_logits": -1.5269547700881958, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.5269547700881958, "logits_per_char": -0.7634773850440979, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 666, "native_id": "Mercury_7245578", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3854566812515259, "incorrect_loss_raw": 1.396678368250529, "correct_loss_per_char": 0.6927283406257629, "incorrect_loss_per_char": 0.6983391841252645, "correct_loss_per_token": 1.3854566812515259, "incorrect_loss_per_token": 1.396678368250529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.459094524383545, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.459094524383545, "logits_per_char": -0.7295472621917725, "num_chars": 2}, {"sum_logits": -1.2941657304763794, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.2941657304763794, "logits_per_char": -0.6470828652381897, "num_chars": 2}, {"sum_logits": -1.3854566812515259, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3854566812515259, "logits_per_char": -0.6927283406257629, "num_chars": 2}, {"sum_logits": -1.4367748498916626, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4367748498916626, "logits_per_char": -0.7183874249458313, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 667, "native_id": "CSZ_2009_8_CSZ30585", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3669371604919434, "incorrect_loss_raw": 1.397153655687968, "correct_loss_per_char": 0.6834685802459717, "incorrect_loss_per_char": 0.698576827843984, "correct_loss_per_token": 1.3669371604919434, "incorrect_loss_per_token": 1.397153655687968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3669371604919434, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3669371604919434, "logits_per_char": -0.6834685802459717, "num_chars": 2}, {"sum_logits": -1.4364066123962402, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4364066123962402, "logits_per_char": -0.7182033061981201, "num_chars": 2}, {"sum_logits": -1.3926066160202026, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3926066160202026, "logits_per_char": -0.6963033080101013, "num_chars": 2}, {"sum_logits": -1.362447738647461, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.362447738647461, "logits_per_char": -0.6812238693237305, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 668, "native_id": "Mercury_SC_410835", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3701506853103638, "incorrect_loss_raw": 1.4070606629053752, "correct_loss_per_char": 0.6850753426551819, "incorrect_loss_per_char": 0.7035303314526876, "correct_loss_per_token": 1.3701506853103638, "incorrect_loss_per_token": 1.4070606629053752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5569374561309814, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5569374561309814, "logits_per_char": -0.7784687280654907, "num_chars": 2}, {"sum_logits": -1.447409987449646, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.447409987449646, "logits_per_char": -0.723704993724823, "num_chars": 2}, {"sum_logits": -1.3701506853103638, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3701506853103638, "logits_per_char": -0.6850753426551819, "num_chars": 2}, {"sum_logits": -1.216834545135498, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.216834545135498, "logits_per_char": -0.608417272567749, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 669, "native_id": "Mercury_7263008", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8394527435302734, "incorrect_loss_raw": 1.604810396830241, "correct_loss_per_char": 0.9197263717651367, "incorrect_loss_per_char": 0.8024051984151205, "correct_loss_per_token": 1.8394527435302734, "incorrect_loss_per_token": 1.604810396830241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9884166717529297, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -0.9884166717529297, "logits_per_char": -0.49420833587646484, "num_chars": 2}, {"sum_logits": -1.4260659217834473, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4260659217834473, "logits_per_char": -0.7130329608917236, "num_chars": 2}, {"sum_logits": -1.8394527435302734, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.8394527435302734, "logits_per_char": -0.9197263717651367, "num_chars": 2}, {"sum_logits": -2.3999485969543457, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -2.3999485969543457, "logits_per_char": -1.1999742984771729, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 670, "native_id": "Mercury_405057", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3691602945327759, "incorrect_loss_raw": 1.3974566459655762, "correct_loss_per_char": 0.6845801472663879, "incorrect_loss_per_char": 0.6987283229827881, "correct_loss_per_token": 1.3691602945327759, "incorrect_loss_per_token": 1.3974566459655762, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.421511173248291, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.421511173248291, "logits_per_char": -0.7107555866241455, "num_chars": 2}, {"sum_logits": -1.3691602945327759, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3691602945327759, "logits_per_char": -0.6845801472663879, "num_chars": 2}, {"sum_logits": -1.3614329099655151, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3614329099655151, "logits_per_char": -0.6807164549827576, "num_chars": 2}, {"sum_logits": -1.4094258546829224, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4094258546829224, "logits_per_char": -0.7047129273414612, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 671, "native_id": "MDSA_2012_8_36", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4146122932434082, "incorrect_loss_raw": 1.3910030921300252, "correct_loss_per_char": 0.7073061466217041, "incorrect_loss_per_char": 0.6955015460650126, "correct_loss_per_token": 1.4146122932434082, "incorrect_loss_per_token": 1.3910030921300252, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5216490030288696, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5216490030288696, "logits_per_char": -0.7608245015144348, "num_chars": 2}, {"sum_logits": -1.4060795307159424, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4060795307159424, "logits_per_char": -0.7030397653579712, "num_chars": 2}, {"sum_logits": -1.4146122932434082, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4146122932434082, "logits_per_char": -0.7073061466217041, "num_chars": 2}, {"sum_logits": -1.2452807426452637, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2452807426452637, "logits_per_char": -0.6226403713226318, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 672, "native_id": "MSA_2012_5_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.351280927658081, "incorrect_loss_raw": 1.40463125705719, "correct_loss_per_char": 0.6756404638290405, "incorrect_loss_per_char": 0.702315628528595, "correct_loss_per_token": 1.351280927658081, "incorrect_loss_per_token": 1.40463125705719, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4672412872314453, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4672412872314453, "logits_per_char": -0.7336206436157227, "num_chars": 2}, {"sum_logits": -1.351280927658081, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.351280927658081, "logits_per_char": -0.6756404638290405, "num_chars": 2}, {"sum_logits": -1.351717472076416, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.351717472076416, "logits_per_char": -0.675858736038208, "num_chars": 2}, {"sum_logits": -1.3949350118637085, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.3949350118637085, "logits_per_char": -0.6974675059318542, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 673, "native_id": "VASoL_2008_5_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.391955018043518, "incorrect_loss_raw": 1.394294540087382, "correct_loss_per_char": 0.695977509021759, "incorrect_loss_per_char": 0.697147270043691, "correct_loss_per_token": 1.391955018043518, "incorrect_loss_per_token": 1.394294540087382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5177431106567383, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5177431106567383, "logits_per_char": -0.7588715553283691, "num_chars": 2}, {"sum_logits": -1.391955018043518, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.391955018043518, "logits_per_char": -0.695977509021759, "num_chars": 2}, {"sum_logits": -1.3563872575759888, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3563872575759888, "logits_per_char": -0.6781936287879944, "num_chars": 2}, {"sum_logits": -1.308753252029419, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.308753252029419, "logits_per_char": -0.6543766260147095, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 674, "native_id": "Mercury_415265", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.444405198097229, "incorrect_loss_raw": 1.3765474955240886, "correct_loss_per_char": 0.7222025990486145, "incorrect_loss_per_char": 0.6882737477620443, "correct_loss_per_token": 1.444405198097229, "incorrect_loss_per_token": 1.3765474955240886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4465441703796387, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4465441703796387, "logits_per_char": -0.7232720851898193, "num_chars": 2}, {"sum_logits": -1.444405198097229, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.444405198097229, "logits_per_char": -0.7222025990486145, "num_chars": 2}, {"sum_logits": -1.3213175535202026, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3213175535202026, "logits_per_char": -0.6606587767601013, "num_chars": 2}, {"sum_logits": -1.3617807626724243, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3617807626724243, "logits_per_char": -0.6808903813362122, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 675, "native_id": "MCAS_2000_4_11", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2994776964187622, "incorrect_loss_raw": 1.4240025679270427, "correct_loss_per_char": 0.6497388482093811, "incorrect_loss_per_char": 0.7120012839635214, "correct_loss_per_token": 1.2994776964187622, "incorrect_loss_per_token": 1.4240025679270427, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2994776964187622, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.2994776964187622, "logits_per_char": -0.6497388482093811, "num_chars": 2}, {"sum_logits": -1.417831540107727, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.417831540107727, "logits_per_char": -0.7089157700538635, "num_chars": 2}, {"sum_logits": -1.3591618537902832, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3591618537902832, "logits_per_char": -0.6795809268951416, "num_chars": 2}, {"sum_logits": -1.4950143098831177, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4950143098831177, "logits_per_char": -0.7475071549415588, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 676, "native_id": "MEA_2016_8_19", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3855342864990234, "incorrect_loss_raw": 1.3959041833877563, "correct_loss_per_char": 0.6927671432495117, "incorrect_loss_per_char": 0.6979520916938782, "correct_loss_per_token": 1.3855342864990234, "incorrect_loss_per_token": 1.3959041833877563, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.344334602355957, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.344334602355957, "logits_per_char": -0.6721673011779785, "num_chars": 2}, {"sum_logits": -1.431612491607666, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.431612491607666, "logits_per_char": -0.715806245803833, "num_chars": 2}, {"sum_logits": -1.3855342864990234, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3855342864990234, "logits_per_char": -0.6927671432495117, "num_chars": 2}, {"sum_logits": -1.411765456199646, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.411765456199646, "logits_per_char": -0.705882728099823, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 677, "native_id": "Mercury_7119875", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4963488578796387, "incorrect_loss_raw": 1.3587253491083782, "correct_loss_per_char": 0.7481744289398193, "incorrect_loss_per_char": 0.6793626745541891, "correct_loss_per_token": 1.4963488578796387, "incorrect_loss_per_token": 1.3587253491083782, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3550351858139038, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3550351858139038, "logits_per_char": -0.6775175929069519, "num_chars": 2}, {"sum_logits": -1.4267127513885498, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4267127513885498, "logits_per_char": -0.7133563756942749, "num_chars": 2}, {"sum_logits": -1.4963488578796387, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4963488578796387, "logits_per_char": -0.7481744289398193, "num_chars": 2}, {"sum_logits": -1.2944281101226807, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.2944281101226807, "logits_per_char": -0.6472140550613403, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 678, "native_id": "Mercury_7218050", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3108996152877808, "incorrect_loss_raw": 1.4232731262842815, "correct_loss_per_char": 0.6554498076438904, "incorrect_loss_per_char": 0.7116365631421407, "correct_loss_per_token": 1.3108996152877808, "incorrect_loss_per_token": 1.4232731262842815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5495961904525757, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.5495961904525757, "logits_per_char": -0.7747980952262878, "num_chars": 2}, {"sum_logits": -1.3116779327392578, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3116779327392578, "logits_per_char": -0.6558389663696289, "num_chars": 2}, {"sum_logits": -1.4085452556610107, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4085452556610107, "logits_per_char": -0.7042726278305054, "num_chars": 2}, {"sum_logits": -1.3108996152877808, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.3108996152877808, "logits_per_char": -0.6554498076438904, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 679, "native_id": "AKDE&ED_2008_8_18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.530008316040039, "incorrect_loss_raw": 1.3507413864135742, "correct_loss_per_char": 0.7650041580200195, "incorrect_loss_per_char": 0.6753706932067871, "correct_loss_per_token": 1.530008316040039, "incorrect_loss_per_token": 1.3507413864135742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.530008316040039, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.530008316040039, "logits_per_char": -0.7650041580200195, "num_chars": 2}, {"sum_logits": -1.326160192489624, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.326160192489624, "logits_per_char": -0.663080096244812, "num_chars": 2}, {"sum_logits": -1.4390164613723755, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4390164613723755, "logits_per_char": -0.7195082306861877, "num_chars": 2}, {"sum_logits": -1.2870475053787231, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.2870475053787231, "logits_per_char": -0.6435237526893616, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 680, "native_id": "Mercury_7018428", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3418372869491577, "incorrect_loss_raw": 1.4079138835271199, "correct_loss_per_char": 0.6709186434745789, "incorrect_loss_per_char": 0.7039569417635599, "correct_loss_per_token": 1.3418372869491577, "incorrect_loss_per_token": 1.4079138835271199, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4197810888290405, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4197810888290405, "logits_per_char": -0.7098905444145203, "num_chars": 2}, {"sum_logits": -1.3418372869491577, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3418372869491577, "logits_per_char": -0.6709186434745789, "num_chars": 2}, {"sum_logits": -1.4525319337844849, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4525319337844849, "logits_per_char": -0.7262659668922424, "num_chars": 2}, {"sum_logits": -1.3514286279678345, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3514286279678345, "logits_per_char": -0.6757143139839172, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 681, "native_id": "Mercury_SC_LBS10026", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3375388383865356, "incorrect_loss_raw": 1.4154305855433147, "correct_loss_per_char": 0.6687694191932678, "incorrect_loss_per_char": 0.7077152927716573, "correct_loss_per_token": 1.3375388383865356, "incorrect_loss_per_token": 1.4154305855433147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2554175853729248, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.2554175853729248, "logits_per_char": -0.6277087926864624, "num_chars": 2}, {"sum_logits": -1.4529942274093628, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4529942274093628, "logits_per_char": -0.7264971137046814, "num_chars": 2}, {"sum_logits": -1.3375388383865356, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3375388383865356, "logits_per_char": -0.6687694191932678, "num_chars": 2}, {"sum_logits": -1.5378799438476562, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.5378799438476562, "logits_per_char": -0.7689399719238281, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 682, "native_id": "NYSEDREGENTS_2008_4_18", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.300565481185913, "incorrect_loss_raw": 1.424938162167867, "correct_loss_per_char": 0.6502827405929565, "incorrect_loss_per_char": 0.7124690810839335, "correct_loss_per_token": 1.300565481185913, "incorrect_loss_per_token": 1.424938162167867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.300565481185913, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.300565481185913, "logits_per_char": -0.6502827405929565, "num_chars": 2}, {"sum_logits": -1.3543716669082642, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3543716669082642, "logits_per_char": -0.6771858334541321, "num_chars": 2}, {"sum_logits": -1.4048035144805908, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4048035144805908, "logits_per_char": -0.7024017572402954, "num_chars": 2}, {"sum_logits": -1.515639305114746, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.515639305114746, "logits_per_char": -0.757819652557373, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 683, "native_id": "AKDE&ED_2008_8_49", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4537798166275024, "incorrect_loss_raw": 1.3733549515406291, "correct_loss_per_char": 0.7268899083137512, "incorrect_loss_per_char": 0.6866774757703146, "correct_loss_per_token": 1.4537798166275024, "incorrect_loss_per_token": 1.3733549515406291, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4537798166275024, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4537798166275024, "logits_per_char": -0.7268899083137512, "num_chars": 2}, {"sum_logits": -1.2895008325576782, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.2895008325576782, "logits_per_char": -0.6447504162788391, "num_chars": 2}, {"sum_logits": -1.4581183195114136, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4581183195114136, "logits_per_char": -0.7290591597557068, "num_chars": 2}, {"sum_logits": -1.3724457025527954, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3724457025527954, "logits_per_char": -0.6862228512763977, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 684, "native_id": "Mercury_7248098", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3157421350479126, "incorrect_loss_raw": 1.4180025259653728, "correct_loss_per_char": 0.6578710675239563, "incorrect_loss_per_char": 0.7090012629826864, "correct_loss_per_token": 1.3157421350479126, "incorrect_loss_per_token": 1.4180025259653728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4561408758163452, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4561408758163452, "logits_per_char": -0.7280704379081726, "num_chars": 2}, {"sum_logits": -1.43111252784729, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.43111252784729, "logits_per_char": -0.715556263923645, "num_chars": 2}, {"sum_logits": -1.366754174232483, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.366754174232483, "logits_per_char": -0.6833770871162415, "num_chars": 2}, {"sum_logits": -1.3157421350479126, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.3157421350479126, "logits_per_char": -0.6578710675239563, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 685, "native_id": "Mercury_7041300", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2953239679336548, "incorrect_loss_raw": 1.4242942333221436, "correct_loss_per_char": 0.6476619839668274, "incorrect_loss_per_char": 0.7121471166610718, "correct_loss_per_token": 1.2953239679336548, "incorrect_loss_per_token": 1.4242942333221436, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4875439405441284, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4875439405441284, "logits_per_char": -0.7437719702720642, "num_chars": 2}, {"sum_logits": -1.3856414556503296, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3856414556503296, "logits_per_char": -0.6928207278251648, "num_chars": 2}, {"sum_logits": -1.3996973037719727, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3996973037719727, "logits_per_char": -0.6998486518859863, "num_chars": 2}, {"sum_logits": -1.2953239679336548, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2953239679336548, "logits_per_char": -0.6476619839668274, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 686, "native_id": "Mercury_SC_405838", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3903781175613403, "incorrect_loss_raw": 1.3944798707962036, "correct_loss_per_char": 0.6951890587806702, "incorrect_loss_per_char": 0.6972399353981018, "correct_loss_per_token": 1.3903781175613403, "incorrect_loss_per_token": 1.3944798707962036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.454965591430664, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.454965591430664, "logits_per_char": -0.727482795715332, "num_chars": 2}, {"sum_logits": -1.3903781175613403, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3903781175613403, "logits_per_char": -0.6951890587806702, "num_chars": 2}, {"sum_logits": -1.4626741409301758, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4626741409301758, "logits_per_char": -0.7313370704650879, "num_chars": 2}, {"sum_logits": -1.265799880027771, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.265799880027771, "logits_per_char": -0.6328999400138855, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 687, "native_id": "Mercury_SC_404974", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2953860759735107, "incorrect_loss_raw": 1.428614656130473, "correct_loss_per_char": 0.6476930379867554, "incorrect_loss_per_char": 0.7143073280652364, "correct_loss_per_token": 1.2953860759735107, "incorrect_loss_per_token": 1.428614656130473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5539244413375854, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5539244413375854, "logits_per_char": -0.7769622206687927, "num_chars": 2}, {"sum_logits": -1.391426920890808, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.391426920890808, "logits_per_char": -0.695713460445404, "num_chars": 2}, {"sum_logits": -1.340492606163025, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.340492606163025, "logits_per_char": -0.6702463030815125, "num_chars": 2}, {"sum_logits": -1.2953860759735107, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2953860759735107, "logits_per_char": -0.6476930379867554, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 688, "native_id": "Mercury_416580", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.438791275024414, "incorrect_loss_raw": 1.3917477925618489, "correct_loss_per_char": 0.719395637512207, "incorrect_loss_per_char": 0.6958738962809244, "correct_loss_per_token": 1.438791275024414, "incorrect_loss_per_token": 1.3917477925618489, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6394959688186646, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.6394959688186646, "logits_per_char": -0.8197479844093323, "num_chars": 2}, {"sum_logits": -1.3239425420761108, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3239425420761108, "logits_per_char": -0.6619712710380554, "num_chars": 2}, {"sum_logits": -1.438791275024414, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.438791275024414, "logits_per_char": -0.719395637512207, "num_chars": 2}, {"sum_logits": -1.2118048667907715, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2118048667907715, "logits_per_char": -0.6059024333953857, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 689, "native_id": "CSZ_2005_5_CSZ10247", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3749885559082031, "incorrect_loss_raw": 1.3956710894902546, "correct_loss_per_char": 0.6874942779541016, "incorrect_loss_per_char": 0.6978355447451273, "correct_loss_per_token": 1.3749885559082031, "incorrect_loss_per_token": 1.3956710894902546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3749885559082031, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3749885559082031, "logits_per_char": -0.6874942779541016, "num_chars": 2}, {"sum_logits": -1.3482760190963745, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.3482760190963745, "logits_per_char": -0.6741380095481873, "num_chars": 2}, {"sum_logits": -1.4577269554138184, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4577269554138184, "logits_per_char": -0.7288634777069092, "num_chars": 2}, {"sum_logits": -1.3810102939605713, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3810102939605713, "logits_per_char": -0.6905051469802856, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 690, "native_id": "TIMSS_2003_4_pg14", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4431495666503906, "incorrect_loss_raw": 1.3903037309646606, "correct_loss_per_char": 0.7215747833251953, "incorrect_loss_per_char": 0.6951518654823303, "correct_loss_per_token": 1.4431495666503906, "incorrect_loss_per_token": 1.3903037309646606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.172691822052002, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.172691822052002, "logits_per_char": -0.586345911026001, "num_chars": 2}, {"sum_logits": -1.4431495666503906, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4431495666503906, "logits_per_char": -0.7215747833251953, "num_chars": 2}, {"sum_logits": -1.404576301574707, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.404576301574707, "logits_per_char": -0.7022881507873535, "num_chars": 2}, {"sum_logits": -1.593643069267273, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.593643069267273, "logits_per_char": -0.7968215346336365, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 691, "native_id": "Mercury_7211418", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.411945104598999, "incorrect_loss_raw": 1.3844849665959675, "correct_loss_per_char": 0.7059725522994995, "incorrect_loss_per_char": 0.6922424832979838, "correct_loss_per_token": 1.411945104598999, "incorrect_loss_per_token": 1.3844849665959675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3972771167755127, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3972771167755127, "logits_per_char": -0.6986385583877563, "num_chars": 2}, {"sum_logits": -1.3437198400497437, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.3437198400497437, "logits_per_char": -0.6718599200248718, "num_chars": 2}, {"sum_logits": -1.4124579429626465, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4124579429626465, "logits_per_char": -0.7062289714813232, "num_chars": 2}, {"sum_logits": -1.411945104598999, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.411945104598999, "logits_per_char": -0.7059725522994995, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 692, "native_id": "Mercury_7044555", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3311924934387207, "incorrect_loss_raw": 1.4157232443491619, "correct_loss_per_char": 0.6655962467193604, "incorrect_loss_per_char": 0.7078616221745809, "correct_loss_per_token": 1.3311924934387207, "incorrect_loss_per_token": 1.4157232443491619, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4721455574035645, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4721455574035645, "logits_per_char": -0.7360727787017822, "num_chars": 2}, {"sum_logits": -1.4568744897842407, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4568744897842407, "logits_per_char": -0.7284372448921204, "num_chars": 2}, {"sum_logits": -1.3181496858596802, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3181496858596802, "logits_per_char": -0.6590748429298401, "num_chars": 2}, {"sum_logits": -1.3311924934387207, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3311924934387207, "logits_per_char": -0.6655962467193604, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 693, "native_id": "Mercury_7245788", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5909652709960938, "incorrect_loss_raw": 1.3336297273635864, "correct_loss_per_char": 0.7954826354980469, "incorrect_loss_per_char": 0.6668148636817932, "correct_loss_per_token": 1.5909652709960938, "incorrect_loss_per_token": 1.3336297273635864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5909652709960938, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5909652709960938, "logits_per_char": -0.7954826354980469, "num_chars": 2}, {"sum_logits": -1.3912583589553833, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3912583589553833, "logits_per_char": -0.6956291794776917, "num_chars": 2}, {"sum_logits": -1.3720293045043945, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3720293045043945, "logits_per_char": -0.6860146522521973, "num_chars": 2}, {"sum_logits": -1.2376015186309814, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.2376015186309814, "logits_per_char": -0.6188007593154907, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 694, "native_id": "Mercury_7141418", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.352569580078125, "incorrect_loss_raw": 1.4081380367279053, "correct_loss_per_char": 0.6762847900390625, "incorrect_loss_per_char": 0.7040690183639526, "correct_loss_per_token": 1.352569580078125, "incorrect_loss_per_token": 1.4081380367279053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.448834776878357, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.448834776878357, "logits_per_char": -0.7244173884391785, "num_chars": 2}, {"sum_logits": -1.352569580078125, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.352569580078125, "logits_per_char": -0.6762847900390625, "num_chars": 2}, {"sum_logits": -1.504449725151062, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.504449725151062, "logits_per_char": -0.752224862575531, "num_chars": 2}, {"sum_logits": -1.2711296081542969, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.2711296081542969, "logits_per_char": -0.6355648040771484, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 695, "native_id": "Mercury_7015925", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.372437596321106, "incorrect_loss_raw": 1.398599664370219, "correct_loss_per_char": 0.686218798160553, "incorrect_loss_per_char": 0.6992998321851095, "correct_loss_per_token": 1.372437596321106, "incorrect_loss_per_token": 1.398599664370219, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.466237187385559, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.466237187385559, "logits_per_char": -0.7331185936927795, "num_chars": 2}, {"sum_logits": -1.372437596321106, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.372437596321106, "logits_per_char": -0.686218798160553, "num_chars": 2}, {"sum_logits": -1.3706413507461548, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3706413507461548, "logits_per_char": -0.6853206753730774, "num_chars": 2}, {"sum_logits": -1.3589204549789429, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.3589204549789429, "logits_per_char": -0.6794602274894714, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 696, "native_id": "Mercury_7043978", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4887443780899048, "incorrect_loss_raw": 1.3882029453913372, "correct_loss_per_char": 0.7443721890449524, "incorrect_loss_per_char": 0.6941014726956686, "correct_loss_per_token": 1.4887443780899048, "incorrect_loss_per_token": 1.3882029453913372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1521337032318115, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.1521337032318115, "logits_per_char": -0.5760668516159058, "num_chars": 2}, {"sum_logits": -1.4887443780899048, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4887443780899048, "logits_per_char": -0.7443721890449524, "num_chars": 2}, {"sum_logits": -1.7137590646743774, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.7137590646743774, "logits_per_char": -0.8568795323371887, "num_chars": 2}, {"sum_logits": -1.2987160682678223, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.2987160682678223, "logits_per_char": -0.6493580341339111, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 697, "native_id": "VASoL_2010_3_20", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.328652262687683, "incorrect_loss_raw": 1.413644274075826, "correct_loss_per_char": 0.6643261313438416, "incorrect_loss_per_char": 0.706822137037913, "correct_loss_per_token": 1.328652262687683, "incorrect_loss_per_token": 1.413644274075826, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.328652262687683, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.328652262687683, "logits_per_char": -0.6643261313438416, "num_chars": 2}, {"sum_logits": -1.316076636314392, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.316076636314392, "logits_per_char": -0.658038318157196, "num_chars": 2}, {"sum_logits": -1.4221199750900269, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4221199750900269, "logits_per_char": -0.7110599875450134, "num_chars": 2}, {"sum_logits": -1.502736210823059, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.502736210823059, "logits_per_char": -0.7513681054115295, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 698, "native_id": "Mercury_7008785", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4085323810577393, "incorrect_loss_raw": 1.3838744560877483, "correct_loss_per_char": 0.7042661905288696, "incorrect_loss_per_char": 0.6919372280438741, "correct_loss_per_token": 1.4085323810577393, "incorrect_loss_per_token": 1.3838744560877483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4085323810577393, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4085323810577393, "logits_per_char": -0.7042661905288696, "num_chars": 2}, {"sum_logits": -1.3726650476455688, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3726650476455688, "logits_per_char": -0.6863325238227844, "num_chars": 2}, {"sum_logits": -1.366720199584961, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.366720199584961, "logits_per_char": -0.6833600997924805, "num_chars": 2}, {"sum_logits": -1.4122381210327148, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4122381210327148, "logits_per_char": -0.7061190605163574, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 699, "native_id": "Mercury_7011235", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3513435125350952, "incorrect_loss_raw": 1.4085934956868489, "correct_loss_per_char": 0.6756717562675476, "incorrect_loss_per_char": 0.7042967478434244, "correct_loss_per_token": 1.3513435125350952, "incorrect_loss_per_token": 1.4085934956868489, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2985141277313232, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.2985141277313232, "logits_per_char": -0.6492570638656616, "num_chars": 2}, {"sum_logits": -1.3513435125350952, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3513435125350952, "logits_per_char": -0.6756717562675476, "num_chars": 2}, {"sum_logits": -1.474549412727356, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.474549412727356, "logits_per_char": -0.737274706363678, "num_chars": 2}, {"sum_logits": -1.4527169466018677, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.4527169466018677, "logits_per_char": -0.7263584733009338, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 700, "native_id": "Mercury_SC_LBS10269", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3582994937896729, "incorrect_loss_raw": 1.4013253847757976, "correct_loss_per_char": 0.6791497468948364, "incorrect_loss_per_char": 0.7006626923878988, "correct_loss_per_token": 1.3582994937896729, "incorrect_loss_per_token": 1.4013253847757976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3812217712402344, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3812217712402344, "logits_per_char": -0.6906108856201172, "num_chars": 2}, {"sum_logits": -1.4638960361480713, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4638960361480713, "logits_per_char": -0.7319480180740356, "num_chars": 2}, {"sum_logits": -1.358858346939087, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.358858346939087, "logits_per_char": -0.6794291734695435, "num_chars": 2}, {"sum_logits": -1.3582994937896729, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.3582994937896729, "logits_per_char": -0.6791497468948364, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 701, "native_id": "Mercury_404107", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.386373519897461, "incorrect_loss_raw": 1.3953737417856853, "correct_loss_per_char": 0.6931867599487305, "incorrect_loss_per_char": 0.6976868708928426, "correct_loss_per_token": 1.386373519897461, "incorrect_loss_per_token": 1.3953737417856853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2929292917251587, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.2929292917251587, "logits_per_char": -0.6464646458625793, "num_chars": 2}, {"sum_logits": -1.386373519897461, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.386373519897461, "logits_per_char": -0.6931867599487305, "num_chars": 2}, {"sum_logits": -1.3930721282958984, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3930721282958984, "logits_per_char": -0.6965360641479492, "num_chars": 2}, {"sum_logits": -1.5001198053359985, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.5001198053359985, "logits_per_char": -0.7500599026679993, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 702, "native_id": "Mercury_SC_400406", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4358290433883667, "incorrect_loss_raw": 1.3772741556167603, "correct_loss_per_char": 0.7179145216941833, "incorrect_loss_per_char": 0.6886370778083801, "correct_loss_per_token": 1.4358290433883667, "incorrect_loss_per_token": 1.3772741556167603, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3005270957946777, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.3005270957946777, "logits_per_char": -0.6502635478973389, "num_chars": 2}, {"sum_logits": -1.4358290433883667, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4358290433883667, "logits_per_char": -0.7179145216941833, "num_chars": 2}, {"sum_logits": -1.4138182401657104, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4138182401657104, "logits_per_char": -0.7069091200828552, "num_chars": 2}, {"sum_logits": -1.4174771308898926, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4174771308898926, "logits_per_char": -0.7087385654449463, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 703, "native_id": "Mercury_SC_400380", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.403090476989746, "incorrect_loss_raw": 1.3867413997650146, "correct_loss_per_char": 0.701545238494873, "incorrect_loss_per_char": 0.6933706998825073, "correct_loss_per_token": 1.403090476989746, "incorrect_loss_per_token": 1.3867413997650146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.354062557220459, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.354062557220459, "logits_per_char": -0.6770312786102295, "num_chars": 2}, {"sum_logits": -1.403090476989746, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.403090476989746, "logits_per_char": -0.701545238494873, "num_chars": 2}, {"sum_logits": -1.3973422050476074, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3973422050476074, "logits_per_char": -0.6986711025238037, "num_chars": 2}, {"sum_logits": -1.4088194370269775, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4088194370269775, "logits_per_char": -0.7044097185134888, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 704, "native_id": "Mercury_7235848", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3738764524459839, "incorrect_loss_raw": 1.3962053457895915, "correct_loss_per_char": 0.6869382262229919, "incorrect_loss_per_char": 0.6981026728947958, "correct_loss_per_token": 1.3738764524459839, "incorrect_loss_per_token": 1.3962053457895915, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3942142724990845, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3942142724990845, "logits_per_char": -0.6971071362495422, "num_chars": 2}, {"sum_logits": -1.374491572380066, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.374491572380066, "logits_per_char": -0.687245786190033, "num_chars": 2}, {"sum_logits": -1.419910192489624, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.419910192489624, "logits_per_char": -0.709955096244812, "num_chars": 2}, {"sum_logits": -1.3738764524459839, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.3738764524459839, "logits_per_char": -0.6869382262229919, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 705, "native_id": "Mercury_7248308", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4069116115570068, "incorrect_loss_raw": 1.40090278784434, "correct_loss_per_char": 0.7034558057785034, "incorrect_loss_per_char": 0.70045139392217, "correct_loss_per_token": 1.4069116115570068, "incorrect_loss_per_token": 1.40090278784434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6476812362670898, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.6476812362670898, "logits_per_char": -0.8238406181335449, "num_chars": 2}, {"sum_logits": -1.4069116115570068, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4069116115570068, "logits_per_char": -0.7034558057785034, "num_chars": 2}, {"sum_logits": -1.3544516563415527, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3544516563415527, "logits_per_char": -0.6772258281707764, "num_chars": 2}, {"sum_logits": -1.2005754709243774, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2005754709243774, "logits_per_char": -0.6002877354621887, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 706, "native_id": "MCAS_2006_9_17", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4195643663406372, "incorrect_loss_raw": 1.3826534748077393, "correct_loss_per_char": 0.7097821831703186, "incorrect_loss_per_char": 0.6913267374038696, "correct_loss_per_token": 1.4195643663406372, "incorrect_loss_per_token": 1.3826534748077393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4195643663406372, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4195643663406372, "logits_per_char": -0.7097821831703186, "num_chars": 2}, {"sum_logits": -1.3765432834625244, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3765432834625244, "logits_per_char": -0.6882716417312622, "num_chars": 2}, {"sum_logits": -1.3224141597747803, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3224141597747803, "logits_per_char": -0.6612070798873901, "num_chars": 2}, {"sum_logits": -1.449002981185913, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.449002981185913, "logits_per_char": -0.7245014905929565, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 707, "native_id": "Mercury_7082478", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.395790934562683, "incorrect_loss_raw": 1.3897597392400105, "correct_loss_per_char": 0.6978954672813416, "incorrect_loss_per_char": 0.6948798696200053, "correct_loss_per_token": 1.395790934562683, "incorrect_loss_per_token": 1.3897597392400105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4034957885742188, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4034957885742188, "logits_per_char": -0.7017478942871094, "num_chars": 2}, {"sum_logits": -1.395790934562683, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.395790934562683, "logits_per_char": -0.6978954672813416, "num_chars": 2}, {"sum_logits": -1.3569161891937256, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.3569161891937256, "logits_per_char": -0.6784580945968628, "num_chars": 2}, {"sum_logits": -1.4088672399520874, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4088672399520874, "logits_per_char": -0.7044336199760437, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 708, "native_id": "MCAS_2010_8_12015", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4335479736328125, "incorrect_loss_raw": 1.383731484413147, "correct_loss_per_char": 0.7167739868164062, "incorrect_loss_per_char": 0.6918657422065735, "correct_loss_per_token": 1.4335479736328125, "incorrect_loss_per_token": 1.383731484413147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3587632179260254, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3587632179260254, "logits_per_char": -0.6793816089630127, "num_chars": 2}, {"sum_logits": -1.4335479736328125, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4335479736328125, "logits_per_char": -0.7167739868164062, "num_chars": 2}, {"sum_logits": -1.292941927909851, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.292941927909851, "logits_per_char": -0.6464709639549255, "num_chars": 2}, {"sum_logits": -1.4994893074035645, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4994893074035645, "logits_per_char": -0.7497446537017822, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 709, "native_id": "Mercury_7008155", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.387941837310791, "incorrect_loss_raw": 1.3972878456115723, "correct_loss_per_char": 0.6939709186553955, "incorrect_loss_per_char": 0.6986439228057861, "correct_loss_per_token": 1.387941837310791, "incorrect_loss_per_token": 1.3972878456115723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3019840717315674, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.3019840717315674, "logits_per_char": -0.6509920358657837, "num_chars": 2}, {"sum_logits": -1.387941837310791, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.387941837310791, "logits_per_char": -0.6939709186553955, "num_chars": 2}, {"sum_logits": -1.3363629579544067, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3363629579544067, "logits_per_char": -0.6681814789772034, "num_chars": 2}, {"sum_logits": -1.5535165071487427, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.5535165071487427, "logits_per_char": -0.7767582535743713, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 710, "native_id": "Mercury_SC_401611", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3693642616271973, "incorrect_loss_raw": 1.4133381446202595, "correct_loss_per_char": 0.6846821308135986, "incorrect_loss_per_char": 0.7066690723101298, "correct_loss_per_token": 1.3693642616271973, "incorrect_loss_per_token": 1.4133381446202595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5103720426559448, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5103720426559448, "logits_per_char": -0.7551860213279724, "num_chars": 2}, {"sum_logits": -1.5614163875579834, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5614163875579834, "logits_per_char": -0.7807081937789917, "num_chars": 2}, {"sum_logits": -1.3693642616271973, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3693642616271973, "logits_per_char": -0.6846821308135986, "num_chars": 2}, {"sum_logits": -1.1682260036468506, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.1682260036468506, "logits_per_char": -0.5841130018234253, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 711, "native_id": "Mercury_415270", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4237698316574097, "incorrect_loss_raw": 1.3844011227289836, "correct_loss_per_char": 0.7118849158287048, "incorrect_loss_per_char": 0.6922005613644918, "correct_loss_per_token": 1.4237698316574097, "incorrect_loss_per_token": 1.3844011227289836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3046091794967651, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3046091794967651, "logits_per_char": -0.6523045897483826, "num_chars": 2}, {"sum_logits": -1.371413230895996, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.371413230895996, "logits_per_char": -0.685706615447998, "num_chars": 2}, {"sum_logits": -1.4237698316574097, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4237698316574097, "logits_per_char": -0.7118849158287048, "num_chars": 2}, {"sum_logits": -1.4771809577941895, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4771809577941895, "logits_per_char": -0.7385904788970947, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 712, "native_id": "VASoL_2010_3_6", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.480655312538147, "incorrect_loss_raw": 1.3644429047902424, "correct_loss_per_char": 0.7403276562690735, "incorrect_loss_per_char": 0.6822214523951212, "correct_loss_per_token": 1.480655312538147, "incorrect_loss_per_token": 1.3644429047902424, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4247993230819702, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4247993230819702, "logits_per_char": -0.7123996615409851, "num_chars": 2}, {"sum_logits": -1.480655312538147, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.480655312538147, "logits_per_char": -0.7403276562690735, "num_chars": 2}, {"sum_logits": -1.3324497938156128, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.3324497938156128, "logits_per_char": -0.6662248969078064, "num_chars": 2}, {"sum_logits": -1.3360795974731445, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3360795974731445, "logits_per_char": -0.6680397987365723, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 713, "native_id": "Mercury_184170", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3960965871810913, "incorrect_loss_raw": 1.3885739644368489, "correct_loss_per_char": 0.6980482935905457, "incorrect_loss_per_char": 0.6942869822184244, "correct_loss_per_token": 1.3960965871810913, "incorrect_loss_per_token": 1.3885739644368489, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3922882080078125, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3922882080078125, "logits_per_char": -0.6961441040039062, "num_chars": 2}, {"sum_logits": -1.330957055091858, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.330957055091858, "logits_per_char": -0.665478527545929, "num_chars": 2}, {"sum_logits": -1.3960965871810913, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3960965871810913, "logits_per_char": -0.6980482935905457, "num_chars": 2}, {"sum_logits": -1.4424766302108765, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4424766302108765, "logits_per_char": -0.7212383151054382, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 714, "native_id": "Mercury_7171535", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3439034223556519, "incorrect_loss_raw": 1.4129367272059123, "correct_loss_per_char": 0.6719517111778259, "incorrect_loss_per_char": 0.7064683636029562, "correct_loss_per_token": 1.3439034223556519, "incorrect_loss_per_token": 1.4129367272059123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5613629817962646, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.5613629817962646, "logits_per_char": -0.7806814908981323, "num_chars": 2}, {"sum_logits": -1.3126658201217651, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3126658201217651, "logits_per_char": -0.6563329100608826, "num_chars": 2}, {"sum_logits": -1.364781379699707, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.364781379699707, "logits_per_char": -0.6823906898498535, "num_chars": 2}, {"sum_logits": -1.3439034223556519, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3439034223556519, "logits_per_char": -0.6719517111778259, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 715, "native_id": "Mercury_400256", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3584545850753784, "incorrect_loss_raw": 1.4107032616933186, "correct_loss_per_char": 0.6792272925376892, "incorrect_loss_per_char": 0.7053516308466593, "correct_loss_per_token": 1.3584545850753784, "incorrect_loss_per_token": 1.4107032616933186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5918211936950684, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5918211936950684, "logits_per_char": -0.7959105968475342, "num_chars": 2}, {"sum_logits": -1.3584545850753784, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3584545850753784, "logits_per_char": -0.6792272925376892, "num_chars": 2}, {"sum_logits": -1.3889495134353638, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3889495134353638, "logits_per_char": -0.6944747567176819, "num_chars": 2}, {"sum_logits": -1.251339077949524, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.251339077949524, "logits_per_char": -0.625669538974762, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 716, "native_id": "Mercury_SC_400034", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3106026649475098, "incorrect_loss_raw": 1.419906775156657, "correct_loss_per_char": 0.6553013324737549, "incorrect_loss_per_char": 0.7099533875783285, "correct_loss_per_token": 1.3106026649475098, "incorrect_loss_per_token": 1.419906775156657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3106026649475098, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3106026649475098, "logits_per_char": -0.6553013324737549, "num_chars": 2}, {"sum_logits": -1.4210330247879028, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4210330247879028, "logits_per_char": -0.7105165123939514, "num_chars": 2}, {"sum_logits": -1.439337134361267, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.439337134361267, "logits_per_char": -0.7196685671806335, "num_chars": 2}, {"sum_logits": -1.3993501663208008, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3993501663208008, "logits_per_char": -0.6996750831604004, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 717, "native_id": "LEAP__5_10309", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5586930513381958, "incorrect_loss_raw": 1.34468674659729, "correct_loss_per_char": 0.7793465256690979, "incorrect_loss_per_char": 0.672343373298645, "correct_loss_per_token": 1.5586930513381958, "incorrect_loss_per_token": 1.34468674659729, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5586930513381958, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.5586930513381958, "logits_per_char": -0.7793465256690979, "num_chars": 2}, {"sum_logits": -1.3190913200378418, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3190913200378418, "logits_per_char": -0.6595456600189209, "num_chars": 2}, {"sum_logits": -1.4773600101470947, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4773600101470947, "logits_per_char": -0.7386800050735474, "num_chars": 2}, {"sum_logits": -1.2376089096069336, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.2376089096069336, "logits_per_char": -0.6188044548034668, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 718, "native_id": "Mercury_7113803", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4666937589645386, "incorrect_loss_raw": 1.3666833639144897, "correct_loss_per_char": 0.7333468794822693, "incorrect_loss_per_char": 0.6833416819572449, "correct_loss_per_token": 1.4666937589645386, "incorrect_loss_per_token": 1.3666833639144897, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4666937589645386, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4666937589645386, "logits_per_char": -0.7333468794822693, "num_chars": 2}, {"sum_logits": -1.415726661682129, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.415726661682129, "logits_per_char": -0.7078633308410645, "num_chars": 2}, {"sum_logits": -1.3938429355621338, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3938429355621338, "logits_per_char": -0.6969214677810669, "num_chars": 2}, {"sum_logits": -1.2904804944992065, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2904804944992065, "logits_per_char": -0.6452402472496033, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 719, "native_id": "Mercury_7222635", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.327554702758789, "incorrect_loss_raw": 1.4173222382863362, "correct_loss_per_char": 0.6637773513793945, "incorrect_loss_per_char": 0.7086611191431681, "correct_loss_per_token": 1.327554702758789, "incorrect_loss_per_token": 1.4173222382863362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.480312466621399, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.480312466621399, "logits_per_char": -0.7401562333106995, "num_chars": 2}, {"sum_logits": -1.292006492614746, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.292006492614746, "logits_per_char": -0.646003246307373, "num_chars": 2}, {"sum_logits": -1.327554702758789, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.327554702758789, "logits_per_char": -0.6637773513793945, "num_chars": 2}, {"sum_logits": -1.4796477556228638, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4796477556228638, "logits_per_char": -0.7398238778114319, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 720, "native_id": "NYSEDREGENTS_2010_8_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.485640525817871, "incorrect_loss_raw": 1.360533078511556, "correct_loss_per_char": 0.7428202629089355, "incorrect_loss_per_char": 0.680266539255778, "correct_loss_per_token": 1.485640525817871, "incorrect_loss_per_token": 1.360533078511556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.380515694618225, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.380515694618225, "logits_per_char": -0.6902578473091125, "num_chars": 2}, {"sum_logits": -1.2938683032989502, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.2938683032989502, "logits_per_char": -0.6469341516494751, "num_chars": 2}, {"sum_logits": -1.4072152376174927, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4072152376174927, "logits_per_char": -0.7036076188087463, "num_chars": 2}, {"sum_logits": -1.485640525817871, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.485640525817871, "logits_per_char": -0.7428202629089355, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 721, "native_id": "Mercury_7269098", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3680429458618164, "incorrect_loss_raw": 1.3982197443644206, "correct_loss_per_char": 0.6840214729309082, "incorrect_loss_per_char": 0.6991098721822103, "correct_loss_per_token": 1.3680429458618164, "incorrect_loss_per_token": 1.3982197443644206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.444710373878479, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.444710373878479, "logits_per_char": -0.7223551869392395, "num_chars": 2}, {"sum_logits": -1.3959007263183594, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3959007263183594, "logits_per_char": -0.6979503631591797, "num_chars": 2}, {"sum_logits": -1.3540481328964233, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.3540481328964233, "logits_per_char": -0.6770240664482117, "num_chars": 2}, {"sum_logits": -1.3680429458618164, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3680429458618164, "logits_per_char": -0.6840214729309082, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 722, "native_id": "Mercury_401187", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3641164302825928, "incorrect_loss_raw": 1.4003432194391887, "correct_loss_per_char": 0.6820582151412964, "incorrect_loss_per_char": 0.7001716097195944, "correct_loss_per_token": 1.3641164302825928, "incorrect_loss_per_token": 1.4003432194391887, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3521403074264526, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3521403074264526, "logits_per_char": -0.6760701537132263, "num_chars": 2}, {"sum_logits": -1.3579174280166626, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3579174280166626, "logits_per_char": -0.6789587140083313, "num_chars": 2}, {"sum_logits": -1.3641164302825928, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3641164302825928, "logits_per_char": -0.6820582151412964, "num_chars": 2}, {"sum_logits": -1.4909719228744507, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4909719228744507, "logits_per_char": -0.7454859614372253, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 723, "native_id": "MCAS_2002_8_7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4585654735565186, "incorrect_loss_raw": 1.3704214890797932, "correct_loss_per_char": 0.7292827367782593, "incorrect_loss_per_char": 0.6852107445398966, "correct_loss_per_token": 1.4585654735565186, "incorrect_loss_per_token": 1.3704214890797932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4585654735565186, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4585654735565186, "logits_per_char": -0.7292827367782593, "num_chars": 2}, {"sum_logits": -1.4656639099121094, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4656639099121094, "logits_per_char": -0.7328319549560547, "num_chars": 2}, {"sum_logits": -1.3251113891601562, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3251113891601562, "logits_per_char": -0.6625556945800781, "num_chars": 2}, {"sum_logits": -1.3204891681671143, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3204891681671143, "logits_per_char": -0.6602445840835571, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 724, "native_id": "Mercury_401603", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4411993026733398, "incorrect_loss_raw": 1.3791349331537883, "correct_loss_per_char": 0.7205996513366699, "incorrect_loss_per_char": 0.6895674665768942, "correct_loss_per_token": 1.4411993026733398, "incorrect_loss_per_token": 1.3791349331537883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.507799744606018, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.507799744606018, "logits_per_char": -0.753899872303009, "num_chars": 2}, {"sum_logits": -1.4411993026733398, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4411993026733398, "logits_per_char": -0.7205996513366699, "num_chars": 2}, {"sum_logits": -1.3278530836105347, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3278530836105347, "logits_per_char": -0.6639265418052673, "num_chars": 2}, {"sum_logits": -1.301751971244812, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.301751971244812, "logits_per_char": -0.650875985622406, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 725, "native_id": "Mercury_7014560", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3777921199798584, "incorrect_loss_raw": 1.3960167566935222, "correct_loss_per_char": 0.6888960599899292, "incorrect_loss_per_char": 0.6980083783467611, "correct_loss_per_token": 1.3777921199798584, "incorrect_loss_per_token": 1.3960167566935222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.465012550354004, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.465012550354004, "logits_per_char": -0.732506275177002, "num_chars": 2}, {"sum_logits": -1.355401635169983, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.355401635169983, "logits_per_char": -0.6777008175849915, "num_chars": 2}, {"sum_logits": -1.3777921199798584, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3777921199798584, "logits_per_char": -0.6888960599899292, "num_chars": 2}, {"sum_logits": -1.3676360845565796, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3676360845565796, "logits_per_char": -0.6838180422782898, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 726, "native_id": "Mercury_400089", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4700249433517456, "incorrect_loss_raw": 1.3693677981694539, "correct_loss_per_char": 0.7350124716758728, "incorrect_loss_per_char": 0.6846838990847269, "correct_loss_per_token": 1.4700249433517456, "incorrect_loss_per_token": 1.3693677981694539, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4232884645462036, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4232884645462036, "logits_per_char": -0.7116442322731018, "num_chars": 2}, {"sum_logits": -1.3958978652954102, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3958978652954102, "logits_per_char": -0.6979489326477051, "num_chars": 2}, {"sum_logits": -1.4700249433517456, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4700249433517456, "logits_per_char": -0.7350124716758728, "num_chars": 2}, {"sum_logits": -1.288917064666748, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.288917064666748, "logits_per_char": -0.644458532333374, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 727, "native_id": "Mercury_416637", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4726983308792114, "incorrect_loss_raw": 1.3680164019266765, "correct_loss_per_char": 0.7363491654396057, "incorrect_loss_per_char": 0.6840082009633383, "correct_loss_per_token": 1.4726983308792114, "incorrect_loss_per_token": 1.3680164019266765, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4726983308792114, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4726983308792114, "logits_per_char": -0.7363491654396057, "num_chars": 2}, {"sum_logits": -1.4432411193847656, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4432411193847656, "logits_per_char": -0.7216205596923828, "num_chars": 2}, {"sum_logits": -1.388051152229309, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.388051152229309, "logits_per_char": -0.6940255761146545, "num_chars": 2}, {"sum_logits": -1.2727569341659546, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.2727569341659546, "logits_per_char": -0.6363784670829773, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 728, "native_id": "OHAT_2007_8_43", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.489375352859497, "incorrect_loss_raw": 1.3648435672124226, "correct_loss_per_char": 0.7446876764297485, "incorrect_loss_per_char": 0.6824217836062113, "correct_loss_per_token": 1.489375352859497, "incorrect_loss_per_token": 1.3648435672124226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.489375352859497, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.489375352859497, "logits_per_char": -0.7446876764297485, "num_chars": 2}, {"sum_logits": -1.4975125789642334, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4975125789642334, "logits_per_char": -0.7487562894821167, "num_chars": 2}, {"sum_logits": -1.3723053932189941, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3723053932189941, "logits_per_char": -0.6861526966094971, "num_chars": 2}, {"sum_logits": -1.2247127294540405, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2247127294540405, "logits_per_char": -0.6123563647270203, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 729, "native_id": "Mercury_7185255", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4132384061813354, "incorrect_loss_raw": 1.3877254327138264, "correct_loss_per_char": 0.7066192030906677, "incorrect_loss_per_char": 0.6938627163569132, "correct_loss_per_token": 1.4132384061813354, "incorrect_loss_per_token": 1.3877254327138264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5447667837142944, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.5447667837142944, "logits_per_char": -0.7723833918571472, "num_chars": 2}, {"sum_logits": -1.2881098985671997, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2881098985671997, "logits_per_char": -0.6440549492835999, "num_chars": 2}, {"sum_logits": -1.4132384061813354, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4132384061813354, "logits_per_char": -0.7066192030906677, "num_chars": 2}, {"sum_logits": -1.3302996158599854, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3302996158599854, "logits_per_char": -0.6651498079299927, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 730, "native_id": "Mercury_406773", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4448944330215454, "incorrect_loss_raw": 1.3854990005493164, "correct_loss_per_char": 0.7224472165107727, "incorrect_loss_per_char": 0.6927495002746582, "correct_loss_per_token": 1.4448944330215454, "incorrect_loss_per_token": 1.3854990005493164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5847954750061035, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5847954750061035, "logits_per_char": -0.7923977375030518, "num_chars": 2}, {"sum_logits": -1.4448944330215454, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4448944330215454, "logits_per_char": -0.7224472165107727, "num_chars": 2}, {"sum_logits": -1.3923569917678833, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3923569917678833, "logits_per_char": -0.6961784958839417, "num_chars": 2}, {"sum_logits": -1.1793445348739624, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.1793445348739624, "logits_per_char": -0.5896722674369812, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 731, "native_id": "Mercury_7056665", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3846335411071777, "incorrect_loss_raw": 1.3936008214950562, "correct_loss_per_char": 0.6923167705535889, "incorrect_loss_per_char": 0.6968004107475281, "correct_loss_per_token": 1.3846335411071777, "incorrect_loss_per_token": 1.3936008214950562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4689140319824219, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4689140319824219, "logits_per_char": -0.7344570159912109, "num_chars": 2}, {"sum_logits": -1.3718773126602173, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3718773126602173, "logits_per_char": -0.6859386563301086, "num_chars": 2}, {"sum_logits": -1.3846335411071777, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3846335411071777, "logits_per_char": -0.6923167705535889, "num_chars": 2}, {"sum_logits": -1.3400111198425293, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3400111198425293, "logits_per_char": -0.6700055599212646, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 732, "native_id": "Mercury_7211628", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3106915950775146, "incorrect_loss_raw": 1.4310849507649739, "correct_loss_per_char": 0.6553457975387573, "incorrect_loss_per_char": 0.7155424753824869, "correct_loss_per_token": 1.3106915950775146, "incorrect_loss_per_token": 1.4310849507649739, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6003642082214355, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.6003642082214355, "logits_per_char": -0.8001821041107178, "num_chars": 2}, {"sum_logits": -1.3106915950775146, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3106915950775146, "logits_per_char": -0.6553457975387573, "num_chars": 2}, {"sum_logits": -1.4668498039245605, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4668498039245605, "logits_per_char": -0.7334249019622803, "num_chars": 2}, {"sum_logits": -1.2260408401489258, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.2260408401489258, "logits_per_char": -0.6130204200744629, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 733, "native_id": "MEA_2010_8_6-v1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2745099067687988, "incorrect_loss_raw": 1.433026115099589, "correct_loss_per_char": 0.6372549533843994, "incorrect_loss_per_char": 0.7165130575497946, "correct_loss_per_token": 1.2745099067687988, "incorrect_loss_per_token": 1.433026115099589, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2745099067687988, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.2745099067687988, "logits_per_char": -0.6372549533843994, "num_chars": 2}, {"sum_logits": -1.430898666381836, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.430898666381836, "logits_per_char": -0.715449333190918, "num_chars": 2}, {"sum_logits": -1.4031542539596558, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4031542539596558, "logits_per_char": -0.7015771269798279, "num_chars": 2}, {"sum_logits": -1.4650254249572754, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4650254249572754, "logits_per_char": -0.7325127124786377, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 734, "native_id": "NYSEDREGENTS_2010_4_27", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2271411418914795, "incorrect_loss_raw": 1.4571343262990315, "correct_loss_per_char": 0.6135705709457397, "incorrect_loss_per_char": 0.7285671631495158, "correct_loss_per_token": 1.2271411418914795, "incorrect_loss_per_token": 1.4571343262990315, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2271411418914795, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.2271411418914795, "logits_per_char": -0.6135705709457397, "num_chars": 2}, {"sum_logits": -1.297402024269104, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.297402024269104, "logits_per_char": -0.648701012134552, "num_chars": 2}, {"sum_logits": -1.5180246829986572, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.5180246829986572, "logits_per_char": -0.7590123414993286, "num_chars": 2}, {"sum_logits": -1.5559762716293335, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.5559762716293335, "logits_per_char": -0.7779881358146667, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 735, "native_id": "Mercury_7163870", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3456686735153198, "incorrect_loss_raw": 1.4102853536605835, "correct_loss_per_char": 0.6728343367576599, "incorrect_loss_per_char": 0.7051426768302917, "correct_loss_per_token": 1.3456686735153198, "incorrect_loss_per_token": 1.4102853536605835, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5531842708587646, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.5531842708587646, "logits_per_char": -0.7765921354293823, "num_chars": 2}, {"sum_logits": -1.3265894651412964, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.3265894651412964, "logits_per_char": -0.6632947325706482, "num_chars": 2}, {"sum_logits": -1.3456686735153198, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3456686735153198, "logits_per_char": -0.6728343367576599, "num_chars": 2}, {"sum_logits": -1.3510823249816895, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3510823249816895, "logits_per_char": -0.6755411624908447, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 736, "native_id": "MCAS_2010_8_12012", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4126893281936646, "incorrect_loss_raw": 1.384163498878479, "correct_loss_per_char": 0.7063446640968323, "incorrect_loss_per_char": 0.6920817494392395, "correct_loss_per_token": 1.4126893281936646, "incorrect_loss_per_token": 1.384163498878479, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3607641458511353, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.3607641458511353, "logits_per_char": -0.6803820729255676, "num_chars": 2}, {"sum_logits": -1.4165499210357666, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4165499210357666, "logits_per_char": -0.7082749605178833, "num_chars": 2}, {"sum_logits": -1.3751764297485352, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3751764297485352, "logits_per_char": -0.6875882148742676, "num_chars": 2}, {"sum_logits": -1.4126893281936646, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4126893281936646, "logits_per_char": -0.7063446640968323, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 737, "native_id": "Mercury_7026933", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3112311363220215, "incorrect_loss_raw": 1.4186228116353352, "correct_loss_per_char": 0.6556155681610107, "incorrect_loss_per_char": 0.7093114058176676, "correct_loss_per_token": 1.3112311363220215, "incorrect_loss_per_token": 1.4186228116353352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4359711408615112, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4359711408615112, "logits_per_char": -0.7179855704307556, "num_chars": 2}, {"sum_logits": -1.3981608152389526, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3981608152389526, "logits_per_char": -0.6990804076194763, "num_chars": 2}, {"sum_logits": -1.421736478805542, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.421736478805542, "logits_per_char": -0.710868239402771, "num_chars": 2}, {"sum_logits": -1.3112311363220215, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.3112311363220215, "logits_per_char": -0.6556155681610107, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 738, "native_id": "Mercury_7213045", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2823854684829712, "incorrect_loss_raw": 1.4291902383168538, "correct_loss_per_char": 0.6411927342414856, "incorrect_loss_per_char": 0.7145951191584269, "correct_loss_per_token": 1.2823854684829712, "incorrect_loss_per_token": 1.4291902383168538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4486384391784668, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4486384391784668, "logits_per_char": -0.7243192195892334, "num_chars": 2}, {"sum_logits": -1.2823854684829712, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.2823854684829712, "logits_per_char": -0.6411927342414856, "num_chars": 2}, {"sum_logits": -1.4153803586959839, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4153803586959839, "logits_per_char": -0.7076901793479919, "num_chars": 2}, {"sum_logits": -1.4235519170761108, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4235519170761108, "logits_per_char": -0.7117759585380554, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 739, "native_id": "MCAS_2000_8_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3661011457443237, "incorrect_loss_raw": 1.403933842976888, "correct_loss_per_char": 0.6830505728721619, "incorrect_loss_per_char": 0.701966921488444, "correct_loss_per_token": 1.3661011457443237, "incorrect_loss_per_token": 1.403933842976888, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3330718278884888, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.3330718278884888, "logits_per_char": -0.6665359139442444, "num_chars": 2}, {"sum_logits": -1.3828524351119995, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3828524351119995, "logits_per_char": -0.6914262175559998, "num_chars": 2}, {"sum_logits": -1.3661011457443237, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3661011457443237, "logits_per_char": -0.6830505728721619, "num_chars": 2}, {"sum_logits": -1.4958772659301758, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4958772659301758, "logits_per_char": -0.7479386329650879, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 740, "native_id": "Mercury_405107", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3078198432922363, "incorrect_loss_raw": 1.4217639366785686, "correct_loss_per_char": 0.6539099216461182, "incorrect_loss_per_char": 0.7108819683392843, "correct_loss_per_token": 1.3078198432922363, "incorrect_loss_per_token": 1.4217639366785686, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5230464935302734, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5230464935302734, "logits_per_char": -0.7615232467651367, "num_chars": 2}, {"sum_logits": -1.3355263471603394, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3355263471603394, "logits_per_char": -0.6677631735801697, "num_chars": 2}, {"sum_logits": -1.4067189693450928, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4067189693450928, "logits_per_char": -0.7033594846725464, "num_chars": 2}, {"sum_logits": -1.3078198432922363, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3078198432922363, "logits_per_char": -0.6539099216461182, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 741, "native_id": "MDSA_2008_5_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6341623067855835, "incorrect_loss_raw": 1.3324580589930217, "correct_loss_per_char": 0.8170811533927917, "incorrect_loss_per_char": 0.6662290294965109, "correct_loss_per_token": 1.6341623067855835, "incorrect_loss_per_token": 1.3324580589930217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6341623067855835, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.6341623067855835, "logits_per_char": -0.8170811533927917, "num_chars": 2}, {"sum_logits": -1.5416386127471924, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5416386127471924, "logits_per_char": -0.7708193063735962, "num_chars": 2}, {"sum_logits": -1.2789167165756226, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.2789167165756226, "logits_per_char": -0.6394583582878113, "num_chars": 2}, {"sum_logits": -1.17681884765625, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.17681884765625, "logits_per_char": -0.588409423828125, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 742, "native_id": "Mercury_7033548", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4875130653381348, "incorrect_loss_raw": 1.3596816857655842, "correct_loss_per_char": 0.7437565326690674, "incorrect_loss_per_char": 0.6798408428827921, "correct_loss_per_token": 1.4875130653381348, "incorrect_loss_per_token": 1.3596816857655842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.321178913116455, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.321178913116455, "logits_per_char": -0.6605894565582275, "num_chars": 2}, {"sum_logits": -1.3901951313018799, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3901951313018799, "logits_per_char": -0.6950975656509399, "num_chars": 2}, {"sum_logits": -1.367671012878418, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.367671012878418, "logits_per_char": -0.683835506439209, "num_chars": 2}, {"sum_logits": -1.4875130653381348, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4875130653381348, "logits_per_char": -0.7437565326690674, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 743, "native_id": "Mercury_7016695", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2709242105484009, "incorrect_loss_raw": 1.4348374605178833, "correct_loss_per_char": 0.6354621052742004, "incorrect_loss_per_char": 0.7174187302589417, "correct_loss_per_token": 1.2709242105484009, "incorrect_loss_per_token": 1.4348374605178833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.488122820854187, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.488122820854187, "logits_per_char": -0.7440614104270935, "num_chars": 2}, {"sum_logits": -1.450504183769226, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.450504183769226, "logits_per_char": -0.725252091884613, "num_chars": 2}, {"sum_logits": -1.3658853769302368, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3658853769302368, "logits_per_char": -0.6829426884651184, "num_chars": 2}, {"sum_logits": -1.2709242105484009, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2709242105484009, "logits_per_char": -0.6354621052742004, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 744, "native_id": "VASoL_2009_5_20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3246755599975586, "incorrect_loss_raw": 1.422907829284668, "correct_loss_per_char": 0.6623377799987793, "incorrect_loss_per_char": 0.711453914642334, "correct_loss_per_token": 1.3246755599975586, "incorrect_loss_per_token": 1.422907829284668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5005722045898438, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.5005722045898438, "logits_per_char": -0.7502861022949219, "num_chars": 2}, {"sum_logits": -1.5058634281158447, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.5058634281158447, "logits_per_char": -0.7529317140579224, "num_chars": 2}, {"sum_logits": -1.3246755599975586, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3246755599975586, "logits_per_char": -0.6623377799987793, "num_chars": 2}, {"sum_logits": -1.2622878551483154, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.2622878551483154, "logits_per_char": -0.6311439275741577, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 745, "native_id": "Mercury_SC_401141", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5149285793304443, "incorrect_loss_raw": 1.3525984684626262, "correct_loss_per_char": 0.7574642896652222, "incorrect_loss_per_char": 0.6762992342313131, "correct_loss_per_token": 1.5149285793304443, "incorrect_loss_per_token": 1.3525984684626262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5149285793304443, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.5149285793304443, "logits_per_char": -0.7574642896652222, "num_chars": 2}, {"sum_logits": -1.2850534915924072, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": true, "logits_per_token": -1.2850534915924072, "logits_per_char": -0.6425267457962036, "num_chars": 2}, {"sum_logits": -1.386220932006836, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.386220932006836, "logits_per_char": -0.693110466003418, "num_chars": 2}, {"sum_logits": -1.3865209817886353, "num_tokens": 1, "num_tokens_all": 307, "is_greedy": false, "logits_per_token": -1.3865209817886353, "logits_per_char": -0.6932604908943176, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 746, "native_id": "Mercury_7145548", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3386634588241577, "incorrect_loss_raw": 1.4137597878774006, "correct_loss_per_char": 0.6693317294120789, "incorrect_loss_per_char": 0.7068798939387003, "correct_loss_per_token": 1.3386634588241577, "incorrect_loss_per_token": 1.4137597878774006, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5510114431381226, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.5510114431381226, "logits_per_char": -0.7755057215690613, "num_chars": 2}, {"sum_logits": -1.3679169416427612, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3679169416427612, "logits_per_char": -0.6839584708213806, "num_chars": 2}, {"sum_logits": -1.3386634588241577, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3386634588241577, "logits_per_char": -0.6693317294120789, "num_chars": 2}, {"sum_logits": -1.3223509788513184, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.3223509788513184, "logits_per_char": -0.6611754894256592, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 747, "native_id": "MCAS_2004_9_7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3685197830200195, "incorrect_loss_raw": 1.3982227643330891, "correct_loss_per_char": 0.6842598915100098, "incorrect_loss_per_char": 0.6991113821665446, "correct_loss_per_token": 1.3685197830200195, "incorrect_loss_per_token": 1.3982227643330891, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.399003505706787, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.399003505706787, "logits_per_char": -0.6995017528533936, "num_chars": 2}, {"sum_logits": -1.3833712339401245, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3833712339401245, "logits_per_char": -0.6916856169700623, "num_chars": 2}, {"sum_logits": -1.3685197830200195, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3685197830200195, "logits_per_char": -0.6842598915100098, "num_chars": 2}, {"sum_logits": -1.412293553352356, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.412293553352356, "logits_per_char": -0.706146776676178, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 748, "native_id": "Mercury_SC_415338", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4286941289901733, "incorrect_loss_raw": 1.3814743359883626, "correct_loss_per_char": 0.7143470644950867, "incorrect_loss_per_char": 0.6907371679941813, "correct_loss_per_token": 1.4286941289901733, "incorrect_loss_per_token": 1.3814743359883626, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5226140022277832, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.5226140022277832, "logits_per_char": -0.7613070011138916, "num_chars": 2}, {"sum_logits": -1.4286941289901733, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4286941289901733, "logits_per_char": -0.7143470644950867, "num_chars": 2}, {"sum_logits": -1.3275842666625977, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3275842666625977, "logits_per_char": -0.6637921333312988, "num_chars": 2}, {"sum_logits": -1.294224739074707, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.294224739074707, "logits_per_char": -0.6471123695373535, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 749, "native_id": "Mercury_SC_415584", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.472130537033081, "incorrect_loss_raw": 1.3662578264872234, "correct_loss_per_char": 0.7360652685165405, "incorrect_loss_per_char": 0.6831289132436117, "correct_loss_per_token": 1.472130537033081, "incorrect_loss_per_token": 1.3662578264872234, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4465733766555786, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4465733766555786, "logits_per_char": -0.7232866883277893, "num_chars": 2}, {"sum_logits": -1.472130537033081, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.472130537033081, "logits_per_char": -0.7360652685165405, "num_chars": 2}, {"sum_logits": -1.3700052499771118, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3700052499771118, "logits_per_char": -0.6850026249885559, "num_chars": 2}, {"sum_logits": -1.2821948528289795, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2821948528289795, "logits_per_char": -0.6410974264144897, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 750, "native_id": "Mercury_SC_400365", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4522781372070312, "incorrect_loss_raw": 1.380127191543579, "correct_loss_per_char": 0.7261390686035156, "incorrect_loss_per_char": 0.6900635957717896, "correct_loss_per_token": 1.4522781372070312, "incorrect_loss_per_token": 1.380127191543579, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.204174518585205, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.204174518585205, "logits_per_char": -0.6020872592926025, "num_chars": 2}, {"sum_logits": -1.4256125688552856, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4256125688552856, "logits_per_char": -0.7128062844276428, "num_chars": 2}, {"sum_logits": -1.5105944871902466, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.5105944871902466, "logits_per_char": -0.7552972435951233, "num_chars": 2}, {"sum_logits": -1.4522781372070312, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4522781372070312, "logits_per_char": -0.7261390686035156, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 751, "native_id": "Mercury_SC_400707", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4164724349975586, "incorrect_loss_raw": 1.381476640701294, "correct_loss_per_char": 0.7082362174987793, "incorrect_loss_per_char": 0.690738320350647, "correct_loss_per_token": 1.4164724349975586, "incorrect_loss_per_token": 1.381476640701294, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4381717443466187, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4381717443466187, "logits_per_char": -0.7190858721733093, "num_chars": 2}, {"sum_logits": -1.4164724349975586, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4164724349975586, "logits_per_char": -0.7082362174987793, "num_chars": 2}, {"sum_logits": -1.3763272762298584, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3763272762298584, "logits_per_char": -0.6881636381149292, "num_chars": 2}, {"sum_logits": -1.3299309015274048, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.3299309015274048, "logits_per_char": -0.6649654507637024, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 752, "native_id": "Mercury_185115", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.441062569618225, "incorrect_loss_raw": 1.3778955141703289, "correct_loss_per_char": 0.7205312848091125, "incorrect_loss_per_char": 0.6889477570851644, "correct_loss_per_token": 1.441062569618225, "incorrect_loss_per_token": 1.3778955141703289, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4407967329025269, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4407967329025269, "logits_per_char": -0.7203983664512634, "num_chars": 2}, {"sum_logits": -1.441062569618225, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.441062569618225, "logits_per_char": -0.7205312848091125, "num_chars": 2}, {"sum_logits": -1.242702841758728, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.242702841758728, "logits_per_char": -0.621351420879364, "num_chars": 2}, {"sum_logits": -1.4501869678497314, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4501869678497314, "logits_per_char": -0.7250934839248657, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 753, "native_id": "Mercury_SC_415005", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.394575595855713, "incorrect_loss_raw": 1.823179264863332, "correct_loss_per_char": 0.6972877979278564, "incorrect_loss_per_char": 0.911589632431666, "correct_loss_per_token": 1.394575595855713, "incorrect_loss_per_token": 1.823179264863332, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8673601746559143, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -0.8673601746559143, "logits_per_char": -0.43368008732795715, "num_chars": 2}, {"sum_logits": -1.394575595855713, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.394575595855713, "logits_per_char": -0.6972877979278564, "num_chars": 2}, {"sum_logits": -1.8464388847351074, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.8464388847351074, "logits_per_char": -0.9232194423675537, "num_chars": 2}, {"sum_logits": -2.7557387351989746, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -2.7557387351989746, "logits_per_char": -1.3778693675994873, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 754, "native_id": "NYSEDREGENTS_2013_8_35", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2933822870254517, "incorrect_loss_raw": 1.4255203008651733, "correct_loss_per_char": 0.6466911435127258, "incorrect_loss_per_char": 0.7127601504325867, "correct_loss_per_token": 1.2933822870254517, "incorrect_loss_per_token": 1.4255203008651733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.434350848197937, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.434350848197937, "logits_per_char": -0.7171754240989685, "num_chars": 2}, {"sum_logits": -1.3734712600708008, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3734712600708008, "logits_per_char": -0.6867356300354004, "num_chars": 2}, {"sum_logits": -1.4687387943267822, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4687387943267822, "logits_per_char": -0.7343693971633911, "num_chars": 2}, {"sum_logits": -1.2933822870254517, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.2933822870254517, "logits_per_char": -0.6466911435127258, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 755, "native_id": "MCAS_1998_8_11", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5518921613693237, "incorrect_loss_raw": 1.345698316891988, "correct_loss_per_char": 0.7759460806846619, "incorrect_loss_per_char": 0.672849158445994, "correct_loss_per_token": 1.5518921613693237, "incorrect_loss_per_token": 1.345698316891988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2798928022384644, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.2798928022384644, "logits_per_char": -0.6399464011192322, "num_chars": 2}, {"sum_logits": -1.5518921613693237, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.5518921613693237, "logits_per_char": -0.7759460806846619, "num_chars": 2}, {"sum_logits": -1.4641878604888916, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4641878604888916, "logits_per_char": -0.7320939302444458, "num_chars": 2}, {"sum_logits": -1.2930142879486084, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.2930142879486084, "logits_per_char": -0.6465071439743042, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 756, "native_id": "Mercury_7029855", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3201887607574463, "incorrect_loss_raw": 1.420068899790446, "correct_loss_per_char": 0.6600943803787231, "incorrect_loss_per_char": 0.710034449895223, "correct_loss_per_token": 1.3201887607574463, "incorrect_loss_per_token": 1.420068899790446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5351386070251465, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.5351386070251465, "logits_per_char": -0.7675693035125732, "num_chars": 2}, {"sum_logits": -1.3201887607574463, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.3201887607574463, "logits_per_char": -0.6600943803787231, "num_chars": 2}, {"sum_logits": -1.3336119651794434, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3336119651794434, "logits_per_char": -0.6668059825897217, "num_chars": 2}, {"sum_logits": -1.391456127166748, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.391456127166748, "logits_per_char": -0.695728063583374, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 757, "native_id": "Mercury_400758", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.410364031791687, "incorrect_loss_raw": 1.3878716230392456, "correct_loss_per_char": 0.7051820158958435, "incorrect_loss_per_char": 0.6939358115196228, "correct_loss_per_token": 1.410364031791687, "incorrect_loss_per_token": 1.3878716230392456, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5077099800109863, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.5077099800109863, "logits_per_char": -0.7538549900054932, "num_chars": 2}, {"sum_logits": -1.3426035642623901, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3426035642623901, "logits_per_char": -0.6713017821311951, "num_chars": 2}, {"sum_logits": -1.410364031791687, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.410364031791687, "logits_per_char": -0.7051820158958435, "num_chars": 2}, {"sum_logits": -1.3133013248443604, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3133013248443604, "logits_per_char": -0.6566506624221802, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 758, "native_id": "NYSEDREGENTS_2013_4_19", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2637081146240234, "incorrect_loss_raw": 1.4404309590657551, "correct_loss_per_char": 0.6318540573120117, "incorrect_loss_per_char": 0.7202154795328776, "correct_loss_per_token": 1.2637081146240234, "incorrect_loss_per_token": 1.4404309590657551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2637081146240234, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.2637081146240234, "logits_per_char": -0.6318540573120117, "num_chars": 2}, {"sum_logits": -1.318276047706604, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.318276047706604, "logits_per_char": -0.659138023853302, "num_chars": 2}, {"sum_logits": -1.4505845308303833, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4505845308303833, "logits_per_char": -0.7252922654151917, "num_chars": 2}, {"sum_logits": -1.5524322986602783, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.5524322986602783, "logits_per_char": -0.7762161493301392, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 759, "native_id": "VASoL_2009_3_32", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4336230754852295, "incorrect_loss_raw": 1.3800627787907918, "correct_loss_per_char": 0.7168115377426147, "incorrect_loss_per_char": 0.6900313893953959, "correct_loss_per_token": 1.4336230754852295, "incorrect_loss_per_token": 1.3800627787907918, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3640110492706299, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3640110492706299, "logits_per_char": -0.6820055246353149, "num_chars": 2}, {"sum_logits": -1.3215646743774414, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.3215646743774414, "logits_per_char": -0.6607823371887207, "num_chars": 2}, {"sum_logits": -1.4546126127243042, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4546126127243042, "logits_per_char": -0.7273063063621521, "num_chars": 2}, {"sum_logits": -1.4336230754852295, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4336230754852295, "logits_per_char": -0.7168115377426147, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 760, "native_id": "Mercury_7159425", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5689560174942017, "incorrect_loss_raw": 1.3430171807607014, "correct_loss_per_char": 0.7844780087471008, "incorrect_loss_per_char": 0.6715085903803507, "correct_loss_per_token": 1.5689560174942017, "incorrect_loss_per_token": 1.3430171807607014, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5689560174942017, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5689560174942017, "logits_per_char": -0.7844780087471008, "num_chars": 2}, {"sum_logits": -1.4704523086547852, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4704523086547852, "logits_per_char": -0.7352261543273926, "num_chars": 2}, {"sum_logits": -1.3687770366668701, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3687770366668701, "logits_per_char": -0.6843885183334351, "num_chars": 2}, {"sum_logits": -1.1898221969604492, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.1898221969604492, "logits_per_char": -0.5949110984802246, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 761, "native_id": "Mercury_SC_400021", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4419748783111572, "incorrect_loss_raw": 1.3854554096857707, "correct_loss_per_char": 0.7209874391555786, "incorrect_loss_per_char": 0.6927277048428854, "correct_loss_per_token": 1.4419748783111572, "incorrect_loss_per_token": 1.3854554096857707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5171812772750854, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.5171812772750854, "logits_per_char": -0.7585906386375427, "num_chars": 2}, {"sum_logits": -1.458872675895691, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.458872675895691, "logits_per_char": -0.7294363379478455, "num_chars": 2}, {"sum_logits": -1.4419748783111572, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4419748783111572, "logits_per_char": -0.7209874391555786, "num_chars": 2}, {"sum_logits": -1.1803122758865356, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.1803122758865356, "logits_per_char": -0.5901561379432678, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 762, "native_id": "Mercury_SC_415078", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5206706523895264, "incorrect_loss_raw": 1.35255761941274, "correct_loss_per_char": 0.7603353261947632, "incorrect_loss_per_char": 0.67627880970637, "correct_loss_per_token": 1.5206706523895264, "incorrect_loss_per_token": 1.35255761941274, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5206706523895264, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5206706523895264, "logits_per_char": -0.7603353261947632, "num_chars": 2}, {"sum_logits": -1.4674620628356934, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4674620628356934, "logits_per_char": -0.7337310314178467, "num_chars": 2}, {"sum_logits": -1.335105299949646, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.335105299949646, "logits_per_char": -0.667552649974823, "num_chars": 2}, {"sum_logits": -1.2551054954528809, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2551054954528809, "logits_per_char": -0.6275527477264404, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 763, "native_id": "Mercury_SC_415028", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5132834911346436, "incorrect_loss_raw": 1.3550918102264404, "correct_loss_per_char": 0.7566417455673218, "incorrect_loss_per_char": 0.6775459051132202, "correct_loss_per_token": 1.5132834911346436, "incorrect_loss_per_token": 1.3550918102264404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.293225646018982, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.293225646018982, "logits_per_char": -0.646612823009491, "num_chars": 2}, {"sum_logits": -1.4274702072143555, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4274702072143555, "logits_per_char": -0.7137351036071777, "num_chars": 2}, {"sum_logits": -1.3445795774459839, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3445795774459839, "logits_per_char": -0.6722897887229919, "num_chars": 2}, {"sum_logits": -1.5132834911346436, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.5132834911346436, "logits_per_char": -0.7566417455673218, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 764, "native_id": "MCAS_2000_8_23", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5651904344558716, "incorrect_loss_raw": 1.3394059737523396, "correct_loss_per_char": 0.7825952172279358, "incorrect_loss_per_char": 0.6697029868761698, "correct_loss_per_token": 1.5651904344558716, "incorrect_loss_per_token": 1.3394059737523396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5651904344558716, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.5651904344558716, "logits_per_char": -0.7825952172279358, "num_chars": 2}, {"sum_logits": -1.3237696886062622, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.3237696886062622, "logits_per_char": -0.6618848443031311, "num_chars": 2}, {"sum_logits": -1.366051197052002, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.366051197052002, "logits_per_char": -0.683025598526001, "num_chars": 2}, {"sum_logits": -1.3283970355987549, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3283970355987549, "logits_per_char": -0.6641985177993774, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 765, "native_id": "Mercury_7270270", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.363544225692749, "incorrect_loss_raw": 1.3996037244796753, "correct_loss_per_char": 0.6817721128463745, "incorrect_loss_per_char": 0.6998018622398376, "correct_loss_per_token": 1.363544225692749, "incorrect_loss_per_token": 1.3996037244796753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4663283824920654, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4663283824920654, "logits_per_char": -0.7331641912460327, "num_chars": 2}, {"sum_logits": -1.3597619533538818, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3597619533538818, "logits_per_char": -0.6798809766769409, "num_chars": 2}, {"sum_logits": -1.363544225692749, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.363544225692749, "logits_per_char": -0.6817721128463745, "num_chars": 2}, {"sum_logits": -1.3727208375930786, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3727208375930786, "logits_per_char": -0.6863604187965393, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 766, "native_id": "LEAP_2003_8_10394", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3576271533966064, "incorrect_loss_raw": 1.402565638224284, "correct_loss_per_char": 0.6788135766983032, "incorrect_loss_per_char": 0.701282819112142, "correct_loss_per_token": 1.3576271533966064, "incorrect_loss_per_token": 1.402565638224284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4550225734710693, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4550225734710693, "logits_per_char": -0.7275112867355347, "num_chars": 2}, {"sum_logits": -1.3458157777786255, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3458157777786255, "logits_per_char": -0.6729078888893127, "num_chars": 2}, {"sum_logits": -1.4068585634231567, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4068585634231567, "logits_per_char": -0.7034292817115784, "num_chars": 2}, {"sum_logits": -1.3576271533966064, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3576271533966064, "logits_per_char": -0.6788135766983032, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 767, "native_id": "CSZ30499", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.578237771987915, "incorrect_loss_raw": 1.3363497257232666, "correct_loss_per_char": 0.7891188859939575, "incorrect_loss_per_char": 0.6681748628616333, "correct_loss_per_token": 1.578237771987915, "incorrect_loss_per_token": 1.3363497257232666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3885033130645752, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3885033130645752, "logits_per_char": -0.6942516565322876, "num_chars": 2}, {"sum_logits": -1.578237771987915, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.578237771987915, "logits_per_char": -0.7891188859939575, "num_chars": 2}, {"sum_logits": -1.3454139232635498, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3454139232635498, "logits_per_char": -0.6727069616317749, "num_chars": 2}, {"sum_logits": -1.2751319408416748, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2751319408416748, "logits_per_char": -0.6375659704208374, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 768, "native_id": "MCAS_2000_4_23", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2607245445251465, "incorrect_loss_raw": 1.4370733499526978, "correct_loss_per_char": 0.6303622722625732, "incorrect_loss_per_char": 0.7185366749763489, "correct_loss_per_token": 1.2607245445251465, "incorrect_loss_per_token": 1.4370733499526978, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2607245445251465, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.2607245445251465, "logits_per_char": -0.6303622722625732, "num_chars": 2}, {"sum_logits": -1.4526267051696777, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4526267051696777, "logits_per_char": -0.7263133525848389, "num_chars": 2}, {"sum_logits": -1.394781470298767, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.394781470298767, "logits_per_char": -0.6973907351493835, "num_chars": 2}, {"sum_logits": -1.4638118743896484, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4638118743896484, "logits_per_char": -0.7319059371948242, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 769, "native_id": "Mercury_7137445", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3968734741210938, "incorrect_loss_raw": 1.3932185570398967, "correct_loss_per_char": 0.6984367370605469, "incorrect_loss_per_char": 0.6966092785199484, "correct_loss_per_token": 1.3968734741210938, "incorrect_loss_per_token": 1.3932185570398967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.539017915725708, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.539017915725708, "logits_per_char": -0.769508957862854, "num_chars": 2}, {"sum_logits": -1.359317421913147, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.359317421913147, "logits_per_char": -0.6796587109565735, "num_chars": 2}, {"sum_logits": -1.3968734741210938, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3968734741210938, "logits_per_char": -0.6984367370605469, "num_chars": 2}, {"sum_logits": -1.281320333480835, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.281320333480835, "logits_per_char": -0.6406601667404175, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 770, "native_id": "Mercury_192203", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.374358057975769, "incorrect_loss_raw": 1.396613637606303, "correct_loss_per_char": 0.6871790289878845, "incorrect_loss_per_char": 0.6983068188031515, "correct_loss_per_token": 1.374358057975769, "incorrect_loss_per_token": 1.396613637606303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3920568227767944, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3920568227767944, "logits_per_char": -0.6960284113883972, "num_chars": 2}, {"sum_logits": -1.374358057975769, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.374358057975769, "logits_per_char": -0.6871790289878845, "num_chars": 2}, {"sum_logits": -1.4042209386825562, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4042209386825562, "logits_per_char": -0.7021104693412781, "num_chars": 2}, {"sum_logits": -1.393563151359558, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.393563151359558, "logits_per_char": -0.696781575679779, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 771, "native_id": "Mercury_7236618", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3128968477249146, "incorrect_loss_raw": 1.430617133776347, "correct_loss_per_char": 0.6564484238624573, "incorrect_loss_per_char": 0.7153085668881735, "correct_loss_per_token": 1.3128968477249146, "incorrect_loss_per_token": 1.430617133776347, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.599461317062378, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.599461317062378, "logits_per_char": -0.799730658531189, "num_chars": 2}, {"sum_logits": -1.4791995286941528, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4791995286941528, "logits_per_char": -0.7395997643470764, "num_chars": 2}, {"sum_logits": -1.3128968477249146, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3128968477249146, "logits_per_char": -0.6564484238624573, "num_chars": 2}, {"sum_logits": -1.2131905555725098, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.2131905555725098, "logits_per_char": -0.6065952777862549, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 772, "native_id": "ACTAAP_2007_7_14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.397983431816101, "incorrect_loss_raw": 1.3903148174285889, "correct_loss_per_char": 0.6989917159080505, "incorrect_loss_per_char": 0.6951574087142944, "correct_loss_per_token": 1.397983431816101, "incorrect_loss_per_token": 1.3903148174285889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4160261154174805, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4160261154174805, "logits_per_char": -0.7080130577087402, "num_chars": 2}, {"sum_logits": -1.3048486709594727, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3048486709594727, "logits_per_char": -0.6524243354797363, "num_chars": 2}, {"sum_logits": -1.397983431816101, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.397983431816101, "logits_per_char": -0.6989917159080505, "num_chars": 2}, {"sum_logits": -1.4500696659088135, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4500696659088135, "logits_per_char": -0.7250348329544067, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 773, "native_id": "Mercury_7228200", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3923343420028687, "incorrect_loss_raw": 1.390837033589681, "correct_loss_per_char": 0.6961671710014343, "incorrect_loss_per_char": 0.6954185167948405, "correct_loss_per_token": 1.3923343420028687, "incorrect_loss_per_token": 1.390837033589681, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3599423170089722, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.3599423170089722, "logits_per_char": -0.6799711585044861, "num_chars": 2}, {"sum_logits": -1.3712995052337646, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3712995052337646, "logits_per_char": -0.6856497526168823, "num_chars": 2}, {"sum_logits": -1.3923343420028687, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3923343420028687, "logits_per_char": -0.6961671710014343, "num_chars": 2}, {"sum_logits": -1.4412692785263062, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4412692785263062, "logits_per_char": -0.7206346392631531, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 774, "native_id": "Mercury_7017903", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4106576442718506, "incorrect_loss_raw": 1.3837339878082275, "correct_loss_per_char": 0.7053288221359253, "incorrect_loss_per_char": 0.6918669939041138, "correct_loss_per_token": 1.4106576442718506, "incorrect_loss_per_token": 1.3837339878082275, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4106576442718506, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4106576442718506, "logits_per_char": -0.7053288221359253, "num_chars": 2}, {"sum_logits": -1.3337650299072266, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.3337650299072266, "logits_per_char": -0.6668825149536133, "num_chars": 2}, {"sum_logits": -1.4131075143814087, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4131075143814087, "logits_per_char": -0.7065537571907043, "num_chars": 2}, {"sum_logits": -1.4043294191360474, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4043294191360474, "logits_per_char": -0.7021647095680237, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 775, "native_id": "Mercury_SC_402630", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5511903762817383, "incorrect_loss_raw": 1.3423531850179036, "correct_loss_per_char": 0.7755951881408691, "incorrect_loss_per_char": 0.6711765925089518, "correct_loss_per_token": 1.5511903762817383, "incorrect_loss_per_token": 1.3423531850179036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3329625129699707, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3329625129699707, "logits_per_char": -0.6664812564849854, "num_chars": 2}, {"sum_logits": -1.3115111589431763, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.3115111589431763, "logits_per_char": -0.6557555794715881, "num_chars": 2}, {"sum_logits": -1.382585883140564, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.382585883140564, "logits_per_char": -0.691292941570282, "num_chars": 2}, {"sum_logits": -1.5511903762817383, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.5511903762817383, "logits_per_char": -0.7755951881408691, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 776, "native_id": "Mercury_SC_402251", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3716604709625244, "incorrect_loss_raw": 1.397790511449178, "correct_loss_per_char": 0.6858302354812622, "incorrect_loss_per_char": 0.698895255724589, "correct_loss_per_token": 1.3716604709625244, "incorrect_loss_per_token": 1.397790511449178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4066041707992554, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4066041707992554, "logits_per_char": -0.7033020853996277, "num_chars": 2}, {"sum_logits": -1.3716604709625244, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3716604709625244, "logits_per_char": -0.6858302354812622, "num_chars": 2}, {"sum_logits": -1.3558714389801025, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3558714389801025, "logits_per_char": -0.6779357194900513, "num_chars": 2}, {"sum_logits": -1.4308959245681763, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4308959245681763, "logits_per_char": -0.7154479622840881, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 777, "native_id": "Mercury_7033600", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.343703269958496, "incorrect_loss_raw": 1.406762679417928, "correct_loss_per_char": 0.671851634979248, "incorrect_loss_per_char": 0.703381339708964, "correct_loss_per_token": 1.343703269958496, "incorrect_loss_per_token": 1.406762679417928, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.343703269958496, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.343703269958496, "logits_per_char": -0.671851634979248, "num_chars": 2}, {"sum_logits": -1.3958100080490112, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3958100080490112, "logits_per_char": -0.6979050040245056, "num_chars": 2}, {"sum_logits": -1.3749408721923828, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3749408721923828, "logits_per_char": -0.6874704360961914, "num_chars": 2}, {"sum_logits": -1.4495371580123901, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4495371580123901, "logits_per_char": -0.7247685790061951, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 778, "native_id": "Mercury_7100643", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3467206954956055, "incorrect_loss_raw": 1.4070120652516682, "correct_loss_per_char": 0.6733603477478027, "incorrect_loss_per_char": 0.7035060326258341, "correct_loss_per_token": 1.3467206954956055, "incorrect_loss_per_token": 1.4070120652516682, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4512304067611694, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4512304067611694, "logits_per_char": -0.7256152033805847, "num_chars": 2}, {"sum_logits": -1.3278839588165283, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.3278839588165283, "logits_per_char": -0.6639419794082642, "num_chars": 2}, {"sum_logits": -1.4419218301773071, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4419218301773071, "logits_per_char": -0.7209609150886536, "num_chars": 2}, {"sum_logits": -1.3467206954956055, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.3467206954956055, "logits_per_char": -0.6733603477478027, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 779, "native_id": "Mercury_406779", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4029442071914673, "incorrect_loss_raw": 1.3888063828150432, "correct_loss_per_char": 0.7014721035957336, "incorrect_loss_per_char": 0.6944031914075216, "correct_loss_per_token": 1.4029442071914673, "incorrect_loss_per_token": 1.3888063828150432, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4715014696121216, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4715014696121216, "logits_per_char": -0.7357507348060608, "num_chars": 2}, {"sum_logits": -1.413110613822937, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.413110613822937, "logits_per_char": -0.7065553069114685, "num_chars": 2}, {"sum_logits": -1.4029442071914673, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4029442071914673, "logits_per_char": -0.7014721035957336, "num_chars": 2}, {"sum_logits": -1.2818070650100708, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.2818070650100708, "logits_per_char": -0.6409035325050354, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 780, "native_id": "ACTAAP_2007_7_26", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3429841995239258, "incorrect_loss_raw": 1.4112226168314617, "correct_loss_per_char": 0.6714920997619629, "incorrect_loss_per_char": 0.7056113084157308, "correct_loss_per_token": 1.3429841995239258, "incorrect_loss_per_token": 1.4112226168314617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5402252674102783, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5402252674102783, "logits_per_char": -0.7701126337051392, "num_chars": 2}, {"sum_logits": -1.3429841995239258, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3429841995239258, "logits_per_char": -0.6714920997619629, "num_chars": 2}, {"sum_logits": -1.366360068321228, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.366360068321228, "logits_per_char": -0.683180034160614, "num_chars": 2}, {"sum_logits": -1.3270825147628784, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3270825147628784, "logits_per_char": -0.6635412573814392, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 781, "native_id": "Mercury_7094553", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1572343111038208, "incorrect_loss_raw": 1.488239049911499, "correct_loss_per_char": 0.5786171555519104, "incorrect_loss_per_char": 0.7441195249557495, "correct_loss_per_token": 1.1572343111038208, "incorrect_loss_per_token": 1.488239049911499, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6398144960403442, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.6398144960403442, "logits_per_char": -0.8199072480201721, "num_chars": 2}, {"sum_logits": -1.3638429641723633, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3638429641723633, "logits_per_char": -0.6819214820861816, "num_chars": 2}, {"sum_logits": -1.4610596895217896, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4610596895217896, "logits_per_char": -0.7305298447608948, "num_chars": 2}, {"sum_logits": -1.1572343111038208, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.1572343111038208, "logits_per_char": -0.5786171555519104, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 782, "native_id": "Mercury_7194320", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4890148639678955, "incorrect_loss_raw": 1.373811960220337, "correct_loss_per_char": 0.7445074319839478, "incorrect_loss_per_char": 0.6869059801101685, "correct_loss_per_token": 1.4890148639678955, "incorrect_loss_per_token": 1.373811960220337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4890148639678955, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4890148639678955, "logits_per_char": -0.7445074319839478, "num_chars": 2}, {"sum_logits": -1.5295047760009766, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.5295047760009766, "logits_per_char": -0.7647523880004883, "num_chars": 2}, {"sum_logits": -1.4462729692459106, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4462729692459106, "logits_per_char": -0.7231364846229553, "num_chars": 2}, {"sum_logits": -1.1456581354141235, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.1456581354141235, "logits_per_char": -0.5728290677070618, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 783, "native_id": "Mercury_7180705", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3874598741531372, "incorrect_loss_raw": 1.3912804921468098, "correct_loss_per_char": 0.6937299370765686, "incorrect_loss_per_char": 0.6956402460734049, "correct_loss_per_token": 1.3874598741531372, "incorrect_loss_per_token": 1.3912804921468098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3930208683013916, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3930208683013916, "logits_per_char": -0.6965104341506958, "num_chars": 2}, {"sum_logits": -1.3874598741531372, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3874598741531372, "logits_per_char": -0.6937299370765686, "num_chars": 2}, {"sum_logits": -1.412248969078064, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.412248969078064, "logits_per_char": -0.706124484539032, "num_chars": 2}, {"sum_logits": -1.3685716390609741, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3685716390609741, "logits_per_char": -0.6842858195304871, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 784, "native_id": "Mercury_7123533", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4179233312606812, "incorrect_loss_raw": 1.381111780802409, "correct_loss_per_char": 0.7089616656303406, "incorrect_loss_per_char": 0.6905558904012045, "correct_loss_per_token": 1.4179233312606812, "incorrect_loss_per_token": 1.381111780802409, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4179233312606812, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4179233312606812, "logits_per_char": -0.7089616656303406, "num_chars": 2}, {"sum_logits": -1.3660762310028076, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3660762310028076, "logits_per_char": -0.6830381155014038, "num_chars": 2}, {"sum_logits": -1.3949453830718994, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3949453830718994, "logits_per_char": -0.6974726915359497, "num_chars": 2}, {"sum_logits": -1.3823137283325195, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3823137283325195, "logits_per_char": -0.6911568641662598, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 785, "native_id": "Mercury_7139720", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.207051157951355, "incorrect_loss_raw": 1.4635754426320393, "correct_loss_per_char": 0.6035255789756775, "incorrect_loss_per_char": 0.7317877213160197, "correct_loss_per_token": 1.207051157951355, "incorrect_loss_per_token": 1.4635754426320393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5876693725585938, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5876693725585938, "logits_per_char": -0.7938346862792969, "num_chars": 2}, {"sum_logits": -1.3887547254562378, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3887547254562378, "logits_per_char": -0.6943773627281189, "num_chars": 2}, {"sum_logits": -1.4143022298812866, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4143022298812866, "logits_per_char": -0.7071511149406433, "num_chars": 2}, {"sum_logits": -1.207051157951355, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.207051157951355, "logits_per_char": -0.6035255789756775, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 786, "native_id": "Mercury_7008383", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3861186504364014, "incorrect_loss_raw": 1.3912721474965413, "correct_loss_per_char": 0.6930593252182007, "incorrect_loss_per_char": 0.6956360737482706, "correct_loss_per_token": 1.3861186504364014, "incorrect_loss_per_token": 1.3912721474965413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3759769201278687, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.3759769201278687, "logits_per_char": -0.6879884600639343, "num_chars": 2}, {"sum_logits": -1.4007408618927002, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4007408618927002, "logits_per_char": -0.7003704309463501, "num_chars": 2}, {"sum_logits": -1.3861186504364014, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3861186504364014, "logits_per_char": -0.6930593252182007, "num_chars": 2}, {"sum_logits": -1.3970986604690552, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3970986604690552, "logits_per_char": -0.6985493302345276, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 787, "native_id": "Mercury_7100748", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2804807424545288, "incorrect_loss_raw": 1.4400770664215088, "correct_loss_per_char": 0.6402403712272644, "incorrect_loss_per_char": 0.7200385332107544, "correct_loss_per_token": 1.2804807424545288, "incorrect_loss_per_token": 1.4400770664215088, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2922965288162231, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.2922965288162231, "logits_per_char": -0.6461482644081116, "num_chars": 2}, {"sum_logits": -1.5660358667373657, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.5660358667373657, "logits_per_char": -0.7830179333686829, "num_chars": 2}, {"sum_logits": -1.4618988037109375, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4618988037109375, "logits_per_char": -0.7309494018554688, "num_chars": 2}, {"sum_logits": -1.2804807424545288, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.2804807424545288, "logits_per_char": -0.6402403712272644, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 788, "native_id": "MEAP_2005_8_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5854957103729248, "incorrect_loss_raw": 1.3330716292063396, "correct_loss_per_char": 0.7927478551864624, "incorrect_loss_per_char": 0.6665358146031698, "correct_loss_per_token": 1.5854957103729248, "incorrect_loss_per_token": 1.3330716292063396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5854957103729248, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.5854957103729248, "logits_per_char": -0.7927478551864624, "num_chars": 2}, {"sum_logits": -1.31816565990448, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.31816565990448, "logits_per_char": -0.65908282995224, "num_chars": 2}, {"sum_logits": -1.3287290334701538, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3287290334701538, "logits_per_char": -0.6643645167350769, "num_chars": 2}, {"sum_logits": -1.3523201942443848, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3523201942443848, "logits_per_char": -0.6761600971221924, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 789, "native_id": "Mercury_7001208", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3244062662124634, "incorrect_loss_raw": 1.412968675295512, "correct_loss_per_char": 0.6622031331062317, "incorrect_loss_per_char": 0.706484337647756, "correct_loss_per_token": 1.3244062662124634, "incorrect_loss_per_token": 1.412968675295512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4426065683364868, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4426065683364868, "logits_per_char": -0.7213032841682434, "num_chars": 2}, {"sum_logits": -1.4490904808044434, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4490904808044434, "logits_per_char": -0.7245452404022217, "num_chars": 2}, {"sum_logits": -1.3244062662124634, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.3244062662124634, "logits_per_char": -0.6622031331062317, "num_chars": 2}, {"sum_logits": -1.3472089767456055, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3472089767456055, "logits_per_char": -0.6736044883728027, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 790, "native_id": "Mercury_410593", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4153456687927246, "incorrect_loss_raw": 1.385727842648824, "correct_loss_per_char": 0.7076728343963623, "incorrect_loss_per_char": 0.692863921324412, "correct_loss_per_token": 1.4153456687927246, "incorrect_loss_per_token": 1.385727842648824, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.465106725692749, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.465106725692749, "logits_per_char": -0.7325533628463745, "num_chars": 2}, {"sum_logits": -1.2765551805496216, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2765551805496216, "logits_per_char": -0.6382775902748108, "num_chars": 2}, {"sum_logits": -1.4153456687927246, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4153456687927246, "logits_per_char": -0.7076728343963623, "num_chars": 2}, {"sum_logits": -1.4155216217041016, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4155216217041016, "logits_per_char": -0.7077608108520508, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 791, "native_id": "Mercury_405465", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4934720993041992, "incorrect_loss_raw": 1.3589729865392048, "correct_loss_per_char": 0.7467360496520996, "incorrect_loss_per_char": 0.6794864932696024, "correct_loss_per_token": 1.4934720993041992, "incorrect_loss_per_token": 1.3589729865392048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.418291687965393, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.418291687965393, "logits_per_char": -0.7091458439826965, "num_chars": 2}, {"sum_logits": -1.3408288955688477, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3408288955688477, "logits_per_char": -0.6704144477844238, "num_chars": 2}, {"sum_logits": -1.317798376083374, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.317798376083374, "logits_per_char": -0.658899188041687, "num_chars": 2}, {"sum_logits": -1.4934720993041992, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4934720993041992, "logits_per_char": -0.7467360496520996, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 792, "native_id": "Mercury_7167038", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3725210428237915, "incorrect_loss_raw": 1.3980688254038494, "correct_loss_per_char": 0.6862605214118958, "incorrect_loss_per_char": 0.6990344127019247, "correct_loss_per_token": 1.3725210428237915, "incorrect_loss_per_token": 1.3980688254038494, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.438828706741333, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.438828706741333, "logits_per_char": -0.7194143533706665, "num_chars": 2}, {"sum_logits": -1.3223960399627686, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3223960399627686, "logits_per_char": -0.6611980199813843, "num_chars": 2}, {"sum_logits": -1.3725210428237915, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3725210428237915, "logits_per_char": -0.6862605214118958, "num_chars": 2}, {"sum_logits": -1.4329817295074463, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4329817295074463, "logits_per_char": -0.7164908647537231, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 793, "native_id": "Mercury_415267", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.391470193862915, "incorrect_loss_raw": 1.3941259384155273, "correct_loss_per_char": 0.6957350969314575, "incorrect_loss_per_char": 0.6970629692077637, "correct_loss_per_token": 1.391470193862915, "incorrect_loss_per_token": 1.3941259384155273, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4977129697799683, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4977129697799683, "logits_per_char": -0.7488564848899841, "num_chars": 2}, {"sum_logits": -1.3451186418533325, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3451186418533325, "logits_per_char": -0.6725593209266663, "num_chars": 2}, {"sum_logits": -1.3395462036132812, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3395462036132812, "logits_per_char": -0.6697731018066406, "num_chars": 2}, {"sum_logits": -1.391470193862915, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.391470193862915, "logits_per_char": -0.6957350969314575, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 794, "native_id": "OHAT_2007_5_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3685245513916016, "incorrect_loss_raw": 1.3973960876464844, "correct_loss_per_char": 0.6842622756958008, "incorrect_loss_per_char": 0.6986980438232422, "correct_loss_per_token": 1.3685245513916016, "incorrect_loss_per_token": 1.3973960876464844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4050934314727783, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4050934314727783, "logits_per_char": -0.7025467157363892, "num_chars": 2}, {"sum_logits": -1.3685245513916016, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3685245513916016, "logits_per_char": -0.6842622756958008, "num_chars": 2}, {"sum_logits": -1.3708840608596802, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3708840608596802, "logits_per_char": -0.6854420304298401, "num_chars": 2}, {"sum_logits": -1.4162107706069946, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4162107706069946, "logits_per_char": -0.7081053853034973, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 795, "native_id": "Mercury_416502", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4601261615753174, "incorrect_loss_raw": 1.370524009068807, "correct_loss_per_char": 0.7300630807876587, "incorrect_loss_per_char": 0.6852620045344034, "correct_loss_per_token": 1.4601261615753174, "incorrect_loss_per_token": 1.370524009068807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4135195016860962, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4135195016860962, "logits_per_char": -0.7067597508430481, "num_chars": 2}, {"sum_logits": -1.357937216758728, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.357937216758728, "logits_per_char": -0.678968608379364, "num_chars": 2}, {"sum_logits": -1.3401153087615967, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3401153087615967, "logits_per_char": -0.6700576543807983, "num_chars": 2}, {"sum_logits": -1.4601261615753174, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4601261615753174, "logits_per_char": -0.7300630807876587, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 796, "native_id": "Mercury_SC_LBS10174", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2795321941375732, "incorrect_loss_raw": 1.4400825103123982, "correct_loss_per_char": 0.6397660970687866, "incorrect_loss_per_char": 0.7200412551561991, "correct_loss_per_token": 1.2795321941375732, "incorrect_loss_per_token": 1.4400825103123982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2795321941375732, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2795321941375732, "logits_per_char": -0.6397660970687866, "num_chars": 2}, {"sum_logits": -1.300079107284546, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.300079107284546, "logits_per_char": -0.650039553642273, "num_chars": 2}, {"sum_logits": -1.3907560110092163, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3907560110092163, "logits_per_char": -0.6953780055046082, "num_chars": 2}, {"sum_logits": -1.6294124126434326, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.6294124126434326, "logits_per_char": -0.8147062063217163, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 797, "native_id": "Mercury_405948", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4554249048233032, "incorrect_loss_raw": 1.3706501722335815, "correct_loss_per_char": 0.7277124524116516, "incorrect_loss_per_char": 0.6853250861167908, "correct_loss_per_token": 1.4554249048233032, "incorrect_loss_per_token": 1.3706501722335815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4554249048233032, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4554249048233032, "logits_per_char": -0.7277124524116516, "num_chars": 2}, {"sum_logits": -1.395340085029602, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.395340085029602, "logits_per_char": -0.697670042514801, "num_chars": 2}, {"sum_logits": -1.312321662902832, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.312321662902832, "logits_per_char": -0.656160831451416, "num_chars": 2}, {"sum_logits": -1.4042887687683105, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4042887687683105, "logits_per_char": -0.7021443843841553, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 798, "native_id": "Mercury_7212503", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.300560474395752, "incorrect_loss_raw": 1.4231336116790771, "correct_loss_per_char": 0.650280237197876, "incorrect_loss_per_char": 0.7115668058395386, "correct_loss_per_token": 1.300560474395752, "incorrect_loss_per_token": 1.4231336116790771, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4798287153244019, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4798287153244019, "logits_per_char": -0.7399143576622009, "num_chars": 2}, {"sum_logits": -1.422195553779602, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.422195553779602, "logits_per_char": -0.711097776889801, "num_chars": 2}, {"sum_logits": -1.3673765659332275, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3673765659332275, "logits_per_char": -0.6836882829666138, "num_chars": 2}, {"sum_logits": -1.300560474395752, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.300560474395752, "logits_per_char": -0.650280237197876, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 799, "native_id": "NYSEDREGENTS_2008_8_14", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5683872699737549, "incorrect_loss_raw": 1.3365110953648884, "correct_loss_per_char": 0.7841936349868774, "incorrect_loss_per_char": 0.6682555476824442, "correct_loss_per_token": 1.5683872699737549, "incorrect_loss_per_token": 1.3365110953648884, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3179888725280762, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3179888725280762, "logits_per_char": -0.6589944362640381, "num_chars": 2}, {"sum_logits": -1.3255928754806519, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3255928754806519, "logits_per_char": -0.6627964377403259, "num_chars": 2}, {"sum_logits": -1.3659515380859375, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3659515380859375, "logits_per_char": -0.6829757690429688, "num_chars": 2}, {"sum_logits": -1.5683872699737549, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.5683872699737549, "logits_per_char": -0.7841936349868774, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 800, "native_id": "TIMSS_1995_8_L7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2353514432907104, "incorrect_loss_raw": 1.4485623836517334, "correct_loss_per_char": 0.6176757216453552, "incorrect_loss_per_char": 0.7242811918258667, "correct_loss_per_token": 1.2353514432907104, "incorrect_loss_per_token": 1.4485623836517334, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4918309450149536, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4918309450149536, "logits_per_char": -0.7459154725074768, "num_chars": 2}, {"sum_logits": -1.4808837175369263, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4808837175369263, "logits_per_char": -0.7404418587684631, "num_chars": 2}, {"sum_logits": -1.3729724884033203, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3729724884033203, "logits_per_char": -0.6864862442016602, "num_chars": 2}, {"sum_logits": -1.2353514432907104, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2353514432907104, "logits_per_char": -0.6176757216453552, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 801, "native_id": "Mercury_404086", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.215896725654602, "incorrect_loss_raw": 1.4579642216364543, "correct_loss_per_char": 0.607948362827301, "incorrect_loss_per_char": 0.7289821108182272, "correct_loss_per_token": 1.215896725654602, "incorrect_loss_per_token": 1.4579642216364543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5185736417770386, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5185736417770386, "logits_per_char": -0.7592868208885193, "num_chars": 2}, {"sum_logits": -1.354453682899475, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.354453682899475, "logits_per_char": -0.6772268414497375, "num_chars": 2}, {"sum_logits": -1.5008653402328491, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5008653402328491, "logits_per_char": -0.7504326701164246, "num_chars": 2}, {"sum_logits": -1.215896725654602, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.215896725654602, "logits_per_char": -0.607948362827301, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 802, "native_id": "MDSA_2007_8_60", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3601874113082886, "incorrect_loss_raw": 1.403143286705017, "correct_loss_per_char": 0.6800937056541443, "incorrect_loss_per_char": 0.7015716433525085, "correct_loss_per_token": 1.3601874113082886, "incorrect_loss_per_token": 1.403143286705017, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4988253116607666, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4988253116607666, "logits_per_char": -0.7494126558303833, "num_chars": 2}, {"sum_logits": -1.3237287998199463, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3237287998199463, "logits_per_char": -0.6618643999099731, "num_chars": 2}, {"sum_logits": -1.3868757486343384, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3868757486343384, "logits_per_char": -0.6934378743171692, "num_chars": 2}, {"sum_logits": -1.3601874113082886, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3601874113082886, "logits_per_char": -0.6800937056541443, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 803, "native_id": "MEAP_2005_8_31", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2950313091278076, "incorrect_loss_raw": 1.4280985991160076, "correct_loss_per_char": 0.6475156545639038, "incorrect_loss_per_char": 0.7140492995580038, "correct_loss_per_token": 1.2950313091278076, "incorrect_loss_per_token": 1.4280985991160076, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.532811164855957, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.532811164855957, "logits_per_char": -0.7664055824279785, "num_chars": 2}, {"sum_logits": -1.3176894187927246, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3176894187927246, "logits_per_char": -0.6588447093963623, "num_chars": 2}, {"sum_logits": -1.4337952136993408, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4337952136993408, "logits_per_char": -0.7168976068496704, "num_chars": 2}, {"sum_logits": -1.2950313091278076, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2950313091278076, "logits_per_char": -0.6475156545639038, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 804, "native_id": "NYSEDREGENTS_2010_4_22", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4057716131210327, "incorrect_loss_raw": 1.388698935508728, "correct_loss_per_char": 0.7028858065605164, "incorrect_loss_per_char": 0.694349467754364, "correct_loss_per_token": 1.4057716131210327, "incorrect_loss_per_token": 1.388698935508728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3414384126663208, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.3414384126663208, "logits_per_char": -0.6707192063331604, "num_chars": 2}, {"sum_logits": -1.4057716131210327, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4057716131210327, "logits_per_char": -0.7028858065605164, "num_chars": 2}, {"sum_logits": -1.359071969985962, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.359071969985962, "logits_per_char": -0.679535984992981, "num_chars": 2}, {"sum_logits": -1.4655864238739014, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4655864238739014, "logits_per_char": -0.7327932119369507, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 805, "native_id": "MEA_2014_8_7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9617912769317627, "incorrect_loss_raw": 1.65728755791982, "correct_loss_per_char": 0.9808956384658813, "incorrect_loss_per_char": 0.82864377895991, "correct_loss_per_token": 1.9617912769317627, "incorrect_loss_per_token": 1.65728755791982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0650335550308228, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.0650335550308228, "logits_per_char": -0.5325167775154114, "num_chars": 2}, {"sum_logits": -1.5689575672149658, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.5689575672149658, "logits_per_char": -0.7844787836074829, "num_chars": 2}, {"sum_logits": -1.9617912769317627, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.9617912769317627, "logits_per_char": -0.9808956384658813, "num_chars": 2}, {"sum_logits": -2.337871551513672, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -2.337871551513672, "logits_per_char": -1.168935775756836, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 806, "native_id": "Mercury_177730", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3392657041549683, "incorrect_loss_raw": 1.4128283262252808, "correct_loss_per_char": 0.6696328520774841, "incorrect_loss_per_char": 0.7064141631126404, "correct_loss_per_token": 1.3392657041549683, "incorrect_loss_per_token": 1.4128283262252808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3392657041549683, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3392657041549683, "logits_per_char": -0.6696328520774841, "num_chars": 2}, {"sum_logits": -1.314026951789856, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.314026951789856, "logits_per_char": -0.657013475894928, "num_chars": 2}, {"sum_logits": -1.4828946590423584, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4828946590423584, "logits_per_char": -0.7414473295211792, "num_chars": 2}, {"sum_logits": -1.441563367843628, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.441563367843628, "logits_per_char": -0.720781683921814, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 807, "native_id": "MCAS_2012_5_22237", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3803662061691284, "incorrect_loss_raw": 1.3936062256495159, "correct_loss_per_char": 0.6901831030845642, "incorrect_loss_per_char": 0.6968031128247579, "correct_loss_per_token": 1.3803662061691284, "incorrect_loss_per_token": 1.3936062256495159, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4304157495498657, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4304157495498657, "logits_per_char": -0.7152078747749329, "num_chars": 2}, {"sum_logits": -1.392411231994629, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.392411231994629, "logits_per_char": -0.6962056159973145, "num_chars": 2}, {"sum_logits": -1.3803662061691284, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.3803662061691284, "logits_per_char": -0.6901831030845642, "num_chars": 2}, {"sum_logits": -1.3579916954040527, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.3579916954040527, "logits_per_char": -0.6789958477020264, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 808, "native_id": "MCAS_2005_9_19-v1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2783254384994507, "incorrect_loss_raw": 1.4319998025894165, "correct_loss_per_char": 0.6391627192497253, "incorrect_loss_per_char": 0.7159999012947083, "correct_loss_per_token": 1.2783254384994507, "incorrect_loss_per_token": 1.4319998025894165, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5081398487091064, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.5081398487091064, "logits_per_char": -0.7540699243545532, "num_chars": 2}, {"sum_logits": -1.415907621383667, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.415907621383667, "logits_per_char": -0.7079538106918335, "num_chars": 2}, {"sum_logits": -1.371951937675476, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.371951937675476, "logits_per_char": -0.685975968837738, "num_chars": 2}, {"sum_logits": -1.2783254384994507, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.2783254384994507, "logits_per_char": -0.6391627192497253, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 809, "native_id": "Mercury_7217718", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4420629739761353, "incorrect_loss_raw": 1.3747161229451497, "correct_loss_per_char": 0.7210314869880676, "incorrect_loss_per_char": 0.6873580614725748, "correct_loss_per_token": 1.4420629739761353, "incorrect_loss_per_token": 1.3747161229451497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3946176767349243, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3946176767349243, "logits_per_char": -0.6973088383674622, "num_chars": 2}, {"sum_logits": -1.3050317764282227, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3050317764282227, "logits_per_char": -0.6525158882141113, "num_chars": 2}, {"sum_logits": -1.4420629739761353, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4420629739761353, "logits_per_char": -0.7210314869880676, "num_chars": 2}, {"sum_logits": -1.4244989156723022, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4244989156723022, "logits_per_char": -0.7122494578361511, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 810, "native_id": "Mercury_7188370", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4373793601989746, "incorrect_loss_raw": 1.3754393657048543, "correct_loss_per_char": 0.7186896800994873, "incorrect_loss_per_char": 0.6877196828524271, "correct_loss_per_token": 1.4373793601989746, "incorrect_loss_per_token": 1.3754393657048543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3688108921051025, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.3688108921051025, "logits_per_char": -0.6844054460525513, "num_chars": 2}, {"sum_logits": -1.4262568950653076, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4262568950653076, "logits_per_char": -0.7131284475326538, "num_chars": 2}, {"sum_logits": -1.3312503099441528, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": true, "logits_per_token": -1.3312503099441528, "logits_per_char": -0.6656251549720764, "num_chars": 2}, {"sum_logits": -1.4373793601989746, "num_tokens": 1, "num_tokens_all": 322, "is_greedy": false, "logits_per_token": -1.4373793601989746, "logits_per_char": -0.7186896800994873, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 811, "native_id": "CSZ_2008_8_29", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3545469045639038, "incorrect_loss_raw": 1.4039839108784993, "correct_loss_per_char": 0.6772734522819519, "incorrect_loss_per_char": 0.7019919554392496, "correct_loss_per_token": 1.3545469045639038, "incorrect_loss_per_token": 1.4039839108784993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.338375210762024, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.338375210762024, "logits_per_char": -0.669187605381012, "num_chars": 2}, {"sum_logits": -1.4798128604888916, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4798128604888916, "logits_per_char": -0.7399064302444458, "num_chars": 2}, {"sum_logits": -1.3545469045639038, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3545469045639038, "logits_per_char": -0.6772734522819519, "num_chars": 2}, {"sum_logits": -1.3937636613845825, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3937636613845825, "logits_per_char": -0.6968818306922913, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 812, "native_id": "MCAS_2006_9_38", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4329066276550293, "incorrect_loss_raw": 1.3852782646814983, "correct_loss_per_char": 0.7164533138275146, "incorrect_loss_per_char": 0.6926391323407491, "correct_loss_per_token": 1.4329066276550293, "incorrect_loss_per_token": 1.3852782646814983, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2796225547790527, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.2796225547790527, "logits_per_char": -0.6398112773895264, "num_chars": 2}, {"sum_logits": -1.4329066276550293, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4329066276550293, "logits_per_char": -0.7164533138275146, "num_chars": 2}, {"sum_logits": -1.3526664972305298, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3526664972305298, "logits_per_char": -0.6763332486152649, "num_chars": 2}, {"sum_logits": -1.523545742034912, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.523545742034912, "logits_per_char": -0.761772871017456, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 813, "native_id": "NYSEDREGENTS_2013_4_22", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4093353748321533, "incorrect_loss_raw": 1.9754590392112732, "correct_loss_per_char": 0.7046676874160767, "incorrect_loss_per_char": 0.9877295196056366, "correct_loss_per_token": 1.4093353748321533, "incorrect_loss_per_token": 1.9754590392112732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8648079037666321, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -0.8648079037666321, "logits_per_char": -0.43240395188331604, "num_chars": 2}, {"sum_logits": -1.4093353748321533, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4093353748321533, "logits_per_char": -0.7046676874160767, "num_chars": 2}, {"sum_logits": -2.245532274246216, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -2.245532274246216, "logits_per_char": -1.122766137123108, "num_chars": 2}, {"sum_logits": -2.8160369396209717, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -2.8160369396209717, "logits_per_char": -1.4080184698104858, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 814, "native_id": "Mercury_402091", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4097164869308472, "incorrect_loss_raw": 1.3873043457667034, "correct_loss_per_char": 0.7048582434654236, "incorrect_loss_per_char": 0.6936521728833517, "correct_loss_per_token": 1.4097164869308472, "incorrect_loss_per_token": 1.3873043457667034, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3126109838485718, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.3126109838485718, "logits_per_char": -0.6563054919242859, "num_chars": 2}, {"sum_logits": -1.4560834169387817, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4560834169387817, "logits_per_char": -0.7280417084693909, "num_chars": 2}, {"sum_logits": -1.4097164869308472, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.4097164869308472, "logits_per_char": -0.7048582434654236, "num_chars": 2}, {"sum_logits": -1.3932186365127563, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3932186365127563, "logits_per_char": -0.6966093182563782, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 815, "native_id": "NCEOGA_2013_8_36", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4089218378067017, "incorrect_loss_raw": 1.3861562808354695, "correct_loss_per_char": 0.7044609189033508, "incorrect_loss_per_char": 0.6930781404177347, "correct_loss_per_token": 1.4089218378067017, "incorrect_loss_per_token": 1.3861562808354695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3035491704940796, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3035491704940796, "logits_per_char": -0.6517745852470398, "num_chars": 2}, {"sum_logits": -1.4054564237594604, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4054564237594604, "logits_per_char": -0.7027282118797302, "num_chars": 2}, {"sum_logits": -1.4494632482528687, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4494632482528687, "logits_per_char": -0.7247316241264343, "num_chars": 2}, {"sum_logits": -1.4089218378067017, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4089218378067017, "logits_per_char": -0.7044609189033508, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 816, "native_id": "Mercury_7016240", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4791821241378784, "incorrect_loss_raw": 1.3654745022455852, "correct_loss_per_char": 0.7395910620689392, "incorrect_loss_per_char": 0.6827372511227926, "correct_loss_per_token": 1.4791821241378784, "incorrect_loss_per_token": 1.3654745022455852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4791821241378784, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4791821241378784, "logits_per_char": -0.7395910620689392, "num_chars": 2}, {"sum_logits": -1.3889278173446655, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3889278173446655, "logits_per_char": -0.6944639086723328, "num_chars": 2}, {"sum_logits": -1.4361344575881958, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4361344575881958, "logits_per_char": -0.7180672287940979, "num_chars": 2}, {"sum_logits": -1.271361231803894, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.271361231803894, "logits_per_char": -0.635680615901947, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 817, "native_id": "Mercury_7207148", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3538081645965576, "incorrect_loss_raw": 1.4037424325942993, "correct_loss_per_char": 0.6769040822982788, "incorrect_loss_per_char": 0.7018712162971497, "correct_loss_per_token": 1.3538081645965576, "incorrect_loss_per_token": 1.4037424325942993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4429423809051514, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4429423809051514, "logits_per_char": -0.7214711904525757, "num_chars": 2}, {"sum_logits": -1.3752192258834839, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3752192258834839, "logits_per_char": -0.6876096129417419, "num_chars": 2}, {"sum_logits": -1.3930656909942627, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3930656909942627, "logits_per_char": -0.6965328454971313, "num_chars": 2}, {"sum_logits": -1.3538081645965576, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.3538081645965576, "logits_per_char": -0.6769040822982788, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 818, "native_id": "MDSA_2011_8_39", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2789582014083862, "incorrect_loss_raw": 1.436272382736206, "correct_loss_per_char": 0.6394791007041931, "incorrect_loss_per_char": 0.718136191368103, "correct_loss_per_token": 1.2789582014083862, "incorrect_loss_per_token": 1.436272382736206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2789582014083862, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.2789582014083862, "logits_per_char": -0.6394791007041931, "num_chars": 2}, {"sum_logits": -1.2902805805206299, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.2902805805206299, "logits_per_char": -0.6451402902603149, "num_chars": 2}, {"sum_logits": -1.4617213010787964, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.4617213010787964, "logits_per_char": -0.7308606505393982, "num_chars": 2}, {"sum_logits": -1.556815266609192, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.556815266609192, "logits_per_char": -0.778407633304596, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 819, "native_id": "Mercury_SC_401815", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3555608987808228, "incorrect_loss_raw": 1.4086345831553142, "correct_loss_per_char": 0.6777804493904114, "incorrect_loss_per_char": 0.7043172915776571, "correct_loss_per_token": 1.3555608987808228, "incorrect_loss_per_token": 1.4086345831553142, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5621119737625122, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.5621119737625122, "logits_per_char": -0.7810559868812561, "num_chars": 2}, {"sum_logits": -1.3555608987808228, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3555608987808228, "logits_per_char": -0.6777804493904114, "num_chars": 2}, {"sum_logits": -1.3727562427520752, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3727562427520752, "logits_per_char": -0.6863781213760376, "num_chars": 2}, {"sum_logits": -1.291035532951355, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.291035532951355, "logits_per_char": -0.6455177664756775, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 820, "native_id": "Mercury_7230423", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3862146139144897, "incorrect_loss_raw": 1.3951096137364705, "correct_loss_per_char": 0.6931073069572449, "incorrect_loss_per_char": 0.6975548068682352, "correct_loss_per_token": 1.3862146139144897, "incorrect_loss_per_token": 1.3951096137364705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3862146139144897, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3862146139144897, "logits_per_char": -0.6931073069572449, "num_chars": 2}, {"sum_logits": -1.3753644227981567, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3753644227981567, "logits_per_char": -0.6876822113990784, "num_chars": 2}, {"sum_logits": -1.4871011972427368, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4871011972427368, "logits_per_char": -0.7435505986213684, "num_chars": 2}, {"sum_logits": -1.322863221168518, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.322863221168518, "logits_per_char": -0.661431610584259, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 821, "native_id": "Mercury_7006108", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4517390727996826, "incorrect_loss_raw": 1.3707666794459026, "correct_loss_per_char": 0.7258695363998413, "incorrect_loss_per_char": 0.6853833397229513, "correct_loss_per_token": 1.4517390727996826, "incorrect_loss_per_token": 1.3707666794459026, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.342553973197937, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.342553973197937, "logits_per_char": -0.6712769865989685, "num_chars": 2}, {"sum_logits": -1.3307102918624878, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3307102918624878, "logits_per_char": -0.6653551459312439, "num_chars": 2}, {"sum_logits": -1.4390357732772827, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4390357732772827, "logits_per_char": -0.7195178866386414, "num_chars": 2}, {"sum_logits": -1.4517390727996826, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4517390727996826, "logits_per_char": -0.7258695363998413, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 822, "native_id": "Mercury_7004585", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.4017534255981445, "incorrect_loss_raw": 1.3481825788815816, "correct_loss_per_char": 1.2008767127990723, "incorrect_loss_per_char": 0.6740912894407908, "correct_loss_per_token": 2.4017534255981445, "incorrect_loss_per_token": 1.3481825788815816, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7950436472892761, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -0.7950436472892761, "logits_per_char": -0.39752182364463806, "num_chars": 2}, {"sum_logits": -1.3889892101287842, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3889892101287842, "logits_per_char": -0.6944946050643921, "num_chars": 2}, {"sum_logits": -1.8605148792266846, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.8605148792266846, "logits_per_char": -0.9302574396133423, "num_chars": 2}, {"sum_logits": -2.4017534255981445, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -2.4017534255981445, "logits_per_char": -1.2008767127990723, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 823, "native_id": "Mercury_412777", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3496408462524414, "incorrect_loss_raw": 1.4077381292978923, "correct_loss_per_char": 0.6748204231262207, "incorrect_loss_per_char": 0.7038690646489462, "correct_loss_per_token": 1.3496408462524414, "incorrect_loss_per_token": 1.4077381292978923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.504393219947815, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.504393219947815, "logits_per_char": -0.7521966099739075, "num_chars": 2}, {"sum_logits": -1.337809681892395, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.337809681892395, "logits_per_char": -0.6689048409461975, "num_chars": 2}, {"sum_logits": -1.3810114860534668, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3810114860534668, "logits_per_char": -0.6905057430267334, "num_chars": 2}, {"sum_logits": -1.3496408462524414, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3496408462524414, "logits_per_char": -0.6748204231262207, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 824, "native_id": "Mercury_7172813", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4669458866119385, "incorrect_loss_raw": 1.389496127764384, "correct_loss_per_char": 0.7334729433059692, "incorrect_loss_per_char": 0.694748063882192, "correct_loss_per_token": 1.4669458866119385, "incorrect_loss_per_token": 1.389496127764384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6848610639572144, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.6848610639572144, "logits_per_char": -0.8424305319786072, "num_chars": 2}, {"sum_logits": -1.335871934890747, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.335871934890747, "logits_per_char": -0.6679359674453735, "num_chars": 2}, {"sum_logits": -1.4669458866119385, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4669458866119385, "logits_per_char": -0.7334729433059692, "num_chars": 2}, {"sum_logits": -1.1477553844451904, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.1477553844451904, "logits_per_char": -0.5738776922225952, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 825, "native_id": "VASoL_2009_3_28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.281578540802002, "incorrect_loss_raw": 1.4394888480504353, "correct_loss_per_char": 0.640789270401001, "incorrect_loss_per_char": 0.7197444240252177, "correct_loss_per_token": 1.281578540802002, "incorrect_loss_per_token": 1.4394888480504353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2696858644485474, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.2696858644485474, "logits_per_char": -0.6348429322242737, "num_chars": 2}, {"sum_logits": -1.281578540802002, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.281578540802002, "logits_per_char": -0.640789270401001, "num_chars": 2}, {"sum_logits": -1.5609546899795532, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.5609546899795532, "logits_per_char": -0.7804773449897766, "num_chars": 2}, {"sum_logits": -1.4878259897232056, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4878259897232056, "logits_per_char": -0.7439129948616028, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 826, "native_id": "TIMSS_2007_8_pg34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3846378326416016, "incorrect_loss_raw": 1.393438418706258, "correct_loss_per_char": 0.6923189163208008, "incorrect_loss_per_char": 0.696719209353129, "correct_loss_per_token": 1.3846378326416016, "incorrect_loss_per_token": 1.393438418706258, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3795615434646606, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3795615434646606, "logits_per_char": -0.6897807717323303, "num_chars": 2}, {"sum_logits": -1.318424940109253, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.318424940109253, "logits_per_char": -0.6592124700546265, "num_chars": 2}, {"sum_logits": -1.3846378326416016, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3846378326416016, "logits_per_char": -0.6923189163208008, "num_chars": 2}, {"sum_logits": -1.4823287725448608, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4823287725448608, "logits_per_char": -0.7411643862724304, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 827, "native_id": "Mercury_7215548", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.351211667060852, "incorrect_loss_raw": 1.4043794870376587, "correct_loss_per_char": 0.675605833530426, "incorrect_loss_per_char": 0.7021897435188293, "correct_loss_per_token": 1.351211667060852, "incorrect_loss_per_token": 1.4043794870376587, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.354949712753296, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.354949712753296, "logits_per_char": -0.677474856376648, "num_chars": 2}, {"sum_logits": -1.351211667060852, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.351211667060852, "logits_per_char": -0.675605833530426, "num_chars": 2}, {"sum_logits": -1.4266542196273804, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4266542196273804, "logits_per_char": -0.7133271098136902, "num_chars": 2}, {"sum_logits": -1.4315345287322998, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4315345287322998, "logits_per_char": -0.7157672643661499, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 828, "native_id": "Mercury_7068425", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3897370100021362, "incorrect_loss_raw": 1.390009919802348, "correct_loss_per_char": 0.6948685050010681, "incorrect_loss_per_char": 0.695004959901174, "correct_loss_per_token": 1.3897370100021362, "incorrect_loss_per_token": 1.390009919802348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4164167642593384, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4164167642593384, "logits_per_char": -0.7082083821296692, "num_chars": 2}, {"sum_logits": -1.3897370100021362, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3897370100021362, "logits_per_char": -0.6948685050010681, "num_chars": 2}, {"sum_logits": -1.400445580482483, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.400445580482483, "logits_per_char": -0.7002227902412415, "num_chars": 2}, {"sum_logits": -1.3531674146652222, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3531674146652222, "logits_per_char": -0.6765837073326111, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 829, "native_id": "Mercury_SC_401123", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4973690509796143, "incorrect_loss_raw": 1.359988808631897, "correct_loss_per_char": 0.7486845254898071, "incorrect_loss_per_char": 0.6799944043159485, "correct_loss_per_token": 1.4973690509796143, "incorrect_loss_per_token": 1.359988808631897, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2424805164337158, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.2424805164337158, "logits_per_char": -0.6212402582168579, "num_chars": 2}, {"sum_logits": -1.4973690509796143, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4973690509796143, "logits_per_char": -0.7486845254898071, "num_chars": 2}, {"sum_logits": -1.4455431699752808, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.4455431699752808, "logits_per_char": -0.7227715849876404, "num_chars": 2}, {"sum_logits": -1.3919427394866943, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3919427394866943, "logits_per_char": -0.6959713697433472, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 830, "native_id": "TAKS_2009_5_21", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3748828172683716, "incorrect_loss_raw": 1.3975474834442139, "correct_loss_per_char": 0.6874414086341858, "incorrect_loss_per_char": 0.6987737417221069, "correct_loss_per_token": 1.3748828172683716, "incorrect_loss_per_token": 1.3975474834442139, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.444043517112732, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.444043517112732, "logits_per_char": -0.722021758556366, "num_chars": 2}, {"sum_logits": -1.3748828172683716, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3748828172683716, "logits_per_char": -0.6874414086341858, "num_chars": 2}, {"sum_logits": -1.3518856763839722, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.3518856763839722, "logits_per_char": -0.6759428381919861, "num_chars": 2}, {"sum_logits": -1.3967132568359375, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3967132568359375, "logits_per_char": -0.6983566284179688, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 831, "native_id": "Mercury_7005075", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3923730850219727, "incorrect_loss_raw": 1.3911106983820598, "correct_loss_per_char": 0.6961865425109863, "incorrect_loss_per_char": 0.6955553491910299, "correct_loss_per_token": 1.3923730850219727, "incorrect_loss_per_token": 1.3911106983820598, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.386247992515564, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.386247992515564, "logits_per_char": -0.693123996257782, "num_chars": 2}, {"sum_logits": -1.4257982969284058, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4257982969284058, "logits_per_char": -0.7128991484642029, "num_chars": 2}, {"sum_logits": -1.3923730850219727, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3923730850219727, "logits_per_char": -0.6961865425109863, "num_chars": 2}, {"sum_logits": -1.3612858057022095, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3612858057022095, "logits_per_char": -0.6806429028511047, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 832, "native_id": "MDSA_2012_8_35", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2445828914642334, "incorrect_loss_raw": 1.453531265258789, "correct_loss_per_char": 0.6222914457321167, "incorrect_loss_per_char": 0.7267656326293945, "correct_loss_per_token": 1.2445828914642334, "incorrect_loss_per_token": 1.453531265258789, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5966131687164307, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.5966131687164307, "logits_per_char": -0.7983065843582153, "num_chars": 2}, {"sum_logits": -1.3438830375671387, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3438830375671387, "logits_per_char": -0.6719415187835693, "num_chars": 2}, {"sum_logits": -1.4200975894927979, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4200975894927979, "logits_per_char": -0.7100487947463989, "num_chars": 2}, {"sum_logits": -1.2445828914642334, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.2445828914642334, "logits_per_char": -0.6222914457321167, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 833, "native_id": "Mercury_7041545", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3507903814315796, "incorrect_loss_raw": 1.406755765279134, "correct_loss_per_char": 0.6753951907157898, "incorrect_loss_per_char": 0.703377882639567, "correct_loss_per_token": 1.3507903814315796, "incorrect_loss_per_token": 1.406755765279134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.316746473312378, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.316746473312378, "logits_per_char": -0.658373236656189, "num_chars": 2}, {"sum_logits": -1.3507903814315796, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3507903814315796, "logits_per_char": -0.6753951907157898, "num_chars": 2}, {"sum_logits": -1.3828035593032837, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3828035593032837, "logits_per_char": -0.6914017796516418, "num_chars": 2}, {"sum_logits": -1.5207172632217407, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.5207172632217407, "logits_per_char": -0.7603586316108704, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 834, "native_id": "NYSEDREGENTS_2010_4_10", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.457970380783081, "incorrect_loss_raw": 1.3742826382319133, "correct_loss_per_char": 0.7289851903915405, "incorrect_loss_per_char": 0.6871413191159567, "correct_loss_per_token": 1.457970380783081, "incorrect_loss_per_token": 1.3742826382319133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.298767328262329, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.298767328262329, "logits_per_char": -0.6493836641311646, "num_chars": 2}, {"sum_logits": -1.2899168729782104, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.2899168729782104, "logits_per_char": -0.6449584364891052, "num_chars": 2}, {"sum_logits": -1.457970380783081, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.457970380783081, "logits_per_char": -0.7289851903915405, "num_chars": 2}, {"sum_logits": -1.5341637134552002, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.5341637134552002, "logits_per_char": -0.7670818567276001, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 835, "native_id": "CSZ20334", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4719873666763306, "incorrect_loss_raw": 1.3664262294769287, "correct_loss_per_char": 0.7359936833381653, "incorrect_loss_per_char": 0.6832131147384644, "correct_loss_per_token": 1.4719873666763306, "incorrect_loss_per_token": 1.3664262294769287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4719873666763306, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4719873666763306, "logits_per_char": -0.7359936833381653, "num_chars": 2}, {"sum_logits": -1.3196351528167725, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3196351528167725, "logits_per_char": -0.6598175764083862, "num_chars": 2}, {"sum_logits": -1.4194072484970093, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4194072484970093, "logits_per_char": -0.7097036242485046, "num_chars": 2}, {"sum_logits": -1.3602362871170044, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3602362871170044, "logits_per_char": -0.6801181435585022, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 836, "native_id": "Mercury_SC_402031", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3900437355041504, "incorrect_loss_raw": 1.3914263645807903, "correct_loss_per_char": 0.6950218677520752, "incorrect_loss_per_char": 0.6957131822903951, "correct_loss_per_token": 1.3900437355041504, "incorrect_loss_per_token": 1.3914263645807903, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.352510690689087, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.352510690689087, "logits_per_char": -0.6762553453445435, "num_chars": 2}, {"sum_logits": -1.4069337844848633, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4069337844848633, "logits_per_char": -0.7034668922424316, "num_chars": 2}, {"sum_logits": -1.3900437355041504, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3900437355041504, "logits_per_char": -0.6950218677520752, "num_chars": 2}, {"sum_logits": -1.4148346185684204, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4148346185684204, "logits_per_char": -0.7074173092842102, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 837, "native_id": "NYSEDREGENTS_2012_8_40", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2872331142425537, "incorrect_loss_raw": 1.4286341269810994, "correct_loss_per_char": 0.6436165571212769, "incorrect_loss_per_char": 0.7143170634905497, "correct_loss_per_token": 1.2872331142425537, "incorrect_loss_per_token": 1.4286341269810994, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.441240668296814, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.441240668296814, "logits_per_char": -0.720620334148407, "num_chars": 2}, {"sum_logits": -1.477824330329895, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.477824330329895, "logits_per_char": -0.7389121651649475, "num_chars": 2}, {"sum_logits": -1.3668373823165894, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3668373823165894, "logits_per_char": -0.6834186911582947, "num_chars": 2}, {"sum_logits": -1.2872331142425537, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.2872331142425537, "logits_per_char": -0.6436165571212769, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 838, "native_id": "Mercury_7220955", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4102259874343872, "incorrect_loss_raw": 1.3841166496276855, "correct_loss_per_char": 0.7051129937171936, "incorrect_loss_per_char": 0.6920583248138428, "correct_loss_per_token": 1.4102259874343872, "incorrect_loss_per_token": 1.3841166496276855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.401161551475525, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.401161551475525, "logits_per_char": -0.7005807757377625, "num_chars": 2}, {"sum_logits": -1.3679429292678833, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.3679429292678833, "logits_per_char": -0.6839714646339417, "num_chars": 2}, {"sum_logits": -1.3832454681396484, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.3832454681396484, "logits_per_char": -0.6916227340698242, "num_chars": 2}, {"sum_logits": -1.4102259874343872, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4102259874343872, "logits_per_char": -0.7051129937171936, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 839, "native_id": "VASoL_2011_5_25", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5764065980911255, "incorrect_loss_raw": 1.336552381515503, "correct_loss_per_char": 0.7882032990455627, "incorrect_loss_per_char": 0.6682761907577515, "correct_loss_per_token": 1.5764065980911255, "incorrect_loss_per_token": 1.336552381515503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2543656826019287, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": true, "logits_per_token": -1.2543656826019287, "logits_per_char": -0.6271828413009644, "num_chars": 2}, {"sum_logits": -1.363682508468628, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.363682508468628, "logits_per_char": -0.681841254234314, "num_chars": 2}, {"sum_logits": -1.3916089534759521, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.3916089534759521, "logits_per_char": -0.6958044767379761, "num_chars": 2}, {"sum_logits": -1.5764065980911255, "num_tokens": 1, "num_tokens_all": 306, "is_greedy": false, "logits_per_token": -1.5764065980911255, "logits_per_char": -0.7882032990455627, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 840, "native_id": "NYSEDREGENTS_2008_4_8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.311347484588623, "incorrect_loss_raw": 1.4241581360499065, "correct_loss_per_char": 0.6556737422943115, "incorrect_loss_per_char": 0.7120790680249532, "correct_loss_per_token": 1.311347484588623, "incorrect_loss_per_token": 1.4241581360499065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.311347484588623, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.311347484588623, "logits_per_char": -0.6556737422943115, "num_chars": 2}, {"sum_logits": -1.3588945865631104, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3588945865631104, "logits_per_char": -0.6794472932815552, "num_chars": 2}, {"sum_logits": -1.323917031288147, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.323917031288147, "logits_per_char": -0.6619585156440735, "num_chars": 2}, {"sum_logits": -1.589662790298462, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.589662790298462, "logits_per_char": -0.794831395149231, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 841, "native_id": "Mercury_LBS10795", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4043346643447876, "incorrect_loss_raw": 1.3895906607309978, "correct_loss_per_char": 0.7021673321723938, "incorrect_loss_per_char": 0.6947953303654989, "correct_loss_per_token": 1.4043346643447876, "incorrect_loss_per_token": 1.3895906607309978, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5105894804000854, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5105894804000854, "logits_per_char": -0.7552947402000427, "num_chars": 2}, {"sum_logits": -1.3652263879776, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3652263879776, "logits_per_char": -0.6826131939888, "num_chars": 2}, {"sum_logits": -1.2929561138153076, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.2929561138153076, "logits_per_char": -0.6464780569076538, "num_chars": 2}, {"sum_logits": -1.4043346643447876, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4043346643447876, "logits_per_char": -0.7021673321723938, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 842, "native_id": "NYSEDREGENTS_2015_4_15", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2959620952606201, "incorrect_loss_raw": 1.4258917570114136, "correct_loss_per_char": 0.6479810476303101, "incorrect_loss_per_char": 0.7129458785057068, "correct_loss_per_token": 1.2959620952606201, "incorrect_loss_per_token": 1.4258917570114136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4760539531707764, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.4760539531707764, "logits_per_char": -0.7380269765853882, "num_chars": 2}, {"sum_logits": -1.444949984550476, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.444949984550476, "logits_per_char": -0.722474992275238, "num_chars": 2}, {"sum_logits": -1.2959620952606201, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.2959620952606201, "logits_per_char": -0.6479810476303101, "num_chars": 2}, {"sum_logits": -1.3566713333129883, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.3566713333129883, "logits_per_char": -0.6783356666564941, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 843, "native_id": "Mercury_SC_405197", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4338659048080444, "incorrect_loss_raw": 1.3772140343983967, "correct_loss_per_char": 0.7169329524040222, "incorrect_loss_per_char": 0.6886070171991984, "correct_loss_per_token": 1.4338659048080444, "incorrect_loss_per_token": 1.3772140343983967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4338659048080444, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4338659048080444, "logits_per_char": -0.7169329524040222, "num_chars": 2}, {"sum_logits": -1.4172414541244507, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4172414541244507, "logits_per_char": -0.7086207270622253, "num_chars": 2}, {"sum_logits": -1.392797827720642, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.392797827720642, "logits_per_char": -0.696398913860321, "num_chars": 2}, {"sum_logits": -1.3216028213500977, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.3216028213500977, "logits_per_char": -0.6608014106750488, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 844, "native_id": "Mercury_7013825", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5189481973648071, "incorrect_loss_raw": 1.3732505639394124, "correct_loss_per_char": 0.7594740986824036, "incorrect_loss_per_char": 0.6866252819697062, "correct_loss_per_token": 1.5189481973648071, "incorrect_loss_per_token": 1.3732505639394124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2018413543701172, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.2018413543701172, "logits_per_char": -0.6009206771850586, "num_chars": 2}, {"sum_logits": -1.3076739311218262, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3076739311218262, "logits_per_char": -0.6538369655609131, "num_chars": 2}, {"sum_logits": -1.5189481973648071, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.5189481973648071, "logits_per_char": -0.7594740986824036, "num_chars": 2}, {"sum_logits": -1.610236406326294, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.610236406326294, "logits_per_char": -0.805118203163147, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 845, "native_id": "MCAS_2010_5_11981", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.41569983959198, "incorrect_loss_raw": 1.393865982691447, "correct_loss_per_char": 0.70784991979599, "incorrect_loss_per_char": 0.6969329913457235, "correct_loss_per_token": 1.41569983959198, "incorrect_loss_per_token": 1.393865982691447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5628739595413208, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5628739595413208, "logits_per_char": -0.7814369797706604, "num_chars": 2}, {"sum_logits": -1.41569983959198, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.41569983959198, "logits_per_char": -0.70784991979599, "num_chars": 2}, {"sum_logits": -1.4368315935134888, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4368315935134888, "logits_per_char": -0.7184157967567444, "num_chars": 2}, {"sum_logits": -1.1818923950195312, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.1818923950195312, "logits_per_char": -0.5909461975097656, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 846, "native_id": "MDSA_2008_8_24", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3931094408035278, "incorrect_loss_raw": 1.4081785281499226, "correct_loss_per_char": 0.6965547204017639, "incorrect_loss_per_char": 0.7040892640749613, "correct_loss_per_token": 1.3931094408035278, "incorrect_loss_per_token": 1.4081785281499226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6519566774368286, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.6519566774368286, "logits_per_char": -0.8259783387184143, "num_chars": 2}, {"sum_logits": -1.3775267601013184, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3775267601013184, "logits_per_char": -0.6887633800506592, "num_chars": 2}, {"sum_logits": -1.3931094408035278, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3931094408035278, "logits_per_char": -0.6965547204017639, "num_chars": 2}, {"sum_logits": -1.195052146911621, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.195052146911621, "logits_per_char": -0.5975260734558105, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 847, "native_id": "NCEOGA_2013_8_39", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4546090364456177, "incorrect_loss_raw": 1.3730597098668416, "correct_loss_per_char": 0.7273045182228088, "incorrect_loss_per_char": 0.6865298549334208, "correct_loss_per_token": 1.4546090364456177, "incorrect_loss_per_token": 1.3730597098668416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4160481691360474, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4160481691360474, "logits_per_char": -0.7080240845680237, "num_chars": 2}, {"sum_logits": -1.2695363759994507, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2695363759994507, "logits_per_char": -0.6347681879997253, "num_chars": 2}, {"sum_logits": -1.4335945844650269, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4335945844650269, "logits_per_char": -0.7167972922325134, "num_chars": 2}, {"sum_logits": -1.4546090364456177, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4546090364456177, "logits_per_char": -0.7273045182228088, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 848, "native_id": "MCAS_2003_8_26", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2819935083389282, "incorrect_loss_raw": 1.4319978555043538, "correct_loss_per_char": 0.6409967541694641, "incorrect_loss_per_char": 0.7159989277521769, "correct_loss_per_token": 1.2819935083389282, "incorrect_loss_per_token": 1.4319978555043538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4518686532974243, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4518686532974243, "logits_per_char": -0.7259343266487122, "num_chars": 2}, {"sum_logits": -1.5121009349822998, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5121009349822998, "logits_per_char": -0.7560504674911499, "num_chars": 2}, {"sum_logits": -1.2819935083389282, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2819935083389282, "logits_per_char": -0.6409967541694641, "num_chars": 2}, {"sum_logits": -1.3320239782333374, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3320239782333374, "logits_per_char": -0.6660119891166687, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 849, "native_id": "LEAP__4_10226", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3941112756729126, "incorrect_loss_raw": 1.389782706896464, "correct_loss_per_char": 0.6970556378364563, "incorrect_loss_per_char": 0.694891353448232, "correct_loss_per_token": 1.3941112756729126, "incorrect_loss_per_token": 1.389782706896464, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.436488389968872, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.436488389968872, "logits_per_char": -0.718244194984436, "num_chars": 2}, {"sum_logits": -1.3477306365966797, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3477306365966797, "logits_per_char": -0.6738653182983398, "num_chars": 2}, {"sum_logits": -1.3941112756729126, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3941112756729126, "logits_per_char": -0.6970556378364563, "num_chars": 2}, {"sum_logits": -1.3851290941238403, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3851290941238403, "logits_per_char": -0.6925645470619202, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 850, "native_id": "Mercury_SC_416527", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2981120347976685, "incorrect_loss_raw": 1.4333182175954182, "correct_loss_per_char": 0.6490560173988342, "incorrect_loss_per_char": 0.7166591087977091, "correct_loss_per_token": 1.2981120347976685, "incorrect_loss_per_token": 1.4333182175954182, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3236788511276245, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.3236788511276245, "logits_per_char": -0.6618394255638123, "num_chars": 2}, {"sum_logits": -1.2981120347976685, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.2981120347976685, "logits_per_char": -0.6490560173988342, "num_chars": 2}, {"sum_logits": -1.339613914489746, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.339613914489746, "logits_per_char": -0.669806957244873, "num_chars": 2}, {"sum_logits": -1.6366618871688843, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.6366618871688843, "logits_per_char": -0.8183309435844421, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 851, "native_id": "Mercury_LBS10778", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3993299007415771, "incorrect_loss_raw": 1.3894989887873332, "correct_loss_per_char": 0.6996649503707886, "incorrect_loss_per_char": 0.6947494943936666, "correct_loss_per_token": 1.3993299007415771, "incorrect_loss_per_token": 1.3894989887873332, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3993299007415771, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3993299007415771, "logits_per_char": -0.6996649503707886, "num_chars": 2}, {"sum_logits": -1.4066269397735596, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4066269397735596, "logits_per_char": -0.7033134698867798, "num_chars": 2}, {"sum_logits": -1.3928396701812744, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3928396701812744, "logits_per_char": -0.6964198350906372, "num_chars": 2}, {"sum_logits": -1.3690303564071655, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.3690303564071655, "logits_per_char": -0.6845151782035828, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 852, "native_id": "Mercury_178710", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4500285387039185, "incorrect_loss_raw": 1.3723111152648926, "correct_loss_per_char": 0.7250142693519592, "incorrect_loss_per_char": 0.6861555576324463, "correct_loss_per_token": 1.4500285387039185, "incorrect_loss_per_token": 1.3723111152648926, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3291857242584229, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3291857242584229, "logits_per_char": -0.6645928621292114, "num_chars": 2}, {"sum_logits": -1.3266116380691528, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3266116380691528, "logits_per_char": -0.6633058190345764, "num_chars": 2}, {"sum_logits": -1.4500285387039185, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4500285387039185, "logits_per_char": -0.7250142693519592, "num_chars": 2}, {"sum_logits": -1.461135983467102, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.461135983467102, "logits_per_char": -0.730567991733551, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 853, "native_id": "Mercury_178605", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4696266651153564, "incorrect_loss_raw": 1.3670763572057087, "correct_loss_per_char": 0.7348133325576782, "incorrect_loss_per_char": 0.6835381786028544, "correct_loss_per_token": 1.4696266651153564, "incorrect_loss_per_token": 1.3670763572057087, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4188246726989746, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4188246726989746, "logits_per_char": -0.7094123363494873, "num_chars": 2}, {"sum_logits": -1.279589295387268, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.279589295387268, "logits_per_char": -0.639794647693634, "num_chars": 2}, {"sum_logits": -1.4028151035308838, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4028151035308838, "logits_per_char": -0.7014075517654419, "num_chars": 2}, {"sum_logits": -1.4696266651153564, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4696266651153564, "logits_per_char": -0.7348133325576782, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 854, "native_id": "Mercury_7241063", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4047212600708008, "incorrect_loss_raw": 1.3909759124120076, "correct_loss_per_char": 0.7023606300354004, "incorrect_loss_per_char": 0.6954879562060038, "correct_loss_per_token": 1.4047212600708008, "incorrect_loss_per_token": 1.3909759124120076, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.428941249847412, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.428941249847412, "logits_per_char": -0.714470624923706, "num_chars": 2}, {"sum_logits": -1.4906522035598755, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4906522035598755, "logits_per_char": -0.7453261017799377, "num_chars": 2}, {"sum_logits": -1.4047212600708008, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4047212600708008, "logits_per_char": -0.7023606300354004, "num_chars": 2}, {"sum_logits": -1.2533342838287354, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2533342838287354, "logits_per_char": -0.6266671419143677, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 855, "native_id": "Mercury_SC_402079", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3945341110229492, "incorrect_loss_raw": 1.397383729616801, "correct_loss_per_char": 0.6972670555114746, "incorrect_loss_per_char": 0.6986918648084005, "correct_loss_per_token": 1.3945341110229492, "incorrect_loss_per_token": 1.397383729616801, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5812143087387085, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.5812143087387085, "logits_per_char": -0.7906071543693542, "num_chars": 2}, {"sum_logits": -1.3262470960617065, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.3262470960617065, "logits_per_char": -0.6631235480308533, "num_chars": 2}, {"sum_logits": -1.2846897840499878, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.2846897840499878, "logits_per_char": -0.6423448920249939, "num_chars": 2}, {"sum_logits": -1.3945341110229492, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.3945341110229492, "logits_per_char": -0.6972670555114746, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 856, "native_id": "Mercury_SC_415454", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4467071294784546, "incorrect_loss_raw": 1.377349615097046, "correct_loss_per_char": 0.7233535647392273, "incorrect_loss_per_char": 0.688674807548523, "correct_loss_per_token": 1.4467071294784546, "incorrect_loss_per_token": 1.377349615097046, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5011992454528809, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.5011992454528809, "logits_per_char": -0.7505996227264404, "num_chars": 2}, {"sum_logits": -1.4467071294784546, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4467071294784546, "logits_per_char": -0.7233535647392273, "num_chars": 2}, {"sum_logits": -1.2717742919921875, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2717742919921875, "logits_per_char": -0.6358871459960938, "num_chars": 2}, {"sum_logits": -1.3590753078460693, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3590753078460693, "logits_per_char": -0.6795376539230347, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 857, "native_id": "Mercury_7236058", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3997297286987305, "incorrect_loss_raw": 1.38829239209493, "correct_loss_per_char": 0.6998648643493652, "incorrect_loss_per_char": 0.694146196047465, "correct_loss_per_token": 1.3997297286987305, "incorrect_loss_per_token": 1.38829239209493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4077450037002563, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4077450037002563, "logits_per_char": -0.7038725018501282, "num_chars": 2}, {"sum_logits": -1.3997297286987305, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.3997297286987305, "logits_per_char": -0.6998648643493652, "num_chars": 2}, {"sum_logits": -1.416517972946167, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.416517972946167, "logits_per_char": -0.7082589864730835, "num_chars": 2}, {"sum_logits": -1.3406141996383667, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.3406141996383667, "logits_per_char": -0.6703070998191833, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 858, "native_id": "NYSEDREGENTS_2015_8_19", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.503235936164856, "incorrect_loss_raw": 1.3568918307622273, "correct_loss_per_char": 0.751617968082428, "incorrect_loss_per_char": 0.6784459153811137, "correct_loss_per_token": 1.503235936164856, "incorrect_loss_per_token": 1.3568918307622273, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.428452730178833, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.428452730178833, "logits_per_char": -0.7142263650894165, "num_chars": 2}, {"sum_logits": -1.2684656381607056, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.2684656381607056, "logits_per_char": -0.6342328190803528, "num_chars": 2}, {"sum_logits": -1.503235936164856, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.503235936164856, "logits_per_char": -0.751617968082428, "num_chars": 2}, {"sum_logits": -1.3737571239471436, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3737571239471436, "logits_per_char": -0.6868785619735718, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 859, "native_id": "Mercury_SC_400193", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.467780590057373, "incorrect_loss_raw": 1.366467555363973, "correct_loss_per_char": 0.7338902950286865, "incorrect_loss_per_char": 0.6832337776819865, "correct_loss_per_token": 1.467780590057373, "incorrect_loss_per_token": 1.366467555363973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3379850387573242, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.3379850387573242, "logits_per_char": -0.6689925193786621, "num_chars": 2}, {"sum_logits": -1.467780590057373, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.467780590057373, "logits_per_char": -0.7338902950286865, "num_chars": 2}, {"sum_logits": -1.4186546802520752, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4186546802520752, "logits_per_char": -0.7093273401260376, "num_chars": 2}, {"sum_logits": -1.3427629470825195, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3427629470825195, "logits_per_char": -0.6713814735412598, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 860, "native_id": "Mercury_SC_416134", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4550271034240723, "incorrect_loss_raw": 1.3764853874842327, "correct_loss_per_char": 0.7275135517120361, "incorrect_loss_per_char": 0.6882426937421163, "correct_loss_per_token": 1.4550271034240723, "incorrect_loss_per_token": 1.3764853874842327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.229828953742981, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.229828953742981, "logits_per_char": -0.6149144768714905, "num_chars": 2}, {"sum_logits": -1.4316695928573608, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4316695928573608, "logits_per_char": -0.7158347964286804, "num_chars": 2}, {"sum_logits": -1.467957615852356, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.467957615852356, "logits_per_char": -0.733978807926178, "num_chars": 2}, {"sum_logits": -1.4550271034240723, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4550271034240723, "logits_per_char": -0.7275135517120361, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 861, "native_id": "Mercury_SC_LBS10388", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3687176704406738, "incorrect_loss_raw": 1.3973875045776367, "correct_loss_per_char": 0.6843588352203369, "incorrect_loss_per_char": 0.6986937522888184, "correct_loss_per_token": 1.3687176704406738, "incorrect_loss_per_token": 1.3973875045776367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4020131826400757, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4020131826400757, "logits_per_char": -0.7010065913200378, "num_chars": 2}, {"sum_logits": -1.3687176704406738, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3687176704406738, "logits_per_char": -0.6843588352203369, "num_chars": 2}, {"sum_logits": -1.3617130517959595, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.3617130517959595, "logits_per_char": -0.6808565258979797, "num_chars": 2}, {"sum_logits": -1.428436279296875, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.428436279296875, "logits_per_char": -0.7142181396484375, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 862, "native_id": "Mercury_416504", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.277251958847046, "incorrect_loss_raw": 1.4551026423772175, "correct_loss_per_char": 0.638625979423523, "incorrect_loss_per_char": 0.7275513211886088, "correct_loss_per_token": 1.277251958847046, "incorrect_loss_per_token": 1.4551026423772175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6526942253112793, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.6526942253112793, "logits_per_char": -0.8263471126556396, "num_chars": 2}, {"sum_logits": -1.5454760789871216, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5454760789871216, "logits_per_char": -0.7727380394935608, "num_chars": 2}, {"sum_logits": -1.277251958847046, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.277251958847046, "logits_per_char": -0.638625979423523, "num_chars": 2}, {"sum_logits": -1.167137622833252, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.167137622833252, "logits_per_char": -0.583568811416626, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 863, "native_id": "Mercury_7201320", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.468995213508606, "incorrect_loss_raw": 1.3675274848937988, "correct_loss_per_char": 0.734497606754303, "incorrect_loss_per_char": 0.6837637424468994, "correct_loss_per_token": 1.468995213508606, "incorrect_loss_per_token": 1.3675274848937988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.468995213508606, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.468995213508606, "logits_per_char": -0.734497606754303, "num_chars": 2}, {"sum_logits": -1.3609275817871094, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3609275817871094, "logits_per_char": -0.6804637908935547, "num_chars": 2}, {"sum_logits": -1.4436380863189697, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4436380863189697, "logits_per_char": -0.7218190431594849, "num_chars": 2}, {"sum_logits": -1.2980167865753174, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2980167865753174, "logits_per_char": -0.6490083932876587, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 864, "native_id": "Mercury_7221218", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3306262493133545, "incorrect_loss_raw": 1.410593310991923, "correct_loss_per_char": 0.6653131246566772, "incorrect_loss_per_char": 0.7052966554959615, "correct_loss_per_token": 1.3306262493133545, "incorrect_loss_per_token": 1.410593310991923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3756827116012573, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3756827116012573, "logits_per_char": -0.6878413558006287, "num_chars": 2}, {"sum_logits": -1.3306262493133545, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.3306262493133545, "logits_per_char": -0.6653131246566772, "num_chars": 2}, {"sum_logits": -1.4638323783874512, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4638323783874512, "logits_per_char": -0.7319161891937256, "num_chars": 2}, {"sum_logits": -1.3922648429870605, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3922648429870605, "logits_per_char": -0.6961324214935303, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 865, "native_id": "MCAS_2011_8_17683", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4122995138168335, "incorrect_loss_raw": 1.3888472318649292, "correct_loss_per_char": 0.7061497569084167, "incorrect_loss_per_char": 0.6944236159324646, "correct_loss_per_token": 1.4122995138168335, "incorrect_loss_per_token": 1.3888472318649292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4963327646255493, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4963327646255493, "logits_per_char": -0.7481663823127747, "num_chars": 2}, {"sum_logits": -1.3277530670166016, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.3277530670166016, "logits_per_char": -0.6638765335083008, "num_chars": 2}, {"sum_logits": -1.4122995138168335, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4122995138168335, "logits_per_char": -0.7061497569084167, "num_chars": 2}, {"sum_logits": -1.3424558639526367, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3424558639526367, "logits_per_char": -0.6712279319763184, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 866, "native_id": "Mercury_7234220", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4159387350082397, "incorrect_loss_raw": 1.3842796087265015, "correct_loss_per_char": 0.7079693675041199, "incorrect_loss_per_char": 0.6921398043632507, "correct_loss_per_token": 1.4159387350082397, "incorrect_loss_per_token": 1.3842796087265015, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3831957578659058, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3831957578659058, "logits_per_char": -0.6915978789329529, "num_chars": 2}, {"sum_logits": -1.3329076766967773, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3329076766967773, "logits_per_char": -0.6664538383483887, "num_chars": 2}, {"sum_logits": -1.4159387350082397, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4159387350082397, "logits_per_char": -0.7079693675041199, "num_chars": 2}, {"sum_logits": -1.4367353916168213, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4367353916168213, "logits_per_char": -0.7183676958084106, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 867, "native_id": "Mercury_7116358", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4250441789627075, "incorrect_loss_raw": 1.3838798999786377, "correct_loss_per_char": 0.7125220894813538, "incorrect_loss_per_char": 0.6919399499893188, "correct_loss_per_token": 1.4250441789627075, "incorrect_loss_per_token": 1.3838798999786377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4497454166412354, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4497454166412354, "logits_per_char": -0.7248727083206177, "num_chars": 2}, {"sum_logits": -1.4250441789627075, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4250441789627075, "logits_per_char": -0.7125220894813538, "num_chars": 2}, {"sum_logits": -1.467993974685669, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.467993974685669, "logits_per_char": -0.7339969873428345, "num_chars": 2}, {"sum_logits": -1.2339003086090088, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2339003086090088, "logits_per_char": -0.6169501543045044, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 868, "native_id": "MCAS_2004_5_36", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.395546793937683, "incorrect_loss_raw": 1.3944406112035115, "correct_loss_per_char": 0.6977733969688416, "incorrect_loss_per_char": 0.6972203056017557, "correct_loss_per_token": 1.395546793937683, "incorrect_loss_per_token": 1.3944406112035115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4532697200775146, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4532697200775146, "logits_per_char": -0.7266348600387573, "num_chars": 2}, {"sum_logits": -1.5027450323104858, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5027450323104858, "logits_per_char": -0.7513725161552429, "num_chars": 2}, {"sum_logits": -1.395546793937683, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.395546793937683, "logits_per_char": -0.6977733969688416, "num_chars": 2}, {"sum_logits": -1.2273070812225342, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2273070812225342, "logits_per_char": -0.6136535406112671, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 869, "native_id": "Mercury_7056875", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3044108152389526, "incorrect_loss_raw": 1.4264938831329346, "correct_loss_per_char": 0.6522054076194763, "incorrect_loss_per_char": 0.7132469415664673, "correct_loss_per_token": 1.3044108152389526, "incorrect_loss_per_token": 1.4264938831329346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4157472848892212, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4157472848892212, "logits_per_char": -0.7078736424446106, "num_chars": 2}, {"sum_logits": -1.3044108152389526, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.3044108152389526, "logits_per_char": -0.6522054076194763, "num_chars": 2}, {"sum_logits": -1.49977707862854, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.49977707862854, "logits_per_char": -0.74988853931427, "num_chars": 2}, {"sum_logits": -1.3639572858810425, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3639572858810425, "logits_per_char": -0.6819786429405212, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 870, "native_id": "Mercury_SC_413002", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.343738317489624, "incorrect_loss_raw": 1.4095316727956135, "correct_loss_per_char": 0.671869158744812, "incorrect_loss_per_char": 0.7047658363978068, "correct_loss_per_token": 1.343738317489624, "incorrect_loss_per_token": 1.4095316727956135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.343738317489624, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.343738317489624, "logits_per_char": -0.671869158744812, "num_chars": 2}, {"sum_logits": -1.339616298675537, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.339616298675537, "logits_per_char": -0.6698081493377686, "num_chars": 2}, {"sum_logits": -1.3699029684066772, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3699029684066772, "logits_per_char": -0.6849514842033386, "num_chars": 2}, {"sum_logits": -1.5190757513046265, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.5190757513046265, "logits_per_char": -0.7595378756523132, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 871, "native_id": "Mercury_7094938", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3597795963287354, "incorrect_loss_raw": 1.4014183680216472, "correct_loss_per_char": 0.6798897981643677, "incorrect_loss_per_char": 0.7007091840108236, "correct_loss_per_token": 1.3597795963287354, "incorrect_loss_per_token": 1.4014183680216472, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4571995735168457, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4571995735168457, "logits_per_char": -0.7285997867584229, "num_chars": 2}, {"sum_logits": -1.3770415782928467, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3770415782928467, "logits_per_char": -0.6885207891464233, "num_chars": 2}, {"sum_logits": -1.370013952255249, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.370013952255249, "logits_per_char": -0.6850069761276245, "num_chars": 2}, {"sum_logits": -1.3597795963287354, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.3597795963287354, "logits_per_char": -0.6798897981643677, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 872, "native_id": "Mercury_175963", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2723667621612549, "incorrect_loss_raw": 1.4356230894724529, "correct_loss_per_char": 0.6361833810806274, "incorrect_loss_per_char": 0.7178115447362264, "correct_loss_per_token": 1.2723667621612549, "incorrect_loss_per_token": 1.4356230894724529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2723667621612549, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.2723667621612549, "logits_per_char": -0.6361833810806274, "num_chars": 2}, {"sum_logits": -1.3245607614517212, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3245607614517212, "logits_per_char": -0.6622803807258606, "num_chars": 2}, {"sum_logits": -1.520139217376709, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.520139217376709, "logits_per_char": -0.7600696086883545, "num_chars": 2}, {"sum_logits": -1.4621692895889282, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4621692895889282, "logits_per_char": -0.7310846447944641, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 873, "native_id": "CSZ_2004_5_CSZ10100", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3983443975448608, "incorrect_loss_raw": 1.390062967936198, "correct_loss_per_char": 0.6991721987724304, "incorrect_loss_per_char": 0.695031483968099, "correct_loss_per_token": 1.3983443975448608, "incorrect_loss_per_token": 1.390062967936198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4571551084518433, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4571551084518433, "logits_per_char": -0.7285775542259216, "num_chars": 2}, {"sum_logits": -1.3506555557250977, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3506555557250977, "logits_per_char": -0.6753277778625488, "num_chars": 2}, {"sum_logits": -1.3623782396316528, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3623782396316528, "logits_per_char": -0.6811891198158264, "num_chars": 2}, {"sum_logits": -1.3983443975448608, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3983443975448608, "logits_per_char": -0.6991721987724304, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 874, "native_id": "AKDE&ED_2012_8_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.424973487854004, "incorrect_loss_raw": 1.3839792410532634, "correct_loss_per_char": 0.712486743927002, "incorrect_loss_per_char": 0.6919896205266317, "correct_loss_per_token": 1.424973487854004, "incorrect_loss_per_token": 1.3839792410532634, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.370545744895935, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.370545744895935, "logits_per_char": -0.6852728724479675, "num_chars": 2}, {"sum_logits": -1.292048454284668, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.292048454284668, "logits_per_char": -0.646024227142334, "num_chars": 2}, {"sum_logits": -1.489343523979187, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.489343523979187, "logits_per_char": -0.7446717619895935, "num_chars": 2}, {"sum_logits": -1.424973487854004, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.424973487854004, "logits_per_char": -0.712486743927002, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 875, "native_id": "Mercury_7009818", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.362235188484192, "incorrect_loss_raw": 1.4003367026646931, "correct_loss_per_char": 0.681117594242096, "incorrect_loss_per_char": 0.7001683513323466, "correct_loss_per_token": 1.362235188484192, "incorrect_loss_per_token": 1.4003367026646931, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.362235188484192, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.362235188484192, "logits_per_char": -0.681117594242096, "num_chars": 2}, {"sum_logits": -1.41663658618927, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.41663658618927, "logits_per_char": -0.708318293094635, "num_chars": 2}, {"sum_logits": -1.440195918083191, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.440195918083191, "logits_per_char": -0.7200979590415955, "num_chars": 2}, {"sum_logits": -1.3441776037216187, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3441776037216187, "logits_per_char": -0.6720888018608093, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 876, "native_id": "MCAS_1999_8_10", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.35502028465271, "incorrect_loss_raw": 1.4025144974390666, "correct_loss_per_char": 0.677510142326355, "incorrect_loss_per_char": 0.7012572487195333, "correct_loss_per_token": 1.35502028465271, "incorrect_loss_per_token": 1.4025144974390666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3818001747131348, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3818001747131348, "logits_per_char": -0.6909000873565674, "num_chars": 2}, {"sum_logits": -1.4086356163024902, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4086356163024902, "logits_per_char": -0.7043178081512451, "num_chars": 2}, {"sum_logits": -1.35502028465271, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.35502028465271, "logits_per_char": -0.677510142326355, "num_chars": 2}, {"sum_logits": -1.4171077013015747, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4171077013015747, "logits_per_char": -0.7085538506507874, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 877, "native_id": "MDSA_2010_8_43", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3585426807403564, "incorrect_loss_raw": 1.4019126892089844, "correct_loss_per_char": 0.6792713403701782, "incorrect_loss_per_char": 0.7009563446044922, "correct_loss_per_token": 1.3585426807403564, "incorrect_loss_per_token": 1.4019126892089844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3766510486602783, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3766510486602783, "logits_per_char": -0.6883255243301392, "num_chars": 2}, {"sum_logits": -1.3585426807403564, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.3585426807403564, "logits_per_char": -0.6792713403701782, "num_chars": 2}, {"sum_logits": -1.432445764541626, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.432445764541626, "logits_per_char": -0.716222882270813, "num_chars": 2}, {"sum_logits": -1.3966412544250488, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3966412544250488, "logits_per_char": -0.6983206272125244, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 878, "native_id": "NYSEDREGENTS_2010_8_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3115708827972412, "incorrect_loss_raw": 1.4271022081375122, "correct_loss_per_char": 0.6557854413986206, "incorrect_loss_per_char": 0.7135511040687561, "correct_loss_per_token": 1.3115708827972412, "incorrect_loss_per_token": 1.4271022081375122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3115780353546143, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.3115780353546143, "logits_per_char": -0.6557890176773071, "num_chars": 2}, {"sum_logits": -1.3115708827972412, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": true, "logits_per_token": -1.3115708827972412, "logits_per_char": -0.6557854413986206, "num_chars": 2}, {"sum_logits": -1.368906855583191, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.368906855583191, "logits_per_char": -0.6844534277915955, "num_chars": 2}, {"sum_logits": -1.6008217334747314, "num_tokens": 1, "num_tokens_all": 300, "is_greedy": false, "logits_per_token": -1.6008217334747314, "logits_per_char": -0.8004108667373657, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 879, "native_id": "Mercury_416369", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2741169929504395, "incorrect_loss_raw": 1.4366111755371094, "correct_loss_per_char": 0.6370584964752197, "incorrect_loss_per_char": 0.7183055877685547, "correct_loss_per_token": 1.2741169929504395, "incorrect_loss_per_token": 1.4366111755371094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3539488315582275, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3539488315582275, "logits_per_char": -0.6769744157791138, "num_chars": 2}, {"sum_logits": -1.2741169929504395, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2741169929504395, "logits_per_char": -0.6370584964752197, "num_chars": 2}, {"sum_logits": -1.4067527055740356, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4067527055740356, "logits_per_char": -0.7033763527870178, "num_chars": 2}, {"sum_logits": -1.549131989479065, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.549131989479065, "logits_per_char": -0.7745659947395325, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 880, "native_id": "MCAS_8_2015_13", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5450459718704224, "incorrect_loss_raw": 1.347888469696045, "correct_loss_per_char": 0.7725229859352112, "incorrect_loss_per_char": 0.6739442348480225, "correct_loss_per_token": 1.5450459718704224, "incorrect_loss_per_token": 1.347888469696045, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5450459718704224, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5450459718704224, "logits_per_char": -0.7725229859352112, "num_chars": 2}, {"sum_logits": -1.3795115947723389, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3795115947723389, "logits_per_char": -0.6897557973861694, "num_chars": 2}, {"sum_logits": -1.4376276731491089, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4376276731491089, "logits_per_char": -0.7188138365745544, "num_chars": 2}, {"sum_logits": -1.226526141166687, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.226526141166687, "logits_per_char": -0.6132630705833435, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 881, "native_id": "MEAP_2005_8_16", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3967642784118652, "incorrect_loss_raw": 1.3921810388565063, "correct_loss_per_char": 0.6983821392059326, "incorrect_loss_per_char": 0.6960905194282532, "correct_loss_per_token": 1.3967642784118652, "incorrect_loss_per_token": 1.3921810388565063, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4505054950714111, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4505054950714111, "logits_per_char": -0.7252527475357056, "num_chars": 2}, {"sum_logits": -1.3967642784118652, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3967642784118652, "logits_per_char": -0.6983821392059326, "num_chars": 2}, {"sum_logits": -1.3148167133331299, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3148167133331299, "logits_per_char": -0.6574083566665649, "num_chars": 2}, {"sum_logits": -1.411220908164978, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.411220908164978, "logits_per_char": -0.705610454082489, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 882, "native_id": "Mercury_411809", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2225804328918457, "incorrect_loss_raw": 1.4582677682240803, "correct_loss_per_char": 0.6112902164459229, "incorrect_loss_per_char": 0.7291338841120402, "correct_loss_per_token": 1.2225804328918457, "incorrect_loss_per_token": 1.4582677682240803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5296196937561035, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.5296196937561035, "logits_per_char": -0.7648098468780518, "num_chars": 2}, {"sum_logits": -1.2225804328918457, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.2225804328918457, "logits_per_char": -0.6112902164459229, "num_chars": 2}, {"sum_logits": -1.3487560749053955, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3487560749053955, "logits_per_char": -0.6743780374526978, "num_chars": 2}, {"sum_logits": -1.4964275360107422, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4964275360107422, "logits_per_char": -0.7482137680053711, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 883, "native_id": "Mercury_SC_400214", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4215177297592163, "incorrect_loss_raw": 1.3799651463826497, "correct_loss_per_char": 0.7107588648796082, "incorrect_loss_per_char": 0.6899825731913248, "correct_loss_per_token": 1.4215177297592163, "incorrect_loss_per_token": 1.3799651463826497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4215177297592163, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4215177297592163, "logits_per_char": -0.7107588648796082, "num_chars": 2}, {"sum_logits": -1.3497693538665771, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.3497693538665771, "logits_per_char": -0.6748846769332886, "num_chars": 2}, {"sum_logits": -1.4102057218551636, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4102057218551636, "logits_per_char": -0.7051028609275818, "num_chars": 2}, {"sum_logits": -1.3799203634262085, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3799203634262085, "logits_per_char": -0.6899601817131042, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 884, "native_id": "Mercury_SC_401161", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4039331674575806, "incorrect_loss_raw": 1.387198011080424, "correct_loss_per_char": 0.7019665837287903, "incorrect_loss_per_char": 0.693599005540212, "correct_loss_per_token": 1.4039331674575806, "incorrect_loss_per_token": 1.387198011080424, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4743784666061401, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4743784666061401, "logits_per_char": -0.7371892333030701, "num_chars": 2}, {"sum_logits": -1.3133329153060913, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3133329153060913, "logits_per_char": -0.6566664576530457, "num_chars": 2}, {"sum_logits": -1.3738826513290405, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3738826513290405, "logits_per_char": -0.6869413256645203, "num_chars": 2}, {"sum_logits": -1.4039331674575806, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4039331674575806, "logits_per_char": -0.7019665837287903, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 885, "native_id": "Mercury_7205573", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3312267065048218, "incorrect_loss_raw": 1.4123212893803914, "correct_loss_per_char": 0.6656133532524109, "incorrect_loss_per_char": 0.7061606446901957, "correct_loss_per_token": 1.3312267065048218, "incorrect_loss_per_token": 1.4123212893803914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.402663230895996, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.402663230895996, "logits_per_char": -0.701331615447998, "num_chars": 2}, {"sum_logits": -1.4328079223632812, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4328079223632812, "logits_per_char": -0.7164039611816406, "num_chars": 2}, {"sum_logits": -1.3312267065048218, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.3312267065048218, "logits_per_char": -0.6656133532524109, "num_chars": 2}, {"sum_logits": -1.401492714881897, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.401492714881897, "logits_per_char": -0.7007463574409485, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 886, "native_id": "AKDE&ED_2012_4_29", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3033868074417114, "incorrect_loss_raw": 1.4224613110224407, "correct_loss_per_char": 0.6516934037208557, "incorrect_loss_per_char": 0.7112306555112203, "correct_loss_per_token": 1.3033868074417114, "incorrect_loss_per_token": 1.4224613110224407, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3033868074417114, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.3033868074417114, "logits_per_char": -0.6516934037208557, "num_chars": 2}, {"sum_logits": -1.3823150396347046, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3823150396347046, "logits_per_char": -0.6911575198173523, "num_chars": 2}, {"sum_logits": -1.415771245956421, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.415771245956421, "logits_per_char": -0.7078856229782104, "num_chars": 2}, {"sum_logits": -1.4692976474761963, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4692976474761963, "logits_per_char": -0.7346488237380981, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 887, "native_id": "Mercury_7218663", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2697033882141113, "incorrect_loss_raw": 1.438644806543986, "correct_loss_per_char": 0.6348516941070557, "incorrect_loss_per_char": 0.719322403271993, "correct_loss_per_token": 1.2697033882141113, "incorrect_loss_per_token": 1.438644806543986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.342850923538208, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.342850923538208, "logits_per_char": -0.671425461769104, "num_chars": 2}, {"sum_logits": -1.526064157485962, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.526064157485962, "logits_per_char": -0.763032078742981, "num_chars": 2}, {"sum_logits": -1.2697033882141113, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.2697033882141113, "logits_per_char": -0.6348516941070557, "num_chars": 2}, {"sum_logits": -1.447019338607788, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.447019338607788, "logits_per_char": -0.723509669303894, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 888, "native_id": "Mercury_7220973", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.443770170211792, "incorrect_loss_raw": 1.38427734375, "correct_loss_per_char": 0.721885085105896, "incorrect_loss_per_char": 0.692138671875, "correct_loss_per_token": 1.443770170211792, "incorrect_loss_per_token": 1.38427734375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.443770170211792, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.443770170211792, "logits_per_char": -0.721885085105896, "num_chars": 2}, {"sum_logits": -1.5355134010314941, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.5355134010314941, "logits_per_char": -0.7677567005157471, "num_chars": 2}, {"sum_logits": -1.4449095726013184, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": false, "logits_per_token": -1.4449095726013184, "logits_per_char": -0.7224547863006592, "num_chars": 2}, {"sum_logits": -1.1724090576171875, "num_tokens": 1, "num_tokens_all": 324, "is_greedy": true, "logits_per_token": -1.1724090576171875, "logits_per_char": -0.5862045288085938, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 889, "native_id": "Mercury_7082670", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4334062337875366, "incorrect_loss_raw": 1.3786117633183796, "correct_loss_per_char": 0.7167031168937683, "incorrect_loss_per_char": 0.6893058816591898, "correct_loss_per_token": 1.4334062337875366, "incorrect_loss_per_token": 1.3786117633183796, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.286858320236206, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.286858320236206, "logits_per_char": -0.643429160118103, "num_chars": 2}, {"sum_logits": -1.3733662366867065, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3733662366867065, "logits_per_char": -0.6866831183433533, "num_chars": 2}, {"sum_logits": -1.4334062337875366, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4334062337875366, "logits_per_char": -0.7167031168937683, "num_chars": 2}, {"sum_logits": -1.4756107330322266, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4756107330322266, "logits_per_char": -0.7378053665161133, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 890, "native_id": "Mercury_7248255", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.39814031124115, "incorrect_loss_raw": 1.3919103145599365, "correct_loss_per_char": 0.699070155620575, "incorrect_loss_per_char": 0.6959551572799683, "correct_loss_per_token": 1.39814031124115, "incorrect_loss_per_token": 1.3919103145599365, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.39814031124115, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.39814031124115, "logits_per_char": -0.699070155620575, "num_chars": 2}, {"sum_logits": -1.293328881263733, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.293328881263733, "logits_per_char": -0.6466644406318665, "num_chars": 2}, {"sum_logits": -1.4935556650161743, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4935556650161743, "logits_per_char": -0.7467778325080872, "num_chars": 2}, {"sum_logits": -1.3888463973999023, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3888463973999023, "logits_per_char": -0.6944231986999512, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 891, "native_id": "Mercury_7041230", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4438987970352173, "incorrect_loss_raw": 1.3793567021687825, "correct_loss_per_char": 0.7219493985176086, "incorrect_loss_per_char": 0.6896783510843912, "correct_loss_per_token": 1.4438987970352173, "incorrect_loss_per_token": 1.3793567021687825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4438987970352173, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4438987970352173, "logits_per_char": -0.7219493985176086, "num_chars": 2}, {"sum_logits": -1.4842485189437866, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4842485189437866, "logits_per_char": -0.7421242594718933, "num_chars": 2}, {"sum_logits": -1.3982042074203491, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3982042074203491, "logits_per_char": -0.6991021037101746, "num_chars": 2}, {"sum_logits": -1.255617380142212, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.255617380142212, "logits_per_char": -0.627808690071106, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 892, "native_id": "Mercury_400471", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.381168007850647, "incorrect_loss_raw": 1.395567536354065, "correct_loss_per_char": 0.6905840039253235, "incorrect_loss_per_char": 0.6977837681770325, "correct_loss_per_token": 1.381168007850647, "incorrect_loss_per_token": 1.395567536354065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4052248001098633, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4052248001098633, "logits_per_char": -0.7026124000549316, "num_chars": 2}, {"sum_logits": -1.3924567699432373, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3924567699432373, "logits_per_char": -0.6962283849716187, "num_chars": 2}, {"sum_logits": -1.3890210390090942, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3890210390090942, "logits_per_char": -0.6945105195045471, "num_chars": 2}, {"sum_logits": -1.381168007850647, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.381168007850647, "logits_per_char": -0.6905840039253235, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 893, "native_id": "Mercury_416374", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.365014672279358, "incorrect_loss_raw": 1.4107211430867512, "correct_loss_per_char": 0.682507336139679, "incorrect_loss_per_char": 0.7053605715433756, "correct_loss_per_token": 1.365014672279358, "incorrect_loss_per_token": 1.4107211430867512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6074069738388062, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.6074069738388062, "logits_per_char": -0.8037034869194031, "num_chars": 2}, {"sum_logits": -1.3894400596618652, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3894400596618652, "logits_per_char": -0.6947200298309326, "num_chars": 2}, {"sum_logits": -1.365014672279358, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.365014672279358, "logits_per_char": -0.682507336139679, "num_chars": 2}, {"sum_logits": -1.2353163957595825, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2353163957595825, "logits_per_char": -0.6176581978797913, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 894, "native_id": "NYSEDREGENTS_2010_8_27", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3106046915054321, "incorrect_loss_raw": 1.4183675448099773, "correct_loss_per_char": 0.6553023457527161, "incorrect_loss_per_char": 0.7091837724049886, "correct_loss_per_token": 1.3106046915054321, "incorrect_loss_per_token": 1.4183675448099773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3106046915054321, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3106046915054321, "logits_per_char": -0.6553023457527161, "num_chars": 2}, {"sum_logits": -1.356865644454956, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.356865644454956, "logits_per_char": -0.678432822227478, "num_chars": 2}, {"sum_logits": -1.4660577774047852, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4660577774047852, "logits_per_char": -0.7330288887023926, "num_chars": 2}, {"sum_logits": -1.4321792125701904, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4321792125701904, "logits_per_char": -0.7160896062850952, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 895, "native_id": "ACTAAP_2015_7_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4226245880126953, "incorrect_loss_raw": 1.380242149035136, "correct_loss_per_char": 0.7113122940063477, "incorrect_loss_per_char": 0.690121074517568, "correct_loss_per_token": 1.4226245880126953, "incorrect_loss_per_token": 1.380242149035136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4010258913040161, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4010258913040161, "logits_per_char": -0.7005129456520081, "num_chars": 2}, {"sum_logits": -1.3475903272628784, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3475903272628784, "logits_per_char": -0.6737951636314392, "num_chars": 2}, {"sum_logits": -1.3921102285385132, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3921102285385132, "logits_per_char": -0.6960551142692566, "num_chars": 2}, {"sum_logits": -1.4226245880126953, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4226245880126953, "logits_per_char": -0.7113122940063477, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 896, "native_id": "Mercury_7041055", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3376189470291138, "incorrect_loss_raw": 1.4261668125788372, "correct_loss_per_char": 0.6688094735145569, "incorrect_loss_per_char": 0.7130834062894186, "correct_loss_per_token": 1.3376189470291138, "incorrect_loss_per_token": 1.4261668125788372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2016897201538086, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.2016897201538086, "logits_per_char": -0.6008448600769043, "num_chars": 2}, {"sum_logits": -1.3376189470291138, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3376189470291138, "logits_per_char": -0.6688094735145569, "num_chars": 2}, {"sum_logits": -1.4943323135375977, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4943323135375977, "logits_per_char": -0.7471661567687988, "num_chars": 2}, {"sum_logits": -1.582478404045105, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.582478404045105, "logits_per_char": -0.7912392020225525, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 897, "native_id": "Mercury_7214620", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4144737720489502, "incorrect_loss_raw": 1.3813565572102864, "correct_loss_per_char": 0.7072368860244751, "incorrect_loss_per_char": 0.6906782786051432, "correct_loss_per_token": 1.4144737720489502, "incorrect_loss_per_token": 1.3813565572102864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.383972406387329, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.383972406387329, "logits_per_char": -0.6919862031936646, "num_chars": 2}, {"sum_logits": -1.3971881866455078, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3971881866455078, "logits_per_char": -0.6985940933227539, "num_chars": 2}, {"sum_logits": -1.4144737720489502, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4144737720489502, "logits_per_char": -0.7072368860244751, "num_chars": 2}, {"sum_logits": -1.3629090785980225, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3629090785980225, "logits_per_char": -0.6814545392990112, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 898, "native_id": "ACTAAP_2014_7_2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3890724182128906, "incorrect_loss_raw": 1.3940203587214153, "correct_loss_per_char": 0.6945362091064453, "incorrect_loss_per_char": 0.6970101793607076, "correct_loss_per_token": 1.3890724182128906, "incorrect_loss_per_token": 1.3940203587214153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3358314037322998, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.3358314037322998, "logits_per_char": -0.6679157018661499, "num_chars": 2}, {"sum_logits": -1.3683960437774658, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3683960437774658, "logits_per_char": -0.6841980218887329, "num_chars": 2}, {"sum_logits": -1.47783362865448, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.47783362865448, "logits_per_char": -0.73891681432724, "num_chars": 2}, {"sum_logits": -1.3890724182128906, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3890724182128906, "logits_per_char": -0.6945362091064453, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 899, "native_id": "TIMSS_2003_4_pg20", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3829182386398315, "incorrect_loss_raw": 1.3971914052963257, "correct_loss_per_char": 0.6914591193199158, "incorrect_loss_per_char": 0.6985957026481628, "correct_loss_per_token": 1.3829182386398315, "incorrect_loss_per_token": 1.3971914052963257, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3829182386398315, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3829182386398315, "logits_per_char": -0.6914591193199158, "num_chars": 2}, {"sum_logits": -1.510419487953186, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.510419487953186, "logits_per_char": -0.755209743976593, "num_chars": 2}, {"sum_logits": -1.3281538486480713, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": true, "logits_per_token": -1.3281538486480713, "logits_per_char": -0.6640769243240356, "num_chars": 2}, {"sum_logits": -1.3530008792877197, "num_tokens": 1, "num_tokens_all": 316, "is_greedy": false, "logits_per_token": -1.3530008792877197, "logits_per_char": -0.6765004396438599, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 900, "native_id": "Mercury_189105", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4285005331039429, "incorrect_loss_raw": 1.3816753228505452, "correct_loss_per_char": 0.7142502665519714, "incorrect_loss_per_char": 0.6908376614252726, "correct_loss_per_token": 1.4285005331039429, "incorrect_loss_per_token": 1.3816753228505452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2966092824935913, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.2966092824935913, "logits_per_char": -0.6483046412467957, "num_chars": 2}, {"sum_logits": -1.3821930885314941, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3821930885314941, "logits_per_char": -0.6910965442657471, "num_chars": 2}, {"sum_logits": -1.4662235975265503, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4662235975265503, "logits_per_char": -0.7331117987632751, "num_chars": 2}, {"sum_logits": -1.4285005331039429, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4285005331039429, "logits_per_char": -0.7142502665519714, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 901, "native_id": "NYSEDREGENTS_2010_8_7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.541820764541626, "incorrect_loss_raw": 1.347165862719218, "correct_loss_per_char": 0.770910382270813, "incorrect_loss_per_char": 0.673582931359609, "correct_loss_per_token": 1.541820764541626, "incorrect_loss_per_token": 1.347165862719218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.322383165359497, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.322383165359497, "logits_per_char": -0.6611915826797485, "num_chars": 2}, {"sum_logits": -1.3533238172531128, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3533238172531128, "logits_per_char": -0.6766619086265564, "num_chars": 2}, {"sum_logits": -1.541820764541626, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.541820764541626, "logits_per_char": -0.770910382270813, "num_chars": 2}, {"sum_logits": -1.365790605545044, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.365790605545044, "logits_per_char": -0.682895302772522, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 902, "native_id": "Mercury_SC_400126", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3747923374176025, "incorrect_loss_raw": 1.3952943881352742, "correct_loss_per_char": 0.6873961687088013, "incorrect_loss_per_char": 0.6976471940676371, "correct_loss_per_token": 1.3747923374176025, "incorrect_loss_per_token": 1.3952943881352742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3825784921646118, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.3825784921646118, "logits_per_char": -0.6912892460823059, "num_chars": 2}, {"sum_logits": -1.4028362035751343, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4028362035751343, "logits_per_char": -0.7014181017875671, "num_chars": 2}, {"sum_logits": -1.4004684686660767, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.4004684686660767, "logits_per_char": -0.7002342343330383, "num_chars": 2}, {"sum_logits": -1.3747923374176025, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -1.3747923374176025, "logits_per_char": -0.6873961687088013, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 903, "native_id": "MCAS_2011_8_17696", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3248292207717896, "incorrect_loss_raw": 1.4234800338745117, "correct_loss_per_char": 0.6624146103858948, "incorrect_loss_per_char": 0.7117400169372559, "correct_loss_per_token": 1.3248292207717896, "incorrect_loss_per_token": 1.4234800338745117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5638784170150757, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.5638784170150757, "logits_per_char": -0.7819392085075378, "num_chars": 2}, {"sum_logits": -1.2493547201156616, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.2493547201156616, "logits_per_char": -0.6246773600578308, "num_chars": 2}, {"sum_logits": -1.4572069644927979, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4572069644927979, "logits_per_char": -0.7286034822463989, "num_chars": 2}, {"sum_logits": -1.3248292207717896, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3248292207717896, "logits_per_char": -0.6624146103858948, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 904, "native_id": "NCEOGA_2013_5_27", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2715611457824707, "incorrect_loss_raw": 1.433102011680603, "correct_loss_per_char": 0.6357805728912354, "incorrect_loss_per_char": 0.7165510058403015, "correct_loss_per_token": 1.2715611457824707, "incorrect_loss_per_token": 1.433102011680603, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4623258113861084, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4623258113861084, "logits_per_char": -0.7311629056930542, "num_chars": 2}, {"sum_logits": -1.4679523706436157, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4679523706436157, "logits_per_char": -0.7339761853218079, "num_chars": 2}, {"sum_logits": -1.369027853012085, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.369027853012085, "logits_per_char": -0.6845139265060425, "num_chars": 2}, {"sum_logits": -1.2715611457824707, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.2715611457824707, "logits_per_char": -0.6357805728912354, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 905, "native_id": "Mercury_7058503", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4762147665023804, "incorrect_loss_raw": 1.3661551872889202, "correct_loss_per_char": 0.7381073832511902, "incorrect_loss_per_char": 0.6830775936444601, "correct_loss_per_token": 1.4762147665023804, "incorrect_loss_per_token": 1.3661551872889202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.260558843612671, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.260558843612671, "logits_per_char": -0.6302794218063354, "num_chars": 2}, {"sum_logits": -1.3738377094268799, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.3738377094268799, "logits_per_char": -0.6869188547134399, "num_chars": 2}, {"sum_logits": -1.4640690088272095, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4640690088272095, "logits_per_char": -0.7320345044136047, "num_chars": 2}, {"sum_logits": -1.4762147665023804, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4762147665023804, "logits_per_char": -0.7381073832511902, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 906, "native_id": "MEA_2016_8_18", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.292390823364258, "incorrect_loss_raw": 1.3000966707865398, "correct_loss_per_char": 1.146195411682129, "incorrect_loss_per_char": 0.6500483353932699, "correct_loss_per_token": 2.292390823364258, "incorrect_loss_per_token": 1.3000966707865398, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8722186088562012, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": true, "logits_per_token": -0.8722186088562012, "logits_per_char": -0.4361093044281006, "num_chars": 2}, {"sum_logits": -1.2671284675598145, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.2671284675598145, "logits_per_char": -0.6335642337799072, "num_chars": 2}, {"sum_logits": -1.7609429359436035, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -1.7609429359436035, "logits_per_char": -0.8804714679718018, "num_chars": 2}, {"sum_logits": -2.292390823364258, "num_tokens": 1, "num_tokens_all": 317, "is_greedy": false, "logits_per_token": -2.292390823364258, "logits_per_char": -1.146195411682129, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 907, "native_id": "Mercury_7205328", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.374896764755249, "incorrect_loss_raw": 1.3991502126057942, "correct_loss_per_char": 0.6874483823776245, "incorrect_loss_per_char": 0.6995751063028971, "correct_loss_per_token": 1.374896764755249, "incorrect_loss_per_token": 1.3991502126057942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3815754652023315, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3815754652023315, "logits_per_char": -0.6907877326011658, "num_chars": 2}, {"sum_logits": -1.305832028388977, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.305832028388977, "logits_per_char": -0.6529160141944885, "num_chars": 2}, {"sum_logits": -1.374896764755249, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.374896764755249, "logits_per_char": -0.6874483823776245, "num_chars": 2}, {"sum_logits": -1.5100431442260742, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.5100431442260742, "logits_per_char": -0.7550215721130371, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 908, "native_id": "Mercury_SC_408984", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5026332139968872, "incorrect_loss_raw": 1.3601744174957275, "correct_loss_per_char": 0.7513166069984436, "incorrect_loss_per_char": 0.6800872087478638, "correct_loss_per_token": 1.5026332139968872, "incorrect_loss_per_token": 1.3601744174957275, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4922876358032227, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4922876358032227, "logits_per_char": -0.7461438179016113, "num_chars": 2}, {"sum_logits": -1.3457059860229492, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3457059860229492, "logits_per_char": -0.6728529930114746, "num_chars": 2}, {"sum_logits": -1.5026332139968872, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5026332139968872, "logits_per_char": -0.7513166069984436, "num_chars": 2}, {"sum_logits": -1.2425296306610107, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2425296306610107, "logits_per_char": -0.6212648153305054, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 909, "native_id": "Mercury_178535", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3923765420913696, "incorrect_loss_raw": 1.3935933510462444, "correct_loss_per_char": 0.6961882710456848, "incorrect_loss_per_char": 0.6967966755231222, "correct_loss_per_token": 1.3923765420913696, "incorrect_loss_per_token": 1.3935933510462444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2906924486160278, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.2906924486160278, "logits_per_char": -0.6453462243080139, "num_chars": 2}, {"sum_logits": -1.4073344469070435, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4073344469070435, "logits_per_char": -0.7036672234535217, "num_chars": 2}, {"sum_logits": -1.3923765420913696, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3923765420913696, "logits_per_char": -0.6961882710456848, "num_chars": 2}, {"sum_logits": -1.4827531576156616, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4827531576156616, "logits_per_char": -0.7413765788078308, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 910, "native_id": "Mercury_7011760", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.453557014465332, "incorrect_loss_raw": 1.3717339436213176, "correct_loss_per_char": 0.726778507232666, "incorrect_loss_per_char": 0.6858669718106588, "correct_loss_per_token": 1.453557014465332, "incorrect_loss_per_token": 1.3717339436213176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.453557014465332, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.453557014465332, "logits_per_char": -0.726778507232666, "num_chars": 2}, {"sum_logits": -1.4116556644439697, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4116556644439697, "logits_per_char": -0.7058278322219849, "num_chars": 2}, {"sum_logits": -1.4191625118255615, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4191625118255615, "logits_per_char": -0.7095812559127808, "num_chars": 2}, {"sum_logits": -1.2843836545944214, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.2843836545944214, "logits_per_char": -0.6421918272972107, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 911, "native_id": "Mercury_SC_406663", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3548048734664917, "incorrect_loss_raw": 1.4045081933339436, "correct_loss_per_char": 0.6774024367332458, "incorrect_loss_per_char": 0.7022540966669718, "correct_loss_per_token": 1.3548048734664917, "incorrect_loss_per_token": 1.4045081933339436, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3933305740356445, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3933305740356445, "logits_per_char": -0.6966652870178223, "num_chars": 2}, {"sum_logits": -1.3548048734664917, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3548048734664917, "logits_per_char": -0.6774024367332458, "num_chars": 2}, {"sum_logits": -1.4249563217163086, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4249563217163086, "logits_per_char": -0.7124781608581543, "num_chars": 2}, {"sum_logits": -1.395237684249878, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.395237684249878, "logits_per_char": -0.697618842124939, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 912, "native_id": "LEAP__8_10366", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4774649143218994, "incorrect_loss_raw": 1.3732767502466838, "correct_loss_per_char": 0.7387324571609497, "incorrect_loss_per_char": 0.6866383751233419, "correct_loss_per_token": 1.4774649143218994, "incorrect_loss_per_token": 1.3732767502466838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5711987018585205, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.5711987018585205, "logits_per_char": -0.7855993509292603, "num_chars": 2}, {"sum_logits": -1.3406261205673218, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.3406261205673218, "logits_per_char": -0.6703130602836609, "num_chars": 2}, {"sum_logits": -1.4774649143218994, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.4774649143218994, "logits_per_char": -0.7387324571609497, "num_chars": 2}, {"sum_logits": -1.208005428314209, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": true, "logits_per_token": -1.208005428314209, "logits_per_char": -0.6040027141571045, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 913, "native_id": "Mercury_7085873", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3081086874008179, "incorrect_loss_raw": 1.4450023969014485, "correct_loss_per_char": 0.6540543437004089, "incorrect_loss_per_char": 0.7225011984507242, "correct_loss_per_token": 1.3081086874008179, "incorrect_loss_per_token": 1.4450023969014485, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3081086874008179, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3081086874008179, "logits_per_char": -0.6540543437004089, "num_chars": 2}, {"sum_logits": -1.191920518875122, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.191920518875122, "logits_per_char": -0.595960259437561, "num_chars": 2}, {"sum_logits": -1.401565670967102, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.401565670967102, "logits_per_char": -0.700782835483551, "num_chars": 2}, {"sum_logits": -1.7415210008621216, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.7415210008621216, "logits_per_char": -0.8707605004310608, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 914, "native_id": "Mercury_7201058", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2120347023010254, "incorrect_loss_raw": 1.4586368004480998, "correct_loss_per_char": 0.6060173511505127, "incorrect_loss_per_char": 0.7293184002240499, "correct_loss_per_token": 1.2120347023010254, "incorrect_loss_per_token": 1.4586368004480998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5512722730636597, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5512722730636597, "logits_per_char": -0.7756361365318298, "num_chars": 2}, {"sum_logits": -1.4115970134735107, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4115970134735107, "logits_per_char": -0.7057985067367554, "num_chars": 2}, {"sum_logits": -1.413041114807129, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.413041114807129, "logits_per_char": -0.7065205574035645, "num_chars": 2}, {"sum_logits": -1.2120347023010254, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2120347023010254, "logits_per_char": -0.6060173511505127, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 915, "native_id": "ACTAAP_2009_7_9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3003085851669312, "incorrect_loss_raw": 1.4218230247497559, "correct_loss_per_char": 0.6501542925834656, "incorrect_loss_per_char": 0.7109115123748779, "correct_loss_per_token": 1.3003085851669312, "incorrect_loss_per_token": 1.4218230247497559, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4315193891525269, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4315193891525269, "logits_per_char": -0.7157596945762634, "num_chars": 2}, {"sum_logits": -1.4145439863204956, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4145439863204956, "logits_per_char": -0.7072719931602478, "num_chars": 2}, {"sum_logits": -1.4194056987762451, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4194056987762451, "logits_per_char": -0.7097028493881226, "num_chars": 2}, {"sum_logits": -1.3003085851669312, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.3003085851669312, "logits_per_char": -0.6501542925834656, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 916, "native_id": "MCAS_2004_5_2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4353324174880981, "incorrect_loss_raw": 1.3760692675908406, "correct_loss_per_char": 0.7176662087440491, "incorrect_loss_per_char": 0.6880346337954203, "correct_loss_per_token": 1.4353324174880981, "incorrect_loss_per_token": 1.3760692675908406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4353324174880981, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4353324174880981, "logits_per_char": -0.7176662087440491, "num_chars": 2}, {"sum_logits": -1.4017858505249023, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4017858505249023, "logits_per_char": -0.7008929252624512, "num_chars": 2}, {"sum_logits": -1.376427173614502, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.376427173614502, "logits_per_char": -0.688213586807251, "num_chars": 2}, {"sum_logits": -1.3499947786331177, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3499947786331177, "logits_per_char": -0.6749973893165588, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 917, "native_id": "Mercury_7270130", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.500993251800537, "incorrect_loss_raw": 1.3584849834442139, "correct_loss_per_char": 0.7504966259002686, "incorrect_loss_per_char": 0.6792424917221069, "correct_loss_per_token": 1.500993251800537, "incorrect_loss_per_token": 1.3584849834442139, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.500993251800537, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.500993251800537, "logits_per_char": -0.7504966259002686, "num_chars": 2}, {"sum_logits": -1.4312785863876343, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4312785863876343, "logits_per_char": -0.7156392931938171, "num_chars": 2}, {"sum_logits": -1.380251407623291, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.380251407623291, "logits_per_char": -0.6901257038116455, "num_chars": 2}, {"sum_logits": -1.2639249563217163, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2639249563217163, "logits_per_char": -0.6319624781608582, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 918, "native_id": "MCAS_2003_8_28", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.454331398010254, "incorrect_loss_raw": 1.3701382478078206, "correct_loss_per_char": 0.727165699005127, "incorrect_loss_per_char": 0.6850691239039103, "correct_loss_per_token": 1.454331398010254, "incorrect_loss_per_token": 1.3701382478078206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4319980144500732, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4319980144500732, "logits_per_char": -0.7159990072250366, "num_chars": 2}, {"sum_logits": -1.3288670778274536, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3288670778274536, "logits_per_char": -0.6644335389137268, "num_chars": 2}, {"sum_logits": -1.454331398010254, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.454331398010254, "logits_per_char": -0.727165699005127, "num_chars": 2}, {"sum_logits": -1.349549651145935, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.349549651145935, "logits_per_char": -0.6747748255729675, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 919, "native_id": "Mercury_SC_406684", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3380861282348633, "incorrect_loss_raw": 1.414976437886556, "correct_loss_per_char": 0.6690430641174316, "incorrect_loss_per_char": 0.707488218943278, "correct_loss_per_token": 1.3380861282348633, "incorrect_loss_per_token": 1.414976437886556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5677509307861328, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.5677509307861328, "logits_per_char": -0.7838754653930664, "num_chars": 2}, {"sum_logits": -1.2941901683807373, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.2941901683807373, "logits_per_char": -0.6470950841903687, "num_chars": 2}, {"sum_logits": -1.3829882144927979, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3829882144927979, "logits_per_char": -0.6914941072463989, "num_chars": 2}, {"sum_logits": -1.3380861282348633, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3380861282348633, "logits_per_char": -0.6690430641174316, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 920, "native_id": "NAEP_2009_4_S7+6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1758501529693604, "incorrect_loss_raw": 1.475213925043742, "correct_loss_per_char": 0.5879250764846802, "incorrect_loss_per_char": 0.737606962521871, "correct_loss_per_token": 1.1758501529693604, "incorrect_loss_per_token": 1.475213925043742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.522398591041565, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.522398591041565, "logits_per_char": -0.7611992955207825, "num_chars": 2}, {"sum_logits": -1.5220025777816772, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5220025777816772, "logits_per_char": -0.7610012888908386, "num_chars": 2}, {"sum_logits": -1.3812406063079834, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3812406063079834, "logits_per_char": -0.6906203031539917, "num_chars": 2}, {"sum_logits": -1.1758501529693604, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.1758501529693604, "logits_per_char": -0.5879250764846802, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 921, "native_id": "Mercury_SC_402053", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3660016059875488, "incorrect_loss_raw": 1.3993292252222698, "correct_loss_per_char": 0.6830008029937744, "incorrect_loss_per_char": 0.6996646126111349, "correct_loss_per_token": 1.3660016059875488, "incorrect_loss_per_token": 1.3993292252222698, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4147233963012695, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4147233963012695, "logits_per_char": -0.7073616981506348, "num_chars": 2}, {"sum_logits": -1.4183567762374878, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4183567762374878, "logits_per_char": -0.7091783881187439, "num_chars": 2}, {"sum_logits": -1.3649075031280518, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3649075031280518, "logits_per_char": -0.6824537515640259, "num_chars": 2}, {"sum_logits": -1.3660016059875488, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3660016059875488, "logits_per_char": -0.6830008029937744, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 922, "native_id": "Mercury_7267838", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2452549934387207, "incorrect_loss_raw": 1.4468047618865967, "correct_loss_per_char": 0.6226274967193604, "incorrect_loss_per_char": 0.7234023809432983, "correct_loss_per_token": 1.2452549934387207, "incorrect_loss_per_token": 1.4468047618865967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2452549934387207, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.2452549934387207, "logits_per_char": -0.6226274967193604, "num_chars": 2}, {"sum_logits": -1.338976502418518, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.338976502418518, "logits_per_char": -0.669488251209259, "num_chars": 2}, {"sum_logits": -1.476296305656433, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.476296305656433, "logits_per_char": -0.7381481528282166, "num_chars": 2}, {"sum_logits": -1.5251414775848389, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.5251414775848389, "logits_per_char": -0.7625707387924194, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 923, "native_id": "MCAS_2003_8_10", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2807201147079468, "incorrect_loss_raw": 1.4327245950698853, "correct_loss_per_char": 0.6403600573539734, "incorrect_loss_per_char": 0.7163622975349426, "correct_loss_per_token": 1.2807201147079468, "incorrect_loss_per_token": 1.4327245950698853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5078163146972656, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.5078163146972656, "logits_per_char": -0.7539081573486328, "num_chars": 2}, {"sum_logits": -1.2807201147079468, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2807201147079468, "logits_per_char": -0.6403600573539734, "num_chars": 2}, {"sum_logits": -1.3308651447296143, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3308651447296143, "logits_per_char": -0.6654325723648071, "num_chars": 2}, {"sum_logits": -1.4594923257827759, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4594923257827759, "logits_per_char": -0.7297461628913879, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 924, "native_id": "Mercury_7085470", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4449689388275146, "incorrect_loss_raw": 1.376232425371806, "correct_loss_per_char": 0.7224844694137573, "incorrect_loss_per_char": 0.688116212685903, "correct_loss_per_token": 1.4449689388275146, "incorrect_loss_per_token": 1.376232425371806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4822438955307007, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4822438955307007, "logits_per_char": -0.7411219477653503, "num_chars": 2}, {"sum_logits": -1.4449689388275146, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4449689388275146, "logits_per_char": -0.7224844694137573, "num_chars": 2}, {"sum_logits": -1.3653366565704346, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3653366565704346, "logits_per_char": -0.6826683282852173, "num_chars": 2}, {"sum_logits": -1.2811167240142822, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.2811167240142822, "logits_per_char": -0.6405583620071411, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 925, "native_id": "Mercury_7141890", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3571516275405884, "incorrect_loss_raw": 1.403164029121399, "correct_loss_per_char": 0.6785758137702942, "incorrect_loss_per_char": 0.7015820145606995, "correct_loss_per_token": 1.3571516275405884, "incorrect_loss_per_token": 1.403164029121399, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3173131942749023, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.3173131942749023, "logits_per_char": -0.6586565971374512, "num_chars": 2}, {"sum_logits": -1.3571516275405884, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3571516275405884, "logits_per_char": -0.6785758137702942, "num_chars": 2}, {"sum_logits": -1.4411461353302002, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4411461353302002, "logits_per_char": -0.7205730676651001, "num_chars": 2}, {"sum_logits": -1.4510327577590942, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4510327577590942, "logits_per_char": -0.7255163788795471, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 926, "native_id": "Mercury_SC_415395", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3912029266357422, "incorrect_loss_raw": 1.395641565322876, "correct_loss_per_char": 0.6956014633178711, "incorrect_loss_per_char": 0.697820782661438, "correct_loss_per_token": 1.3912029266357422, "incorrect_loss_per_token": 1.395641565322876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3912029266357422, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3912029266357422, "logits_per_char": -0.6956014633178711, "num_chars": 2}, {"sum_logits": -1.4068779945373535, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4068779945373535, "logits_per_char": -0.7034389972686768, "num_chars": 2}, {"sum_logits": -1.2832746505737305, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2832746505737305, "logits_per_char": -0.6416373252868652, "num_chars": 2}, {"sum_logits": -1.496772050857544, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.496772050857544, "logits_per_char": -0.748386025428772, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 927, "native_id": "Mercury_7171588", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5431634187698364, "incorrect_loss_raw": 1.3467048009236653, "correct_loss_per_char": 0.7715817093849182, "incorrect_loss_per_char": 0.6733524004618326, "correct_loss_per_token": 1.5431634187698364, "incorrect_loss_per_token": 1.3467048009236653, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5431634187698364, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.5431634187698364, "logits_per_char": -0.7715817093849182, "num_chars": 2}, {"sum_logits": -1.3158718347549438, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3158718347549438, "logits_per_char": -0.6579359173774719, "num_chars": 2}, {"sum_logits": -1.3051742315292358, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.3051742315292358, "logits_per_char": -0.6525871157646179, "num_chars": 2}, {"sum_logits": -1.4190683364868164, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4190683364868164, "logits_per_char": -0.7095341682434082, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 928, "native_id": "Mercury_7220028", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.43617844581604, "incorrect_loss_raw": 1.3774439096450806, "correct_loss_per_char": 0.71808922290802, "incorrect_loss_per_char": 0.6887219548225403, "correct_loss_per_token": 1.43617844581604, "incorrect_loss_per_token": 1.3774439096450806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.43617844581604, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.43617844581604, "logits_per_char": -0.71808922290802, "num_chars": 2}, {"sum_logits": -1.376848578453064, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.376848578453064, "logits_per_char": -0.688424289226532, "num_chars": 2}, {"sum_logits": -1.4602030515670776, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4602030515670776, "logits_per_char": -0.7301015257835388, "num_chars": 2}, {"sum_logits": -1.2952800989151, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.2952800989151, "logits_per_char": -0.64764004945755, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 929, "native_id": "Mercury_7212153", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5924885272979736, "incorrect_loss_raw": 1.3349456389745076, "correct_loss_per_char": 0.7962442636489868, "incorrect_loss_per_char": 0.6674728194872538, "correct_loss_per_token": 1.5924885272979736, "incorrect_loss_per_token": 1.3349456389745076, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2063627243041992, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.2063627243041992, "logits_per_char": -0.6031813621520996, "num_chars": 2}, {"sum_logits": -1.367159366607666, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.367159366607666, "logits_per_char": -0.683579683303833, "num_chars": 2}, {"sum_logits": -1.4313148260116577, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.4313148260116577, "logits_per_char": -0.7156574130058289, "num_chars": 2}, {"sum_logits": -1.5924885272979736, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.5924885272979736, "logits_per_char": -0.7962442636489868, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 930, "native_id": "Mercury_7124355", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3660732507705688, "incorrect_loss_raw": 1.4003760814666748, "correct_loss_per_char": 0.6830366253852844, "incorrect_loss_per_char": 0.7001880407333374, "correct_loss_per_token": 1.3660732507705688, "incorrect_loss_per_token": 1.4003760814666748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3660732507705688, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": true, "logits_per_token": -1.3660732507705688, "logits_per_char": -0.6830366253852844, "num_chars": 2}, {"sum_logits": -1.394677996635437, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.394677996635437, "logits_per_char": -0.6973389983177185, "num_chars": 2}, {"sum_logits": -1.4041472673416138, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4041472673416138, "logits_per_char": -0.7020736336708069, "num_chars": 2}, {"sum_logits": -1.4023029804229736, "num_tokens": 1, "num_tokens_all": 320, "is_greedy": false, "logits_per_token": -1.4023029804229736, "logits_per_char": -0.7011514902114868, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 931, "native_id": "Mercury_7217438", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2636841535568237, "incorrect_loss_raw": 1.4382234811782837, "correct_loss_per_char": 0.6318420767784119, "incorrect_loss_per_char": 0.7191117405891418, "correct_loss_per_token": 1.2636841535568237, "incorrect_loss_per_token": 1.4382234811782837, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5161603689193726, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.5161603689193726, "logits_per_char": -0.7580801844596863, "num_chars": 2}, {"sum_logits": -1.4185940027236938, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4185940027236938, "logits_per_char": -0.7092970013618469, "num_chars": 2}, {"sum_logits": -1.3799160718917847, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3799160718917847, "logits_per_char": -0.6899580359458923, "num_chars": 2}, {"sum_logits": -1.2636841535568237, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.2636841535568237, "logits_per_char": -0.6318420767784119, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 932, "native_id": "Mercury_7083598", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4326356649398804, "incorrect_loss_raw": 1.3790569305419922, "correct_loss_per_char": 0.7163178324699402, "incorrect_loss_per_char": 0.6895284652709961, "correct_loss_per_token": 1.4326356649398804, "incorrect_loss_per_token": 1.3790569305419922, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4393106698989868, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4393106698989868, "logits_per_char": -0.7196553349494934, "num_chars": 2}, {"sum_logits": -1.4326356649398804, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4326356649398804, "logits_per_char": -0.7163178324699402, "num_chars": 2}, {"sum_logits": -1.411664366722107, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.411664366722107, "logits_per_char": -0.7058321833610535, "num_chars": 2}, {"sum_logits": -1.2861957550048828, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.2861957550048828, "logits_per_char": -0.6430978775024414, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 933, "native_id": "Mercury_7071610", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2455633878707886, "incorrect_loss_raw": 1.4443401495615642, "correct_loss_per_char": 0.6227816939353943, "incorrect_loss_per_char": 0.7221700747807821, "correct_loss_per_token": 1.2455633878707886, "incorrect_loss_per_token": 1.4443401495615642, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5255154371261597, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5255154371261597, "logits_per_char": -0.7627577185630798, "num_chars": 2}, {"sum_logits": -1.383803129196167, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.383803129196167, "logits_per_char": -0.6919015645980835, "num_chars": 2}, {"sum_logits": -1.4237018823623657, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4237018823623657, "logits_per_char": -0.7118509411811829, "num_chars": 2}, {"sum_logits": -1.2455633878707886, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2455633878707886, "logits_per_char": -0.6227816939353943, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 934, "native_id": "Mercury_407767", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3628486394882202, "incorrect_loss_raw": 1.3996288379033406, "correct_loss_per_char": 0.6814243197441101, "incorrect_loss_per_char": 0.6998144189516703, "correct_loss_per_token": 1.3628486394882202, "incorrect_loss_per_token": 1.3996288379033406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.441554307937622, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.441554307937622, "logits_per_char": -0.720777153968811, "num_chars": 2}, {"sum_logits": -1.3992657661437988, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3992657661437988, "logits_per_char": -0.6996328830718994, "num_chars": 2}, {"sum_logits": -1.358066439628601, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.358066439628601, "logits_per_char": -0.6790332198143005, "num_chars": 2}, {"sum_logits": -1.3628486394882202, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3628486394882202, "logits_per_char": -0.6814243197441101, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 935, "native_id": "Mercury_SC_402124", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3167799711227417, "incorrect_loss_raw": 1.4215614795684814, "correct_loss_per_char": 0.6583899855613708, "incorrect_loss_per_char": 0.7107807397842407, "correct_loss_per_token": 1.3167799711227417, "incorrect_loss_per_token": 1.4215614795684814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3167799711227417, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.3167799711227417, "logits_per_char": -0.6583899855613708, "num_chars": 2}, {"sum_logits": -1.414076805114746, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.414076805114746, "logits_per_char": -0.707038402557373, "num_chars": 2}, {"sum_logits": -1.3511847257614136, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3511847257614136, "logits_per_char": -0.6755923628807068, "num_chars": 2}, {"sum_logits": -1.4994229078292847, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.4994229078292847, "logits_per_char": -0.7497114539146423, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 936, "native_id": "Mercury_LBS10976", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3491508960723877, "incorrect_loss_raw": 1.405660351117452, "correct_loss_per_char": 0.6745754480361938, "incorrect_loss_per_char": 0.702830175558726, "correct_loss_per_token": 1.3491508960723877, "incorrect_loss_per_token": 1.405660351117452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3491508960723877, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3491508960723877, "logits_per_char": -0.6745754480361938, "num_chars": 2}, {"sum_logits": -1.438232183456421, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.438232183456421, "logits_per_char": -0.7191160917282104, "num_chars": 2}, {"sum_logits": -1.4503103494644165, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4503103494644165, "logits_per_char": -0.7251551747322083, "num_chars": 2}, {"sum_logits": -1.3284385204315186, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3284385204315186, "logits_per_char": -0.6642192602157593, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 937, "native_id": "Mercury_178308", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4453805685043335, "incorrect_loss_raw": 1.3799935579299927, "correct_loss_per_char": 0.7226902842521667, "incorrect_loss_per_char": 0.6899967789649963, "correct_loss_per_token": 1.4453805685043335, "incorrect_loss_per_token": 1.3799935579299927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.508980631828308, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.508980631828308, "logits_per_char": -0.754490315914154, "num_chars": 2}, {"sum_logits": -1.4209192991256714, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4209192991256714, "logits_per_char": -0.7104596495628357, "num_chars": 2}, {"sum_logits": -1.4453805685043335, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4453805685043335, "logits_per_char": -0.7226902842521667, "num_chars": 2}, {"sum_logits": -1.2100807428359985, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.2100807428359985, "logits_per_char": -0.6050403714179993, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 938, "native_id": "LEAP__7_10349", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.358209252357483, "incorrect_loss_raw": 1.4106911023457844, "correct_loss_per_char": 0.6791046261787415, "incorrect_loss_per_char": 0.7053455511728922, "correct_loss_per_token": 1.358209252357483, "incorrect_loss_per_token": 1.4106911023457844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.457112431526184, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.457112431526184, "logits_per_char": -0.728556215763092, "num_chars": 2}, {"sum_logits": -1.5366458892822266, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5366458892822266, "logits_per_char": -0.7683229446411133, "num_chars": 2}, {"sum_logits": -1.358209252357483, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.358209252357483, "logits_per_char": -0.6791046261787415, "num_chars": 2}, {"sum_logits": -1.2383149862289429, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.2383149862289429, "logits_per_char": -0.6191574931144714, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 939, "native_id": "Mercury_SC_400857", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3863205909729004, "incorrect_loss_raw": 1.3903337319691975, "correct_loss_per_char": 0.6931602954864502, "incorrect_loss_per_char": 0.6951668659845988, "correct_loss_per_token": 1.3863205909729004, "incorrect_loss_per_token": 1.3903337319691975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3931727409362793, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3931727409362793, "logits_per_char": -0.6965863704681396, "num_chars": 2}, {"sum_logits": -1.3863205909729004, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3863205909729004, "logits_per_char": -0.6931602954864502, "num_chars": 2}, {"sum_logits": -1.3984246253967285, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": false, "logits_per_token": -1.3984246253967285, "logits_per_char": -0.6992123126983643, "num_chars": 2}, {"sum_logits": -1.379403829574585, "num_tokens": 1, "num_tokens_all": 325, "is_greedy": true, "logits_per_token": -1.379403829574585, "logits_per_char": -0.6897019147872925, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 940, "native_id": "NCEOGA_2013_5_39", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.349698543548584, "incorrect_loss_raw": 1.406943957010905, "correct_loss_per_char": 0.674849271774292, "incorrect_loss_per_char": 0.7034719785054525, "correct_loss_per_token": 1.349698543548584, "incorrect_loss_per_token": 1.406943957010905, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4924507141113281, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4924507141113281, "logits_per_char": -0.7462253570556641, "num_chars": 2}, {"sum_logits": -1.349698543548584, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.349698543548584, "logits_per_char": -0.674849271774292, "num_chars": 2}, {"sum_logits": -1.4375755786895752, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4375755786895752, "logits_per_char": -0.7187877893447876, "num_chars": 2}, {"sum_logits": -1.2908055782318115, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.2908055782318115, "logits_per_char": -0.6454027891159058, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 941, "native_id": "NAEP_2009_4_S11+1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3087010383605957, "incorrect_loss_raw": 1.423753261566162, "correct_loss_per_char": 0.6543505191802979, "incorrect_loss_per_char": 0.711876630783081, "correct_loss_per_token": 1.3087010383605957, "incorrect_loss_per_token": 1.423753261566162, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.510800838470459, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.510800838470459, "logits_per_char": -0.7554004192352295, "num_chars": 2}, {"sum_logits": -1.3087010383605957, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": true, "logits_per_token": -1.3087010383605957, "logits_per_char": -0.6543505191802979, "num_chars": 2}, {"sum_logits": -1.3535538911819458, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.3535538911819458, "logits_per_char": -0.6767769455909729, "num_chars": 2}, {"sum_logits": -1.4069050550460815, "num_tokens": 1, "num_tokens_all": 304, "is_greedy": false, "logits_per_token": -1.4069050550460815, "logits_per_char": -0.7034525275230408, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 942, "native_id": "Mercury_SC_415469", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.418039321899414, "incorrect_loss_raw": 1.3809826771418254, "correct_loss_per_char": 0.709019660949707, "incorrect_loss_per_char": 0.6904913385709127, "correct_loss_per_token": 1.418039321899414, "incorrect_loss_per_token": 1.3809826771418254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3959579467773438, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3959579467773438, "logits_per_char": -0.6979789733886719, "num_chars": 2}, {"sum_logits": -1.418039321899414, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.418039321899414, "logits_per_char": -0.709019660949707, "num_chars": 2}, {"sum_logits": -1.326312780380249, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.326312780380249, "logits_per_char": -0.6631563901901245, "num_chars": 2}, {"sum_logits": -1.4206773042678833, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4206773042678833, "logits_per_char": -0.7103386521339417, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 943, "native_id": "Mercury_7110968", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4454796314239502, "incorrect_loss_raw": 1.374204158782959, "correct_loss_per_char": 0.7227398157119751, "incorrect_loss_per_char": 0.6871020793914795, "correct_loss_per_token": 1.4454796314239502, "incorrect_loss_per_token": 1.374204158782959, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4454796314239502, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4454796314239502, "logits_per_char": -0.7227398157119751, "num_chars": 2}, {"sum_logits": -1.3833204507827759, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3833204507827759, "logits_per_char": -0.6916602253913879, "num_chars": 2}, {"sum_logits": -1.4303120374679565, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4303120374679565, "logits_per_char": -0.7151560187339783, "num_chars": 2}, {"sum_logits": -1.3089799880981445, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.3089799880981445, "logits_per_char": -0.6544899940490723, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 944, "native_id": "Mercury_7097440", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3938777446746826, "incorrect_loss_raw": 1.3916976849238079, "correct_loss_per_char": 0.6969388723373413, "incorrect_loss_per_char": 0.6958488424619039, "correct_loss_per_token": 1.3938777446746826, "incorrect_loss_per_token": 1.3916976849238079, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4903942346572876, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4903942346572876, "logits_per_char": -0.7451971173286438, "num_chars": 2}, {"sum_logits": -1.3867309093475342, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3867309093475342, "logits_per_char": -0.6933654546737671, "num_chars": 2}, {"sum_logits": -1.3938777446746826, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3938777446746826, "logits_per_char": -0.6969388723373413, "num_chars": 2}, {"sum_logits": -1.2979679107666016, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2979679107666016, "logits_per_char": -0.6489839553833008, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 945, "native_id": "Mercury_SC_416138", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3426493406295776, "incorrect_loss_raw": 1.4099044799804688, "correct_loss_per_char": 0.6713246703147888, "incorrect_loss_per_char": 0.7049522399902344, "correct_loss_per_token": 1.3426493406295776, "incorrect_loss_per_token": 1.4099044799804688, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4915467500686646, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4915467500686646, "logits_per_char": -0.7457733750343323, "num_chars": 2}, {"sum_logits": -1.46110999584198, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.46110999584198, "logits_per_char": -0.73055499792099, "num_chars": 2}, {"sum_logits": -1.3426493406295776, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3426493406295776, "logits_per_char": -0.6713246703147888, "num_chars": 2}, {"sum_logits": -1.2770566940307617, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.2770566940307617, "logits_per_char": -0.6385283470153809, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 946, "native_id": "Mercury_403912", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4115082025527954, "incorrect_loss_raw": 1.3879512151082356, "correct_loss_per_char": 0.7057541012763977, "incorrect_loss_per_char": 0.6939756075541178, "correct_loss_per_token": 1.4115082025527954, "incorrect_loss_per_token": 1.3879512151082356, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4115082025527954, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4115082025527954, "logits_per_char": -0.7057541012763977, "num_chars": 2}, {"sum_logits": -1.5234533548355103, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.5234533548355103, "logits_per_char": -0.7617266774177551, "num_chars": 2}, {"sum_logits": -1.2995456457138062, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.2995456457138062, "logits_per_char": -0.6497728228569031, "num_chars": 2}, {"sum_logits": -1.3408546447753906, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3408546447753906, "logits_per_char": -0.6704273223876953, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 947, "native_id": "Mercury_7219695", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4303547143936157, "incorrect_loss_raw": 1.379315932591756, "correct_loss_per_char": 0.7151773571968079, "incorrect_loss_per_char": 0.689657966295878, "correct_loss_per_token": 1.4303547143936157, "incorrect_loss_per_token": 1.379315932591756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4303547143936157, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4303547143936157, "logits_per_char": -0.7151773571968079, "num_chars": 2}, {"sum_logits": -1.4394675493240356, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4394675493240356, "logits_per_char": -0.7197337746620178, "num_chars": 2}, {"sum_logits": -1.355333924293518, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.355333924293518, "logits_per_char": -0.677666962146759, "num_chars": 2}, {"sum_logits": -1.3431463241577148, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3431463241577148, "logits_per_char": -0.6715731620788574, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 948, "native_id": "Mercury_SC_LBS10272", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3545218706130981, "incorrect_loss_raw": 1.4096972544987996, "correct_loss_per_char": 0.6772609353065491, "incorrect_loss_per_char": 0.7048486272493998, "correct_loss_per_token": 1.3545218706130981, "incorrect_loss_per_token": 1.4096972544987996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2986054420471191, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": true, "logits_per_token": -1.2986054420471191, "logits_per_char": -0.6493027210235596, "num_chars": 2}, {"sum_logits": -1.522265911102295, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.522265911102295, "logits_per_char": -0.7611329555511475, "num_chars": 2}, {"sum_logits": -1.3545218706130981, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.3545218706130981, "logits_per_char": -0.6772609353065491, "num_chars": 2}, {"sum_logits": -1.4082204103469849, "num_tokens": 1, "num_tokens_all": 312, "is_greedy": false, "logits_per_token": -1.4082204103469849, "logits_per_char": -0.7041102051734924, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 949, "native_id": "NYSEDREGENTS_2015_8_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3618725538253784, "incorrect_loss_raw": 1.4041428168614705, "correct_loss_per_char": 0.6809362769126892, "incorrect_loss_per_char": 0.7020714084307352, "correct_loss_per_token": 1.3618725538253784, "incorrect_loss_per_token": 1.4041428168614705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3505377769470215, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3505377769470215, "logits_per_char": -0.6752688884735107, "num_chars": 2}, {"sum_logits": -1.3234796524047852, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3234796524047852, "logits_per_char": -0.6617398262023926, "num_chars": 2}, {"sum_logits": -1.3618725538253784, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3618725538253784, "logits_per_char": -0.6809362769126892, "num_chars": 2}, {"sum_logits": -1.538411021232605, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.538411021232605, "logits_per_char": -0.7692055106163025, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 950, "native_id": "MSA_2015_5_37", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4126678705215454, "incorrect_loss_raw": 1.3843307892481487, "correct_loss_per_char": 0.7063339352607727, "incorrect_loss_per_char": 0.6921653946240743, "correct_loss_per_token": 1.4126678705215454, "incorrect_loss_per_token": 1.3843307892481487, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.307572364807129, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.307572364807129, "logits_per_char": -0.6537861824035645, "num_chars": 2}, {"sum_logits": -1.4390949010849, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4390949010849, "logits_per_char": -0.71954745054245, "num_chars": 2}, {"sum_logits": -1.406325101852417, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.406325101852417, "logits_per_char": -0.7031625509262085, "num_chars": 2}, {"sum_logits": -1.4126678705215454, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4126678705215454, "logits_per_char": -0.7063339352607727, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 951, "native_id": "Mercury_SC_LBS10620", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4044190645217896, "incorrect_loss_raw": 1.386720895767212, "correct_loss_per_char": 0.7022095322608948, "incorrect_loss_per_char": 0.693360447883606, "correct_loss_per_token": 1.4044190645217896, "incorrect_loss_per_token": 1.386720895767212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4044190645217896, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4044190645217896, "logits_per_char": -0.7022095322608948, "num_chars": 2}, {"sum_logits": -1.3699123859405518, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": true, "logits_per_token": -1.3699123859405518, "logits_per_char": -0.6849561929702759, "num_chars": 2}, {"sum_logits": -1.3705390691757202, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.3705390691757202, "logits_per_char": -0.6852695345878601, "num_chars": 2}, {"sum_logits": -1.4197112321853638, "num_tokens": 1, "num_tokens_all": 308, "is_greedy": false, "logits_per_token": -1.4197112321853638, "logits_per_char": -0.7098556160926819, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 952, "native_id": "MCAS_2002_8_14", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4055016040802002, "incorrect_loss_raw": 1.384473443031311, "correct_loss_per_char": 0.7027508020401001, "incorrect_loss_per_char": 0.6922367215156555, "correct_loss_per_token": 1.4055016040802002, "incorrect_loss_per_token": 1.384473443031311, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4055016040802002, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4055016040802002, "logits_per_char": -0.7027508020401001, "num_chars": 2}, {"sum_logits": -1.3982621431350708, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.3982621431350708, "logits_per_char": -0.6991310715675354, "num_chars": 2}, {"sum_logits": -1.380588412284851, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.380588412284851, "logits_per_char": -0.6902942061424255, "num_chars": 2}, {"sum_logits": -1.3745697736740112, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.3745697736740112, "logits_per_char": -0.6872848868370056, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 953, "native_id": "MCAS_1998_4_11", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1758002042770386, "incorrect_loss_raw": 1.472096602121989, "correct_loss_per_char": 0.5879001021385193, "incorrect_loss_per_char": 0.7360483010609945, "correct_loss_per_token": 1.1758002042770386, "incorrect_loss_per_token": 1.472096602121989, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4754877090454102, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4754877090454102, "logits_per_char": -0.7377438545227051, "num_chars": 2}, {"sum_logits": -1.4553968906402588, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4553968906402588, "logits_per_char": -0.7276984453201294, "num_chars": 2}, {"sum_logits": -1.4854052066802979, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4854052066802979, "logits_per_char": -0.7427026033401489, "num_chars": 2}, {"sum_logits": -1.1758002042770386, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.1758002042770386, "logits_per_char": -0.5879001021385193, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 954, "native_id": "MCAS_2000_8_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6183937788009644, "incorrect_loss_raw": 1.330459475517273, "correct_loss_per_char": 0.8091968894004822, "incorrect_loss_per_char": 0.6652297377586365, "correct_loss_per_token": 1.6183937788009644, "incorrect_loss_per_token": 1.330459475517273, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6183937788009644, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.6183937788009644, "logits_per_char": -0.8091968894004822, "num_chars": 2}, {"sum_logits": -1.432157278060913, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.432157278060913, "logits_per_char": -0.7160786390304565, "num_chars": 2}, {"sum_logits": -1.3566159009933472, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3566159009933472, "logits_per_char": -0.6783079504966736, "num_chars": 2}, {"sum_logits": -1.2026052474975586, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2026052474975586, "logits_per_char": -0.6013026237487793, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 955, "native_id": "Mercury_7108045", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3631558418273926, "incorrect_loss_raw": 1.401597221692403, "correct_loss_per_char": 0.6815779209136963, "incorrect_loss_per_char": 0.7007986108462015, "correct_loss_per_token": 1.3631558418273926, "incorrect_loss_per_token": 1.401597221692403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4661273956298828, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4661273956298828, "logits_per_char": -0.7330636978149414, "num_chars": 2}, {"sum_logits": -1.3631558418273926, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3631558418273926, "logits_per_char": -0.6815779209136963, "num_chars": 2}, {"sum_logits": -1.446671962738037, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.446671962738037, "logits_per_char": -0.7233359813690186, "num_chars": 2}, {"sum_logits": -1.2919923067092896, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2919923067092896, "logits_per_char": -0.6459961533546448, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 956, "native_id": "Mercury_7154228", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3341275453567505, "incorrect_loss_raw": 1.4129104614257812, "correct_loss_per_char": 0.6670637726783752, "incorrect_loss_per_char": 0.7064552307128906, "correct_loss_per_token": 1.3341275453567505, "incorrect_loss_per_token": 1.4129104614257812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4595004320144653, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4595004320144653, "logits_per_char": -0.7297502160072327, "num_chars": 2}, {"sum_logits": -1.3341275453567505, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3341275453567505, "logits_per_char": -0.6670637726783752, "num_chars": 2}, {"sum_logits": -1.4912561178207397, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4912561178207397, "logits_per_char": -0.7456280589103699, "num_chars": 2}, {"sum_logits": -1.2879748344421387, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.2879748344421387, "logits_per_char": -0.6439874172210693, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 957, "native_id": "TIMSS_2011_8_pg101", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2826186418533325, "incorrect_loss_raw": 1.4345502456029255, "correct_loss_per_char": 0.6413093209266663, "incorrect_loss_per_char": 0.7172751228014628, "correct_loss_per_token": 1.2826186418533325, "incorrect_loss_per_token": 1.4345502456029255, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.600232481956482, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.600232481956482, "logits_per_char": -0.800116240978241, "num_chars": 2}, {"sum_logits": -1.2826186418533325, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2826186418533325, "logits_per_char": -0.6413093209266663, "num_chars": 2}, {"sum_logits": -1.3434443473815918, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3434443473815918, "logits_per_char": -0.6717221736907959, "num_chars": 2}, {"sum_logits": -1.3599739074707031, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3599739074707031, "logits_per_char": -0.6799869537353516, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 958, "native_id": "Mercury_405951", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4108362197875977, "incorrect_loss_raw": 1.3872480789820354, "correct_loss_per_char": 0.7054181098937988, "incorrect_loss_per_char": 0.6936240394910177, "correct_loss_per_token": 1.4108362197875977, "incorrect_loss_per_token": 1.3872480789820354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4108362197875977, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4108362197875977, "logits_per_char": -0.7054181098937988, "num_chars": 2}, {"sum_logits": -1.4790762662887573, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4790762662887573, "logits_per_char": -0.7395381331443787, "num_chars": 2}, {"sum_logits": -1.306139349937439, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.306139349937439, "logits_per_char": -0.6530696749687195, "num_chars": 2}, {"sum_logits": -1.3765286207199097, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3765286207199097, "logits_per_char": -0.6882643103599548, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 959, "native_id": "Mercury_7214428", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4052424430847168, "incorrect_loss_raw": 1.3887399037679036, "correct_loss_per_char": 0.7026212215423584, "incorrect_loss_per_char": 0.6943699518839518, "correct_loss_per_token": 1.4052424430847168, "incorrect_loss_per_token": 1.3887399037679036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3451051712036133, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3451051712036133, "logits_per_char": -0.6725525856018066, "num_chars": 2}, {"sum_logits": -1.4899705648422241, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4899705648422241, "logits_per_char": -0.7449852824211121, "num_chars": 2}, {"sum_logits": -1.3311439752578735, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.3311439752578735, "logits_per_char": -0.6655719876289368, "num_chars": 2}, {"sum_logits": -1.4052424430847168, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4052424430847168, "logits_per_char": -0.7026212215423584, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 960, "native_id": "Mercury_SC_405495", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.408372163772583, "incorrect_loss_raw": 1.3862309853235881, "correct_loss_per_char": 0.7041860818862915, "incorrect_loss_per_char": 0.6931154926617941, "correct_loss_per_token": 1.408372163772583, "incorrect_loss_per_token": 1.3862309853235881, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.408372163772583, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.408372163772583, "logits_per_char": -0.7041860818862915, "num_chars": 2}, {"sum_logits": -1.4014136791229248, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4014136791229248, "logits_per_char": -0.7007068395614624, "num_chars": 2}, {"sum_logits": -1.2934541702270508, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.2934541702270508, "logits_per_char": -0.6467270851135254, "num_chars": 2}, {"sum_logits": -1.4638251066207886, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4638251066207886, "logits_per_char": -0.7319125533103943, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 961, "native_id": "Mercury_7216773", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4468241930007935, "incorrect_loss_raw": 1.3755736748377483, "correct_loss_per_char": 0.7234120965003967, "incorrect_loss_per_char": 0.6877868374188741, "correct_loss_per_token": 1.4468241930007935, "incorrect_loss_per_token": 1.3755736748377483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4819763898849487, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4819763898849487, "logits_per_char": -0.7409881949424744, "num_chars": 2}, {"sum_logits": -1.3843269348144531, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3843269348144531, "logits_per_char": -0.6921634674072266, "num_chars": 2}, {"sum_logits": -1.2604176998138428, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.2604176998138428, "logits_per_char": -0.6302088499069214, "num_chars": 2}, {"sum_logits": -1.4468241930007935, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4468241930007935, "logits_per_char": -0.7234120965003967, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 962, "native_id": "OHAT_2007_8_42", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3632479906082153, "incorrect_loss_raw": 1.4009430805842082, "correct_loss_per_char": 0.6816239953041077, "incorrect_loss_per_char": 0.7004715402921041, "correct_loss_per_token": 1.3632479906082153, "incorrect_loss_per_token": 1.4009430805842082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4383368492126465, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.4383368492126465, "logits_per_char": -0.7191684246063232, "num_chars": 2}, {"sum_logits": -1.3632479906082153, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": true, "logits_per_token": -1.3632479906082153, "logits_per_char": -0.6816239953041077, "num_chars": 2}, {"sum_logits": -1.3983123302459717, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3983123302459717, "logits_per_char": -0.6991561651229858, "num_chars": 2}, {"sum_logits": -1.3661800622940063, "num_tokens": 1, "num_tokens_all": 318, "is_greedy": false, "logits_per_token": -1.3661800622940063, "logits_per_char": -0.6830900311470032, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 963, "native_id": "ACTAAP_2008_5_15", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.440300464630127, "incorrect_loss_raw": 1.374930500984192, "correct_loss_per_char": 0.7201502323150635, "incorrect_loss_per_char": 0.687465250492096, "correct_loss_per_token": 1.440300464630127, "incorrect_loss_per_token": 1.374930500984192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.440300464630127, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.440300464630127, "logits_per_char": -0.7201502323150635, "num_chars": 2}, {"sum_logits": -1.3737072944641113, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3737072944641113, "logits_per_char": -0.6868536472320557, "num_chars": 2}, {"sum_logits": -1.3352055549621582, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3352055549621582, "logits_per_char": -0.6676027774810791, "num_chars": 2}, {"sum_logits": -1.4158786535263062, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4158786535263062, "logits_per_char": -0.7079393267631531, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 964, "native_id": "MCAS_2003_5_20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.352172613143921, "incorrect_loss_raw": 1.405682921409607, "correct_loss_per_char": 0.6760863065719604, "incorrect_loss_per_char": 0.7028414607048035, "correct_loss_per_token": 1.352172613143921, "incorrect_loss_per_token": 1.405682921409607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.352172613143921, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.352172613143921, "logits_per_char": -0.6760863065719604, "num_chars": 2}, {"sum_logits": -1.4846673011779785, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4846673011779785, "logits_per_char": -0.7423336505889893, "num_chars": 2}, {"sum_logits": -1.360906720161438, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.360906720161438, "logits_per_char": -0.680453360080719, "num_chars": 2}, {"sum_logits": -1.3714747428894043, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3714747428894043, "logits_per_char": -0.6857373714447021, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 965, "native_id": "Mercury_7007770", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2157058715820312, "incorrect_loss_raw": 1.449944297472636, "correct_loss_per_char": 1.1078529357910156, "incorrect_loss_per_char": 0.724972148736318, "correct_loss_per_token": 2.2157058715820312, "incorrect_loss_per_token": 1.449944297472636, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9437235593795776, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -0.9437235593795776, "logits_per_char": -0.4718617796897888, "num_chars": 2}, {"sum_logits": -1.4953129291534424, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4953129291534424, "logits_per_char": -0.7476564645767212, "num_chars": 2}, {"sum_logits": -1.9107964038848877, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.9107964038848877, "logits_per_char": -0.9553982019424438, "num_chars": 2}, {"sum_logits": -2.2157058715820312, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -2.2157058715820312, "logits_per_char": -1.1078529357910156, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 966, "native_id": "Mercury_400608", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3980751037597656, "incorrect_loss_raw": 1.3885554869969685, "correct_loss_per_char": 0.6990375518798828, "incorrect_loss_per_char": 0.6942777434984843, "correct_loss_per_token": 1.3980751037597656, "incorrect_loss_per_token": 1.3885554869969685, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.391367793083191, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.391367793083191, "logits_per_char": -0.6956838965415955, "num_chars": 2}, {"sum_logits": -1.3980751037597656, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3980751037597656, "logits_per_char": -0.6990375518798828, "num_chars": 2}, {"sum_logits": -1.47672700881958, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.47672700881958, "logits_per_char": -0.73836350440979, "num_chars": 2}, {"sum_logits": -1.2975716590881348, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.2975716590881348, "logits_per_char": -0.6487858295440674, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 967, "native_id": "Mercury_7217683", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.46728515625, "incorrect_loss_raw": 1.367327372233073, "correct_loss_per_char": 0.733642578125, "incorrect_loss_per_char": 0.6836636861165365, "correct_loss_per_token": 1.46728515625, "incorrect_loss_per_token": 1.367327372233073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2994458675384521, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.2994458675384521, "logits_per_char": -0.6497229337692261, "num_chars": 2}, {"sum_logits": -1.3816584348678589, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3816584348678589, "logits_per_char": -0.6908292174339294, "num_chars": 2}, {"sum_logits": -1.4208778142929077, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4208778142929077, "logits_per_char": -0.7104389071464539, "num_chars": 2}, {"sum_logits": -1.46728515625, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.46728515625, "logits_per_char": -0.733642578125, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 968, "native_id": "Mercury_7245123", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3853192329406738, "incorrect_loss_raw": 1.3947385549545288, "correct_loss_per_char": 0.6926596164703369, "incorrect_loss_per_char": 0.6973692774772644, "correct_loss_per_token": 1.3853192329406738, "incorrect_loss_per_token": 1.3947385549545288, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.459751009941101, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.459751009941101, "logits_per_char": -0.7298755049705505, "num_chars": 2}, {"sum_logits": -1.3853192329406738, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.3853192329406738, "logits_per_char": -0.6926596164703369, "num_chars": 2}, {"sum_logits": -1.439895749092102, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.439895749092102, "logits_per_char": -0.719947874546051, "num_chars": 2}, {"sum_logits": -1.2845689058303833, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.2845689058303833, "logits_per_char": -0.6422844529151917, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 969, "native_id": "NYSEDREGENTS_2010_8_8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.283328890800476, "incorrect_loss_raw": 1.4387449423472087, "correct_loss_per_char": 0.641664445400238, "incorrect_loss_per_char": 0.7193724711736044, "correct_loss_per_token": 1.283328890800476, "incorrect_loss_per_token": 1.4387449423472087, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.424364686012268, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.424364686012268, "logits_per_char": -0.712182343006134, "num_chars": 2}, {"sum_logits": -1.3515141010284424, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3515141010284424, "logits_per_char": -0.6757570505142212, "num_chars": 2}, {"sum_logits": -1.5403560400009155, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.5403560400009155, "logits_per_char": -0.7701780200004578, "num_chars": 2}, {"sum_logits": -1.283328890800476, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.283328890800476, "logits_per_char": -0.641664445400238, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 970, "native_id": "Mercury_SC_406543", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4589020013809204, "incorrect_loss_raw": 1.372329552968343, "correct_loss_per_char": 0.7294510006904602, "incorrect_loss_per_char": 0.6861647764841715, "correct_loss_per_token": 1.4589020013809204, "incorrect_loss_per_token": 1.372329552968343, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4354954957962036, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4354954957962036, "logits_per_char": -0.7177477478981018, "num_chars": 2}, {"sum_logits": -1.4589020013809204, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4589020013809204, "logits_per_char": -0.7294510006904602, "num_chars": 2}, {"sum_logits": -1.3954076766967773, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3954076766967773, "logits_per_char": -0.6977038383483887, "num_chars": 2}, {"sum_logits": -1.2860854864120483, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2860854864120483, "logits_per_char": -0.6430427432060242, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 971, "native_id": "Mercury_7214585", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3688029050827026, "incorrect_loss_raw": 1.4125198523203533, "correct_loss_per_char": 0.6844014525413513, "incorrect_loss_per_char": 0.7062599261601766, "correct_loss_per_token": 1.3688029050827026, "incorrect_loss_per_token": 1.4125198523203533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6364630460739136, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.6364630460739136, "logits_per_char": -0.8182315230369568, "num_chars": 2}, {"sum_logits": -1.2065293788909912, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.2065293788909912, "logits_per_char": -0.6032646894454956, "num_chars": 2}, {"sum_logits": -1.3945671319961548, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3945671319961548, "logits_per_char": -0.6972835659980774, "num_chars": 2}, {"sum_logits": -1.3688029050827026, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3688029050827026, "logits_per_char": -0.6844014525413513, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 972, "native_id": "MCAS_2011_8_17692", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4480524063110352, "incorrect_loss_raw": 1.3729718923568726, "correct_loss_per_char": 0.7240262031555176, "incorrect_loss_per_char": 0.6864859461784363, "correct_loss_per_token": 1.4480524063110352, "incorrect_loss_per_token": 1.3729718923568726, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4233125448226929, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4233125448226929, "logits_per_char": -0.7116562724113464, "num_chars": 2}, {"sum_logits": -1.4480524063110352, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.4480524063110352, "logits_per_char": -0.7240262031555176, "num_chars": 2}, {"sum_logits": -1.2841954231262207, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": true, "logits_per_token": -1.2841954231262207, "logits_per_char": -0.6420977115631104, "num_chars": 2}, {"sum_logits": -1.411407709121704, "num_tokens": 1, "num_tokens_all": 311, "is_greedy": false, "logits_per_token": -1.411407709121704, "logits_per_char": -0.705703854560852, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 973, "native_id": "Mercury_7222758", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3513613939285278, "incorrect_loss_raw": 1.4036683241526287, "correct_loss_per_char": 0.6756806969642639, "incorrect_loss_per_char": 0.7018341620763143, "correct_loss_per_token": 1.3513613939285278, "incorrect_loss_per_token": 1.4036683241526287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4215388298034668, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4215388298034668, "logits_per_char": -0.7107694149017334, "num_chars": 2}, {"sum_logits": -1.3513613939285278, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.3513613939285278, "logits_per_char": -0.6756806969642639, "num_chars": 2}, {"sum_logits": -1.406234622001648, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.406234622001648, "logits_per_char": -0.703117311000824, "num_chars": 2}, {"sum_logits": -1.383231520652771, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.383231520652771, "logits_per_char": -0.6916157603263855, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 974, "native_id": "Mercury_400522", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4104816913604736, "incorrect_loss_raw": 1.384447971979777, "correct_loss_per_char": 0.7052408456802368, "incorrect_loss_per_char": 0.6922239859898885, "correct_loss_per_token": 1.4104816913604736, "incorrect_loss_per_token": 1.384447971979777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.321457028388977, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.321457028388977, "logits_per_char": -0.6607285141944885, "num_chars": 2}, {"sum_logits": -1.407212257385254, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.407212257385254, "logits_per_char": -0.703606128692627, "num_chars": 2}, {"sum_logits": -1.4246746301651, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4246746301651, "logits_per_char": -0.71233731508255, "num_chars": 2}, {"sum_logits": -1.4104816913604736, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4104816913604736, "logits_per_char": -0.7052408456802368, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 975, "native_id": "Mercury_SC_415699", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4814953804016113, "incorrect_loss_raw": 1.3687047958374023, "correct_loss_per_char": 0.7407476902008057, "incorrect_loss_per_char": 0.6843523979187012, "correct_loss_per_token": 1.4814953804016113, "incorrect_loss_per_token": 1.3687047958374023, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2529587745666504, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.2529587745666504, "logits_per_char": -0.6264793872833252, "num_chars": 2}, {"sum_logits": -1.4814953804016113, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4814953804016113, "logits_per_char": -0.7407476902008057, "num_chars": 2}, {"sum_logits": -1.36836576461792, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.36836576461792, "logits_per_char": -0.68418288230896, "num_chars": 2}, {"sum_logits": -1.4847898483276367, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4847898483276367, "logits_per_char": -0.7423949241638184, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 976, "native_id": "NCEOGA_2013_8_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5335692167282104, "incorrect_loss_raw": 1.3471263647079468, "correct_loss_per_char": 0.7667846083641052, "incorrect_loss_per_char": 0.6735631823539734, "correct_loss_per_token": 1.5335692167282104, "incorrect_loss_per_token": 1.3471263647079468, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5335692167282104, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5335692167282104, "logits_per_char": -0.7667846083641052, "num_chars": 2}, {"sum_logits": -1.3847403526306152, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3847403526306152, "logits_per_char": -0.6923701763153076, "num_chars": 2}, {"sum_logits": -1.3736152648925781, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3736152648925781, "logits_per_char": -0.6868076324462891, "num_chars": 2}, {"sum_logits": -1.283023476600647, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.283023476600647, "logits_per_char": -0.6415117383003235, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 977, "native_id": "Mercury_7212940", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3200868368148804, "incorrect_loss_raw": 1.4155257145563762, "correct_loss_per_char": 0.6600434184074402, "incorrect_loss_per_char": 0.7077628572781881, "correct_loss_per_token": 1.3200868368148804, "incorrect_loss_per_token": 1.4155257145563762, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3631526231765747, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3631526231765747, "logits_per_char": -0.6815763115882874, "num_chars": 2}, {"sum_logits": -1.3200868368148804, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.3200868368148804, "logits_per_char": -0.6600434184074402, "num_chars": 2}, {"sum_logits": -1.473410964012146, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.473410964012146, "logits_per_char": -0.736705482006073, "num_chars": 2}, {"sum_logits": -1.4100135564804077, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4100135564804077, "logits_per_char": -0.7050067782402039, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 978, "native_id": "Mercury_7200568", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.413785696029663, "incorrect_loss_raw": 1.3860741058985393, "correct_loss_per_char": 0.7068928480148315, "incorrect_loss_per_char": 0.6930370529492696, "correct_loss_per_token": 1.413785696029663, "incorrect_loss_per_token": 1.3860741058985393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3832670450210571, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3832670450210571, "logits_per_char": -0.6916335225105286, "num_chars": 2}, {"sum_logits": -1.3285800218582153, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.3285800218582153, "logits_per_char": -0.6642900109291077, "num_chars": 2}, {"sum_logits": -1.413785696029663, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.413785696029663, "logits_per_char": -0.7068928480148315, "num_chars": 2}, {"sum_logits": -1.4463752508163452, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4463752508163452, "logits_per_char": -0.7231876254081726, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 979, "native_id": "Mercury_SC_401001", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3468785285949707, "incorrect_loss_raw": 1.405578335126241, "correct_loss_per_char": 0.6734392642974854, "incorrect_loss_per_char": 0.7027891675631205, "correct_loss_per_token": 1.3468785285949707, "incorrect_loss_per_token": 1.405578335126241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3468785285949707, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.3468785285949707, "logits_per_char": -0.6734392642974854, "num_chars": 2}, {"sum_logits": -1.3655123710632324, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3655123710632324, "logits_per_char": -0.6827561855316162, "num_chars": 2}, {"sum_logits": -1.3847570419311523, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3847570419311523, "logits_per_char": -0.6923785209655762, "num_chars": 2}, {"sum_logits": -1.4664655923843384, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4664655923843384, "logits_per_char": -0.7332327961921692, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 980, "native_id": "Mercury_SC_409153", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2976970672607422, "incorrect_loss_raw": 1.424350420633952, "correct_loss_per_char": 0.6488485336303711, "incorrect_loss_per_char": 0.712175210316976, "correct_loss_per_token": 1.2976970672607422, "incorrect_loss_per_token": 1.424350420633952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.477145791053772, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.477145791053772, "logits_per_char": -0.738572895526886, "num_chars": 2}, {"sum_logits": -1.3755792379379272, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3755792379379272, "logits_per_char": -0.6877896189689636, "num_chars": 2}, {"sum_logits": -1.4203262329101562, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4203262329101562, "logits_per_char": -0.7101631164550781, "num_chars": 2}, {"sum_logits": -1.2976970672607422, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.2976970672607422, "logits_per_char": -0.6488485336303711, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 981, "native_id": "TIMSS_2011_4_pg97", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4663527011871338, "incorrect_loss_raw": 1.370482047398885, "correct_loss_per_char": 0.7331763505935669, "incorrect_loss_per_char": 0.6852410236994425, "correct_loss_per_token": 1.4663527011871338, "incorrect_loss_per_token": 1.370482047398885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5023210048675537, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5023210048675537, "logits_per_char": -0.7511605024337769, "num_chars": 2}, {"sum_logits": -1.4663527011871338, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4663527011871338, "logits_per_char": -0.7331763505935669, "num_chars": 2}, {"sum_logits": -1.3439807891845703, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3439807891845703, "logits_per_char": -0.6719903945922852, "num_chars": 2}, {"sum_logits": -1.2651443481445312, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2651443481445312, "logits_per_char": -0.6325721740722656, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 982, "native_id": "VASoL_2007_3_18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3865457773208618, "incorrect_loss_raw": 1.3973073561986287, "correct_loss_per_char": 0.6932728886604309, "incorrect_loss_per_char": 0.6986536780993143, "correct_loss_per_token": 1.3865457773208618, "incorrect_loss_per_token": 1.3973073561986287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4907491207122803, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4907491207122803, "logits_per_char": -0.7453745603561401, "num_chars": 2}, {"sum_logits": -1.426061987876892, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.426061987876892, "logits_per_char": -0.713030993938446, "num_chars": 2}, {"sum_logits": -1.3865457773208618, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3865457773208618, "logits_per_char": -0.6932728886604309, "num_chars": 2}, {"sum_logits": -1.2751109600067139, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.2751109600067139, "logits_per_char": -0.6375554800033569, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 983, "native_id": "Mercury_7221393", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.357153058052063, "incorrect_loss_raw": 1.4024436871210735, "correct_loss_per_char": 0.6785765290260315, "incorrect_loss_per_char": 0.7012218435605367, "correct_loss_per_token": 1.357153058052063, "incorrect_loss_per_token": 1.4024436871210735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4627196788787842, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4627196788787842, "logits_per_char": -0.7313598394393921, "num_chars": 2}, {"sum_logits": -1.357153058052063, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.357153058052063, "logits_per_char": -0.6785765290260315, "num_chars": 2}, {"sum_logits": -1.3215866088867188, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": true, "logits_per_token": -1.3215866088867188, "logits_per_char": -0.6607933044433594, "num_chars": 2}, {"sum_logits": -1.4230247735977173, "num_tokens": 1, "num_tokens_all": 305, "is_greedy": false, "logits_per_token": -1.4230247735977173, "logits_per_char": -0.7115123867988586, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 984, "native_id": "Mercury_7238893", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4684199094772339, "incorrect_loss_raw": 1.366977373758952, "correct_loss_per_char": 0.7342099547386169, "incorrect_loss_per_char": 0.683488686879476, "correct_loss_per_token": 1.4684199094772339, "incorrect_loss_per_token": 1.366977373758952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3326998949050903, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.3326998949050903, "logits_per_char": -0.6663499474525452, "num_chars": 2}, {"sum_logits": -1.404398798942566, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.404398798942566, "logits_per_char": -0.702199399471283, "num_chars": 2}, {"sum_logits": -1.3638334274291992, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3638334274291992, "logits_per_char": -0.6819167137145996, "num_chars": 2}, {"sum_logits": -1.4684199094772339, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4684199094772339, "logits_per_char": -0.7342099547386169, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 985, "native_id": "NCEOGA_2013_5_32", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3782199621200562, "incorrect_loss_raw": 1.3954943418502808, "correct_loss_per_char": 0.6891099810600281, "incorrect_loss_per_char": 0.6977471709251404, "correct_loss_per_token": 1.3782199621200562, "incorrect_loss_per_token": 1.3954943418502808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4010616540908813, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4010616540908813, "logits_per_char": -0.7005308270454407, "num_chars": 2}, {"sum_logits": -1.3782199621200562, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3782199621200562, "logits_per_char": -0.6891099810600281, "num_chars": 2}, {"sum_logits": -1.4131401777267456, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4131401777267456, "logits_per_char": -0.7065700888633728, "num_chars": 2}, {"sum_logits": -1.3722811937332153, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.3722811937332153, "logits_per_char": -0.6861405968666077, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 986, "native_id": "TAKS_2009_5_20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4232772588729858, "incorrect_loss_raw": 1.3816074927647908, "correct_loss_per_char": 0.7116386294364929, "incorrect_loss_per_char": 0.6908037463823954, "correct_loss_per_token": 1.4232772588729858, "incorrect_loss_per_token": 1.3816074927647908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.460949420928955, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.460949420928955, "logits_per_char": -0.7304747104644775, "num_chars": 2}, {"sum_logits": -1.392445683479309, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.392445683479309, "logits_per_char": -0.6962228417396545, "num_chars": 2}, {"sum_logits": -1.4232772588729858, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4232772588729858, "logits_per_char": -0.7116386294364929, "num_chars": 2}, {"sum_logits": -1.2914273738861084, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2914273738861084, "logits_per_char": -0.6457136869430542, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 987, "native_id": "NYSEDREGENTS_2013_8_23", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4362711906433105, "incorrect_loss_raw": 1.3748783270517986, "correct_loss_per_char": 0.7181355953216553, "incorrect_loss_per_char": 0.6874391635258993, "correct_loss_per_token": 1.4362711906433105, "incorrect_loss_per_token": 1.3748783270517986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3934173583984375, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3934173583984375, "logits_per_char": -0.6967086791992188, "num_chars": 2}, {"sum_logits": -1.4362711906433105, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4362711906433105, "logits_per_char": -0.7181355953216553, "num_chars": 2}, {"sum_logits": -1.3588634729385376, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3588634729385376, "logits_per_char": -0.6794317364692688, "num_chars": 2}, {"sum_logits": -1.3723541498184204, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3723541498184204, "logits_per_char": -0.6861770749092102, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 988, "native_id": "Mercury_7220430", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3753770589828491, "incorrect_loss_raw": 1.405230720837911, "correct_loss_per_char": 0.6876885294914246, "incorrect_loss_per_char": 0.7026153604189554, "correct_loss_per_token": 1.3753770589828491, "incorrect_loss_per_token": 1.405230720837911, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5635815858840942, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.5635815858840942, "logits_per_char": -0.7817907929420471, "num_chars": 2}, {"sum_logits": -1.441793441772461, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.441793441772461, "logits_per_char": -0.7208967208862305, "num_chars": 2}, {"sum_logits": -1.3753770589828491, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": false, "logits_per_token": -1.3753770589828491, "logits_per_char": -0.6876885294914246, "num_chars": 2}, {"sum_logits": -1.2103171348571777, "num_tokens": 1, "num_tokens_all": 314, "is_greedy": true, "logits_per_token": -1.2103171348571777, "logits_per_char": -0.6051585674285889, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 989, "native_id": "Mercury_LBS10254", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3987765312194824, "incorrect_loss_raw": 1.391700307528178, "correct_loss_per_char": 0.6993882656097412, "incorrect_loss_per_char": 0.695850153764089, "correct_loss_per_token": 1.3987765312194824, "incorrect_loss_per_token": 1.391700307528178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3987765312194824, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3987765312194824, "logits_per_char": -0.6993882656097412, "num_chars": 2}, {"sum_logits": -1.3570106029510498, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.3570106029510498, "logits_per_char": -0.6785053014755249, "num_chars": 2}, {"sum_logits": -1.4986217021942139, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": false, "logits_per_token": -1.4986217021942139, "logits_per_char": -0.7493108510971069, "num_chars": 2}, {"sum_logits": -1.31946861743927, "num_tokens": 1, "num_tokens_all": 323, "is_greedy": true, "logits_per_token": -1.31946861743927, "logits_per_char": -0.659734308719635, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 990, "native_id": "Mercury_401215", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3739711046218872, "incorrect_loss_raw": 1.395693023999532, "correct_loss_per_char": 0.6869855523109436, "incorrect_loss_per_char": 0.697846511999766, "correct_loss_per_token": 1.3739711046218872, "incorrect_loss_per_token": 1.395693023999532, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4081544876098633, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.4081544876098633, "logits_per_char": -0.7040772438049316, "num_chars": 2}, {"sum_logits": -1.390760064125061, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.390760064125061, "logits_per_char": -0.6953800320625305, "num_chars": 2}, {"sum_logits": -1.3739711046218872, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": true, "logits_per_token": -1.3739711046218872, "logits_per_char": -0.6869855523109436, "num_chars": 2}, {"sum_logits": -1.3881645202636719, "num_tokens": 1, "num_tokens_all": 313, "is_greedy": false, "logits_per_token": -1.3881645202636719, "logits_per_char": -0.6940822601318359, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 991, "native_id": "Mercury_7172865", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4324523210525513, "incorrect_loss_raw": 1.3929170370101929, "correct_loss_per_char": 0.7162261605262756, "incorrect_loss_per_char": 0.6964585185050964, "correct_loss_per_token": 1.4324523210525513, "incorrect_loss_per_token": 1.3929170370101929, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6384649276733398, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.6384649276733398, "logits_per_char": -0.8192324638366699, "num_chars": 2}, {"sum_logits": -1.3516194820404053, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3516194820404053, "logits_per_char": -0.6758097410202026, "num_chars": 2}, {"sum_logits": -1.4324523210525513, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4324523210525513, "logits_per_char": -0.7162261605262756, "num_chars": 2}, {"sum_logits": -1.1886667013168335, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.1886667013168335, "logits_per_char": -0.5943333506584167, "num_chars": 2}], "label": 2, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 992, "native_id": "Mercury_SC_400031", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.362435221672058, "incorrect_loss_raw": 1.4010619322458904, "correct_loss_per_char": 0.681217610836029, "incorrect_loss_per_char": 0.7005309661229452, "correct_loss_per_token": 1.362435221672058, "incorrect_loss_per_token": 1.4010619322458904, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.362435221672058, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": true, "logits_per_token": -1.362435221672058, "logits_per_char": -0.681217610836029, "num_chars": 2}, {"sum_logits": -1.3807228803634644, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.3807228803634644, "logits_per_char": -0.6903614401817322, "num_chars": 2}, {"sum_logits": -1.4349822998046875, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.4349822998046875, "logits_per_char": -0.7174911499023438, "num_chars": 2}, {"sum_logits": -1.387480616569519, "num_tokens": 1, "num_tokens_all": 319, "is_greedy": false, "logits_per_token": -1.387480616569519, "logits_per_char": -0.6937403082847595, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 993, "native_id": "ACTAAP_2011_5_14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4750282764434814, "incorrect_loss_raw": 1.3654784361521404, "correct_loss_per_char": 0.7375141382217407, "incorrect_loss_per_char": 0.6827392180760702, "correct_loss_per_token": 1.4750282764434814, "incorrect_loss_per_token": 1.3654784361521404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.393730640411377, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.393730640411377, "logits_per_char": -0.6968653202056885, "num_chars": 2}, {"sum_logits": -1.3456634283065796, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3456634283065796, "logits_per_char": -0.6728317141532898, "num_chars": 2}, {"sum_logits": -1.3570412397384644, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.3570412397384644, "logits_per_char": -0.6785206198692322, "num_chars": 2}, {"sum_logits": -1.4750282764434814, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4750282764434814, "logits_per_char": -0.7375141382217407, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 994, "native_id": "Mercury_SC_400529", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3880261182785034, "incorrect_loss_raw": 1.3927798668543498, "correct_loss_per_char": 0.6940130591392517, "incorrect_loss_per_char": 0.6963899334271749, "correct_loss_per_token": 1.3880261182785034, "incorrect_loss_per_token": 1.3927798668543498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3880261182785034, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.3880261182785034, "logits_per_char": -0.6940130591392517, "num_chars": 2}, {"sum_logits": -1.4254423379898071, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4254423379898071, "logits_per_char": -0.7127211689949036, "num_chars": 2}, {"sum_logits": -1.4648818969726562, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.4648818969726562, "logits_per_char": -0.7324409484863281, "num_chars": 2}, {"sum_logits": -1.288015365600586, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.288015365600586, "logits_per_char": -0.644007682800293, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 995, "native_id": "Mercury_400752", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.396200180053711, "incorrect_loss_raw": 1.3897778590520222, "correct_loss_per_char": 0.6981000900268555, "incorrect_loss_per_char": 0.6948889295260111, "correct_loss_per_token": 1.396200180053711, "incorrect_loss_per_token": 1.3897778590520222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4661673307418823, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4661673307418823, "logits_per_char": -0.7330836653709412, "num_chars": 2}, {"sum_logits": -1.396200180053711, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.396200180053711, "logits_per_char": -0.6981000900268555, "num_chars": 2}, {"sum_logits": -1.3429449796676636, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.3429449796676636, "logits_per_char": -0.6714724898338318, "num_chars": 2}, {"sum_logits": -1.360221266746521, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.360221266746521, "logits_per_char": -0.6801106333732605, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 996, "native_id": "Mercury_7267908", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4174703359603882, "incorrect_loss_raw": 1.3835689624150593, "correct_loss_per_char": 0.7087351679801941, "incorrect_loss_per_char": 0.6917844812075297, "correct_loss_per_token": 1.4174703359603882, "incorrect_loss_per_token": 1.3835689624150593, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4174703359603882, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4174703359603882, "logits_per_char": -0.7087351679801941, "num_chars": 2}, {"sum_logits": -1.4472534656524658, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4472534656524658, "logits_per_char": -0.7236267328262329, "num_chars": 2}, {"sum_logits": -1.3457932472229004, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.3457932472229004, "logits_per_char": -0.6728966236114502, "num_chars": 2}, {"sum_logits": -1.357660174369812, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.357660174369812, "logits_per_char": -0.678830087184906, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 997, "native_id": "Mercury_7090563", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3671298027038574, "incorrect_loss_raw": 1.4011525710423787, "correct_loss_per_char": 0.6835649013519287, "incorrect_loss_per_char": 0.7005762855211893, "correct_loss_per_token": 1.3671298027038574, "incorrect_loss_per_token": 1.4011525710423787, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3671298027038574, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.3671298027038574, "logits_per_char": -0.6835649013519287, "num_chars": 2}, {"sum_logits": -1.383468508720398, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.383468508720398, "logits_per_char": -0.691734254360199, "num_chars": 2}, {"sum_logits": -1.3135212659835815, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": true, "logits_per_token": -1.3135212659835815, "logits_per_char": -0.6567606329917908, "num_chars": 2}, {"sum_logits": -1.5064679384231567, "num_tokens": 1, "num_tokens_all": 310, "is_greedy": false, "logits_per_token": -1.5064679384231567, "logits_per_char": -0.7532339692115784, "num_chars": 2}], "label": 0, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 998, "native_id": "MCAS_2016_5_2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2881948947906494, "incorrect_loss_raw": 1.4327293634414673, "correct_loss_per_char": 0.6440974473953247, "incorrect_loss_per_char": 0.7163646817207336, "correct_loss_per_token": 1.2881948947906494, "incorrect_loss_per_token": 1.4327293634414673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5662096738815308, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5662096738815308, "logits_per_char": -0.7831048369407654, "num_chars": 2}, {"sum_logits": -1.4200880527496338, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4200880527496338, "logits_per_char": -0.7100440263748169, "num_chars": 2}, {"sum_logits": -1.3118903636932373, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3118903636932373, "logits_per_char": -0.6559451818466187, "num_chars": 2}, {"sum_logits": -1.2881948947906494, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2881948947906494, "logits_per_char": -0.6440974473953247, "num_chars": 2}], "label": 3, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 999, "native_id": "Mercury_SC_401800", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4107176065444946, "incorrect_loss_raw": 1.3851645390192668, "correct_loss_per_char": 0.7053588032722473, "incorrect_loss_per_char": 0.6925822695096334, "correct_loss_per_token": 1.4107176065444946, "incorrect_loss_per_token": 1.3851645390192668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.403028964996338, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.403028964996338, "logits_per_char": -0.701514482498169, "num_chars": 2}, {"sum_logits": -1.4107176065444946, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4107176065444946, "logits_per_char": -0.7053588032722473, "num_chars": 2}, {"sum_logits": -1.3517509698867798, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": true, "logits_per_token": -1.3517509698867798, "logits_per_char": -0.6758754849433899, "num_chars": 2}, {"sum_logits": -1.4007136821746826, "num_tokens": 1, "num_tokens_all": 309, "is_greedy": false, "logits_per_token": -1.4007136821746826, "logits_per_char": -0.7003568410873413, "num_chars": 2}], "label": 1, "task_hash": "ee0799a85be6dba03938d8980a14bc3a", "model_hash": "03418cf8091a9882619950ffb07429a5"}