|
{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4921441078186035, "incorrect_loss_raw": 1.3673115173975627, "correct_loss_per_char": 0.7460720539093018, "incorrect_loss_per_char": 0.6836557586987814, "correct_loss_per_token": 1.4921441078186035, "incorrect_loss_per_token": 1.3673115173975627, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4921441078186035, "num_tokens": 1, "num_tokens_all": 814, "is_greedy": false, "logits_per_token": -1.4921441078186035, "logits_per_char": -0.7460720539093018, "num_chars": 2}, {"sum_logits": -1.4305858612060547, "num_tokens": 1, "num_tokens_all": 814, "is_greedy": false, "logits_per_token": -1.4305858612060547, "logits_per_char": -0.7152929306030273, "num_chars": 2}, {"sum_logits": -1.4676897525787354, "num_tokens": 1, "num_tokens_all": 814, "is_greedy": false, "logits_per_token": -1.4676897525787354, "logits_per_char": -0.7338448762893677, "num_chars": 2}, {"sum_logits": -1.203658938407898, "num_tokens": 1, "num_tokens_all": 814, "is_greedy": true, "logits_per_token": -1.203658938407898, "logits_per_char": -0.601829469203949, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.504995346069336, "incorrect_loss_raw": 1.386578917503357, "correct_loss_per_char": 0.752497673034668, "incorrect_loss_per_char": 0.6932894587516785, "correct_loss_per_token": 1.504995346069336, "incorrect_loss_per_token": 1.386578917503357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6610088348388672, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.6610088348388672, "logits_per_char": -0.8305044174194336, "num_chars": 2}, {"sum_logits": -1.4338006973266602, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.4338006973266602, "logits_per_char": -0.7169003486633301, "num_chars": 2}, {"sum_logits": -1.504995346069336, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.504995346069336, "logits_per_char": -0.752497673034668, "num_chars": 2}, {"sum_logits": -1.0649272203445435, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -1.0649272203445435, "logits_per_char": -0.5324636101722717, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3744373321533203, "incorrect_loss_raw": 1.427574356396993, "correct_loss_per_char": 0.6872186660766602, "incorrect_loss_per_char": 0.7137871781984965, "correct_loss_per_token": 1.3744373321533203, "incorrect_loss_per_token": 1.427574356396993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.645589828491211, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.645589828491211, "logits_per_char": -0.8227949142456055, "num_chars": 2}, {"sum_logits": -1.3744373321533203, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.3744373321533203, "logits_per_char": -0.6872186660766602, "num_chars": 2}, {"sum_logits": -1.5609550476074219, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.5609550476074219, "logits_per_char": -0.7804775238037109, "num_chars": 2}, {"sum_logits": -1.0761781930923462, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -1.0761781930923462, "logits_per_char": -0.5380890965461731, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4856743812561035, "incorrect_loss_raw": 1.3737146457036336, "correct_loss_per_char": 0.7428371906280518, "incorrect_loss_per_char": 0.6868573228518168, "correct_loss_per_token": 1.4856743812561035, "incorrect_loss_per_token": 1.3737146457036336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4856743812561035, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -1.4856743812561035, "logits_per_char": -0.7428371906280518, "num_chars": 2}, {"sum_logits": -1.3104645013809204, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -1.3104645013809204, "logits_per_char": -0.6552322506904602, "num_chars": 2}, {"sum_logits": -1.5690436363220215, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -1.5690436363220215, "logits_per_char": -0.7845218181610107, "num_chars": 2}, {"sum_logits": -1.241635799407959, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": true, "logits_per_token": -1.241635799407959, "logits_per_char": -0.6208178997039795, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3965494632720947, "incorrect_loss_raw": 1.4069182475407918, "correct_loss_per_char": 0.6982747316360474, "incorrect_loss_per_char": 0.7034591237703959, "correct_loss_per_token": 1.3965494632720947, "incorrect_loss_per_token": 1.4069182475407918, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.436079502105713, "num_tokens": 1, "num_tokens_all": 797, "is_greedy": false, "logits_per_token": -1.436079502105713, "logits_per_char": -0.7180397510528564, "num_chars": 2}, {"sum_logits": -1.5996688604354858, "num_tokens": 1, "num_tokens_all": 797, "is_greedy": false, "logits_per_token": -1.5996688604354858, "logits_per_char": -0.7998344302177429, "num_chars": 2}, {"sum_logits": -1.3965494632720947, "num_tokens": 1, "num_tokens_all": 797, "is_greedy": false, "logits_per_token": -1.3965494632720947, "logits_per_char": -0.6982747316360474, "num_chars": 2}, {"sum_logits": -1.1850063800811768, "num_tokens": 1, "num_tokens_all": 797, "is_greedy": true, "logits_per_token": -1.1850063800811768, "logits_per_char": -0.5925031900405884, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5601119995117188, "incorrect_loss_raw": 1.4185410936673482, "correct_loss_per_char": 0.7800559997558594, "incorrect_loss_per_char": 0.7092705468336741, "correct_loss_per_token": 1.5601119995117188, "incorrect_loss_per_token": 1.4185410936673482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7780330181121826, "num_tokens": 1, "num_tokens_all": 845, "is_greedy": false, "logits_per_token": -1.7780330181121826, "logits_per_char": -0.8890165090560913, "num_chars": 2}, {"sum_logits": -1.595489263534546, "num_tokens": 1, "num_tokens_all": 845, "is_greedy": false, "logits_per_token": -1.595489263534546, "logits_per_char": -0.797744631767273, "num_chars": 2}, {"sum_logits": -1.5601119995117188, "num_tokens": 1, "num_tokens_all": 845, "is_greedy": false, "logits_per_token": -1.5601119995117188, "logits_per_char": -0.7800559997558594, "num_chars": 2}, {"sum_logits": -0.8821009993553162, "num_tokens": 1, "num_tokens_all": 845, "is_greedy": true, "logits_per_token": -0.8821009993553162, "logits_per_char": -0.4410504996776581, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4548261165618896, "incorrect_loss_raw": 1.3886473576227825, "correct_loss_per_char": 0.7274130582809448, "incorrect_loss_per_char": 0.6943236788113912, "correct_loss_per_token": 1.4548261165618896, "incorrect_loss_per_token": 1.3886473576227825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5018150806427002, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.5018150806427002, "logits_per_char": -0.7509075403213501, "num_chars": 2}, {"sum_logits": -1.4268029928207397, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.4268029928207397, "logits_per_char": -0.7134014964103699, "num_chars": 2}, {"sum_logits": -1.4548261165618896, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.4548261165618896, "logits_per_char": -0.7274130582809448, "num_chars": 2}, {"sum_logits": -1.2373239994049072, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -1.2373239994049072, "logits_per_char": -0.6186619997024536, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4872462749481201, "incorrect_loss_raw": 1.3843834400177002, "correct_loss_per_char": 0.7436231374740601, "incorrect_loss_per_char": 0.6921917200088501, "correct_loss_per_token": 1.4872462749481201, "incorrect_loss_per_token": 1.3843834400177002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4156721830368042, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.4156721830368042, "logits_per_char": -0.7078360915184021, "num_chars": 2}, {"sum_logits": -1.4872462749481201, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.4872462749481201, "logits_per_char": -0.7436231374740601, "num_chars": 2}, {"sum_logits": -1.504973292350769, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.504973292350769, "logits_per_char": -0.7524866461753845, "num_chars": 2}, {"sum_logits": -1.2325048446655273, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": true, "logits_per_token": -1.2325048446655273, "logits_per_char": -0.6162524223327637, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4371610879898071, "incorrect_loss_raw": 1.3839383522669475, "correct_loss_per_char": 0.7185805439949036, "incorrect_loss_per_char": 0.6919691761334738, "correct_loss_per_token": 1.4371610879898071, "incorrect_loss_per_token": 1.3839383522669475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4206008911132812, "num_tokens": 1, "num_tokens_all": 819, "is_greedy": false, "logits_per_token": -1.4206008911132812, "logits_per_char": -0.7103004455566406, "num_chars": 2}, {"sum_logits": -1.5023216009140015, "num_tokens": 1, "num_tokens_all": 819, "is_greedy": false, "logits_per_token": -1.5023216009140015, "logits_per_char": -0.7511608004570007, "num_chars": 2}, {"sum_logits": -1.4371610879898071, "num_tokens": 1, "num_tokens_all": 819, "is_greedy": false, "logits_per_token": -1.4371610879898071, "logits_per_char": -0.7185805439949036, "num_chars": 2}, {"sum_logits": -1.2288925647735596, "num_tokens": 1, "num_tokens_all": 819, "is_greedy": true, "logits_per_token": -1.2288925647735596, "logits_per_char": -0.6144462823867798, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.485114574432373, "incorrect_loss_raw": 1.3651503324508667, "correct_loss_per_char": 0.7425572872161865, "incorrect_loss_per_char": 0.6825751662254333, "correct_loss_per_token": 1.485114574432373, "incorrect_loss_per_token": 1.3651503324508667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.46739661693573, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.46739661693573, "logits_per_char": -0.733698308467865, "num_chars": 2}, {"sum_logits": -1.2962162494659424, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -1.2962162494659424, "logits_per_char": -0.6481081247329712, "num_chars": 2}, {"sum_logits": -1.485114574432373, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.485114574432373, "logits_per_char": -0.7425572872161865, "num_chars": 2}, {"sum_logits": -1.3318381309509277, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.3318381309509277, "logits_per_char": -0.6659190654754639, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1620047092437744, "incorrect_loss_raw": 1.513976812362671, "correct_loss_per_char": 0.5810023546218872, "incorrect_loss_per_char": 0.7569884061813354, "correct_loss_per_token": 1.1620047092437744, "incorrect_loss_per_token": 1.513976812362671, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5508098602294922, "num_tokens": 1, "num_tokens_all": 833, "is_greedy": false, "logits_per_token": -1.5508098602294922, "logits_per_char": -0.7754049301147461, "num_chars": 2}, {"sum_logits": -1.5527145862579346, "num_tokens": 1, "num_tokens_all": 833, "is_greedy": false, "logits_per_token": -1.5527145862579346, "logits_per_char": -0.7763572931289673, "num_chars": 2}, {"sum_logits": -1.438405990600586, "num_tokens": 1, "num_tokens_all": 833, "is_greedy": false, "logits_per_token": -1.438405990600586, "logits_per_char": -0.719202995300293, "num_chars": 2}, {"sum_logits": -1.1620047092437744, "num_tokens": 1, "num_tokens_all": 833, "is_greedy": true, "logits_per_token": -1.1620047092437744, "logits_per_char": -0.5810023546218872, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5856573581695557, "incorrect_loss_raw": 1.3688217004140217, "correct_loss_per_char": 0.7928286790847778, "incorrect_loss_per_char": 0.6844108502070109, "correct_loss_per_token": 1.5856573581695557, "incorrect_loss_per_token": 1.3688217004140217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5856573581695557, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.5856573581695557, "logits_per_char": -0.7928286790847778, "num_chars": 2}, {"sum_logits": -1.6454832553863525, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.6454832553863525, "logits_per_char": -0.8227416276931763, "num_chars": 2}, {"sum_logits": -1.4398236274719238, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.4398236274719238, "logits_per_char": -0.7199118137359619, "num_chars": 2}, {"sum_logits": -1.021158218383789, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": true, "logits_per_token": -1.021158218383789, "logits_per_char": -0.5105791091918945, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6719751358032227, "incorrect_loss_raw": 1.313446283340454, "correct_loss_per_char": 0.8359875679016113, "incorrect_loss_per_char": 0.656723141670227, "correct_loss_per_token": 1.6719751358032227, "incorrect_loss_per_token": 1.313446283340454, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6719751358032227, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.6719751358032227, "logits_per_char": -0.8359875679016113, "num_chars": 2}, {"sum_logits": -1.3576281070709229, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.3576281070709229, "logits_per_char": -0.6788140535354614, "num_chars": 2}, {"sum_logits": -1.3527510166168213, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.3527510166168213, "logits_per_char": -0.6763755083084106, "num_chars": 2}, {"sum_logits": -1.2299597263336182, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -1.2299597263336182, "logits_per_char": -0.6149798631668091, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6149802207946777, "incorrect_loss_raw": 1.3525444269180298, "correct_loss_per_char": 0.8074901103973389, "incorrect_loss_per_char": 0.6762722134590149, "correct_loss_per_token": 1.6149802207946777, "incorrect_loss_per_token": 1.3525444269180298, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.509857416152954, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": false, "logits_per_token": -1.509857416152954, "logits_per_char": -0.754928708076477, "num_chars": 2}, {"sum_logits": -1.5094356536865234, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": false, "logits_per_token": -1.5094356536865234, "logits_per_char": -0.7547178268432617, "num_chars": 2}, {"sum_logits": -1.6149802207946777, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": false, "logits_per_token": -1.6149802207946777, "logits_per_char": -0.8074901103973389, "num_chars": 2}, {"sum_logits": -1.0383402109146118, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": true, "logits_per_token": -1.0383402109146118, "logits_per_char": -0.5191701054573059, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5616607666015625, "incorrect_loss_raw": 1.3867748975753784, "correct_loss_per_char": 0.7808303833007812, "incorrect_loss_per_char": 0.6933874487876892, "correct_loss_per_token": 1.5616607666015625, "incorrect_loss_per_token": 1.3867748975753784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5616607666015625, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -1.5616607666015625, "logits_per_char": -0.7808303833007812, "num_chars": 2}, {"sum_logits": -1.5059809684753418, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -1.5059809684753418, "logits_per_char": -0.7529904842376709, "num_chars": 2}, {"sum_logits": -1.4823384284973145, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -1.4823384284973145, "logits_per_char": -0.7411692142486572, "num_chars": 2}, {"sum_logits": -1.172005295753479, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": true, "logits_per_token": -1.172005295753479, "logits_per_char": -0.5860026478767395, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9399055242538452, "incorrect_loss_raw": 1.6132486661275227, "correct_loss_per_char": 0.4699527621269226, "incorrect_loss_per_char": 0.8066243330637614, "correct_loss_per_token": 0.9399055242538452, "incorrect_loss_per_token": 1.6132486661275227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5156999826431274, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": false, "logits_per_token": -1.5156999826431274, "logits_per_char": -0.7578499913215637, "num_chars": 2}, {"sum_logits": -1.6876791715621948, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": false, "logits_per_token": -1.6876791715621948, "logits_per_char": -0.8438395857810974, "num_chars": 2}, {"sum_logits": -1.636366844177246, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": false, "logits_per_token": -1.636366844177246, "logits_per_char": -0.818183422088623, "num_chars": 2}, {"sum_logits": -0.9399055242538452, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": true, "logits_per_token": -0.9399055242538452, "logits_per_char": -0.4699527621269226, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3385286331176758, "incorrect_loss_raw": 1.4123271703720093, "correct_loss_per_char": 0.6692643165588379, "incorrect_loss_per_char": 0.7061635851860046, "correct_loss_per_token": 1.3385286331176758, "incorrect_loss_per_token": 1.4123271703720093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3618391752243042, "num_tokens": 1, "num_tokens_all": 814, "is_greedy": false, "logits_per_token": -1.3618391752243042, "logits_per_char": -0.6809195876121521, "num_chars": 2}, {"sum_logits": -1.4842150211334229, "num_tokens": 1, "num_tokens_all": 814, "is_greedy": false, "logits_per_token": -1.4842150211334229, "logits_per_char": -0.7421075105667114, "num_chars": 2}, {"sum_logits": -1.3909273147583008, "num_tokens": 1, "num_tokens_all": 814, "is_greedy": false, "logits_per_token": -1.3909273147583008, "logits_per_char": -0.6954636573791504, "num_chars": 2}, {"sum_logits": -1.3385286331176758, "num_tokens": 1, "num_tokens_all": 814, "is_greedy": true, "logits_per_token": -1.3385286331176758, "logits_per_char": -0.6692643165588379, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5305007696151733, "incorrect_loss_raw": 1.3860206206639607, "correct_loss_per_char": 0.7652503848075867, "incorrect_loss_per_char": 0.6930103103319804, "correct_loss_per_token": 1.5305007696151733, "incorrect_loss_per_token": 1.3860206206639607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.63248872756958, "num_tokens": 1, "num_tokens_all": 806, "is_greedy": false, "logits_per_token": -1.63248872756958, "logits_per_char": -0.81624436378479, "num_chars": 2}, {"sum_logits": -1.5305007696151733, "num_tokens": 1, "num_tokens_all": 806, "is_greedy": false, "logits_per_token": -1.5305007696151733, "logits_per_char": -0.7652503848075867, "num_chars": 2}, {"sum_logits": -1.5072758197784424, "num_tokens": 1, "num_tokens_all": 806, "is_greedy": false, "logits_per_token": -1.5072758197784424, "logits_per_char": -0.7536379098892212, "num_chars": 2}, {"sum_logits": -1.0182973146438599, "num_tokens": 1, "num_tokens_all": 806, "is_greedy": true, "logits_per_token": -1.0182973146438599, "logits_per_char": -0.5091486573219299, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.239250898361206, "incorrect_loss_raw": 1.4547405640284221, "correct_loss_per_char": 0.619625449180603, "incorrect_loss_per_char": 0.7273702820142111, "correct_loss_per_token": 1.239250898361206, "incorrect_loss_per_token": 1.4547405640284221, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.239250898361206, "num_tokens": 1, "num_tokens_all": 811, "is_greedy": true, "logits_per_token": -1.239250898361206, "logits_per_char": -0.619625449180603, "num_chars": 2}, {"sum_logits": -1.4593861103057861, "num_tokens": 1, "num_tokens_all": 811, "is_greedy": false, "logits_per_token": -1.4593861103057861, "logits_per_char": -0.7296930551528931, "num_chars": 2}, {"sum_logits": -1.5609322786331177, "num_tokens": 1, "num_tokens_all": 811, "is_greedy": false, "logits_per_token": -1.5609322786331177, "logits_per_char": -0.7804661393165588, "num_chars": 2}, {"sum_logits": -1.3439033031463623, "num_tokens": 1, "num_tokens_all": 811, "is_greedy": false, "logits_per_token": -1.3439033031463623, "logits_per_char": -0.6719516515731812, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2229423522949219, "incorrect_loss_raw": 1.47224760055542, "correct_loss_per_char": 0.6114711761474609, "incorrect_loss_per_char": 0.73612380027771, "correct_loss_per_token": 1.2229423522949219, "incorrect_loss_per_token": 1.47224760055542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4423491954803467, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": false, "logits_per_token": -1.4423491954803467, "logits_per_char": -0.7211745977401733, "num_chars": 2}, {"sum_logits": -1.4175212383270264, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": false, "logits_per_token": -1.4175212383270264, "logits_per_char": -0.7087606191635132, "num_chars": 2}, {"sum_logits": -1.5568723678588867, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": false, "logits_per_token": -1.5568723678588867, "logits_per_char": -0.7784361839294434, "num_chars": 2}, {"sum_logits": -1.2229423522949219, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": true, "logits_per_token": -1.2229423522949219, "logits_per_char": -0.6114711761474609, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1593751907348633, "incorrect_loss_raw": 1.488348404566447, "correct_loss_per_char": 0.5796875953674316, "incorrect_loss_per_char": 0.7441742022832235, "correct_loss_per_token": 1.1593751907348633, "incorrect_loss_per_token": 1.488348404566447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5607903003692627, "num_tokens": 1, "num_tokens_all": 796, "is_greedy": false, "logits_per_token": -1.5607903003692627, "logits_per_char": -0.7803951501846313, "num_chars": 2}, {"sum_logits": -1.5213539600372314, "num_tokens": 1, "num_tokens_all": 796, "is_greedy": false, "logits_per_token": -1.5213539600372314, "logits_per_char": -0.7606769800186157, "num_chars": 2}, {"sum_logits": -1.3829009532928467, "num_tokens": 1, "num_tokens_all": 796, "is_greedy": false, "logits_per_token": -1.3829009532928467, "logits_per_char": -0.6914504766464233, "num_chars": 2}, {"sum_logits": -1.1593751907348633, "num_tokens": 1, "num_tokens_all": 796, "is_greedy": true, "logits_per_token": -1.1593751907348633, "logits_per_char": -0.5796875953674316, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4166505336761475, "incorrect_loss_raw": 1.4248218536376953, "correct_loss_per_char": 0.7083252668380737, "incorrect_loss_per_char": 0.7124109268188477, "correct_loss_per_token": 1.4166505336761475, "incorrect_loss_per_token": 1.4248218536376953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0771687030792236, "num_tokens": 1, "num_tokens_all": 801, "is_greedy": true, "logits_per_token": -1.0771687030792236, "logits_per_char": -0.5385843515396118, "num_chars": 2}, {"sum_logits": -1.4166505336761475, "num_tokens": 1, "num_tokens_all": 801, "is_greedy": false, "logits_per_token": -1.4166505336761475, "logits_per_char": -0.7083252668380737, "num_chars": 2}, {"sum_logits": -1.693526029586792, "num_tokens": 1, "num_tokens_all": 801, "is_greedy": false, "logits_per_token": -1.693526029586792, "logits_per_char": -0.846763014793396, "num_chars": 2}, {"sum_logits": -1.5037708282470703, "num_tokens": 1, "num_tokens_all": 801, "is_greedy": false, "logits_per_token": -1.5037708282470703, "logits_per_char": -0.7518854141235352, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.488300085067749, "incorrect_loss_raw": 1.396000822385152, "correct_loss_per_char": 0.7441500425338745, "incorrect_loss_per_char": 0.698000411192576, "correct_loss_per_token": 1.488300085067749, "incorrect_loss_per_token": 1.396000822385152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.488300085067749, "num_tokens": 1, "num_tokens_all": 800, "is_greedy": false, "logits_per_token": -1.488300085067749, "logits_per_char": -0.7441500425338745, "num_chars": 2}, {"sum_logits": -1.5588029623031616, "num_tokens": 1, "num_tokens_all": 800, "is_greedy": false, "logits_per_token": -1.5588029623031616, "logits_per_char": -0.7794014811515808, "num_chars": 2}, {"sum_logits": -1.6014575958251953, "num_tokens": 1, "num_tokens_all": 800, "is_greedy": false, "logits_per_token": -1.6014575958251953, "logits_per_char": -0.8007287979125977, "num_chars": 2}, {"sum_logits": -1.0277419090270996, "num_tokens": 1, "num_tokens_all": 800, "is_greedy": true, "logits_per_token": -1.0277419090270996, "logits_per_char": -0.5138709545135498, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3834748268127441, "incorrect_loss_raw": 1.4199082851409912, "correct_loss_per_char": 0.6917374134063721, "incorrect_loss_per_char": 0.7099541425704956, "correct_loss_per_token": 1.3834748268127441, "incorrect_loss_per_token": 1.4199082851409912, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3834748268127441, "num_tokens": 1, "num_tokens_all": 806, "is_greedy": false, "logits_per_token": -1.3834748268127441, "logits_per_char": -0.6917374134063721, "num_chars": 2}, {"sum_logits": -1.5873210430145264, "num_tokens": 1, "num_tokens_all": 806, "is_greedy": false, "logits_per_token": -1.5873210430145264, "logits_per_char": -0.7936605215072632, "num_chars": 2}, {"sum_logits": -1.553260326385498, "num_tokens": 1, "num_tokens_all": 806, "is_greedy": false, "logits_per_token": -1.553260326385498, "logits_per_char": -0.776630163192749, "num_chars": 2}, {"sum_logits": -1.1191434860229492, "num_tokens": 1, "num_tokens_all": 806, "is_greedy": true, "logits_per_token": -1.1191434860229492, "logits_per_char": -0.5595717430114746, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.569717526435852, "incorrect_loss_raw": 1.3409669796625774, "correct_loss_per_char": 0.784858763217926, "incorrect_loss_per_char": 0.6704834898312887, "correct_loss_per_token": 1.569717526435852, "incorrect_loss_per_token": 1.3409669796625774, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.569717526435852, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -1.569717526435852, "logits_per_char": -0.784858763217926, "num_chars": 2}, {"sum_logits": -1.3804686069488525, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -1.3804686069488525, "logits_per_char": -0.6902343034744263, "num_chars": 2}, {"sum_logits": -1.388581395149231, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -1.388581395149231, "logits_per_char": -0.6942906975746155, "num_chars": 2}, {"sum_logits": -1.2538509368896484, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -1.2538509368896484, "logits_per_char": -0.6269254684448242, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2031391859054565, "incorrect_loss_raw": 1.4977919260660808, "correct_loss_per_char": 0.6015695929527283, "incorrect_loss_per_char": 0.7488959630330404, "correct_loss_per_token": 1.2031391859054565, "incorrect_loss_per_token": 1.4977919260660808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6026890277862549, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -1.6026890277862549, "logits_per_char": -0.8013445138931274, "num_chars": 2}, {"sum_logits": -1.4501636028289795, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -1.4501636028289795, "logits_per_char": -0.7250818014144897, "num_chars": 2}, {"sum_logits": -1.4405231475830078, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -1.4405231475830078, "logits_per_char": -0.7202615737915039, "num_chars": 2}, {"sum_logits": -1.2031391859054565, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": true, "logits_per_token": -1.2031391859054565, "logits_per_char": -0.6015695929527283, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3652856349945068, "incorrect_loss_raw": 1.4054380257924397, "correct_loss_per_char": 0.6826428174972534, "incorrect_loss_per_char": 0.7027190128962199, "correct_loss_per_token": 1.3652856349945068, "incorrect_loss_per_token": 1.4054380257924397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3584235906600952, "num_tokens": 1, "num_tokens_all": 792, "is_greedy": false, "logits_per_token": -1.3584235906600952, "logits_per_char": -0.6792117953300476, "num_chars": 2}, {"sum_logits": -1.5421053171157837, "num_tokens": 1, "num_tokens_all": 792, "is_greedy": false, "logits_per_token": -1.5421053171157837, "logits_per_char": -0.7710526585578918, "num_chars": 2}, {"sum_logits": -1.3652856349945068, "num_tokens": 1, "num_tokens_all": 792, "is_greedy": false, "logits_per_token": -1.3652856349945068, "logits_per_char": -0.6826428174972534, "num_chars": 2}, {"sum_logits": -1.3157851696014404, "num_tokens": 1, "num_tokens_all": 792, "is_greedy": true, "logits_per_token": -1.3157851696014404, "logits_per_char": -0.6578925848007202, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0981886386871338, "incorrect_loss_raw": 1.5143165588378906, "correct_loss_per_char": 0.5490943193435669, "incorrect_loss_per_char": 0.7571582794189453, "correct_loss_per_token": 1.0981886386871338, "incorrect_loss_per_token": 1.5143165588378906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.497309923171997, "num_tokens": 1, "num_tokens_all": 795, "is_greedy": false, "logits_per_token": -1.497309923171997, "logits_per_char": -0.7486549615859985, "num_chars": 2}, {"sum_logits": -1.5809588432312012, "num_tokens": 1, "num_tokens_all": 795, "is_greedy": false, "logits_per_token": -1.5809588432312012, "logits_per_char": -0.7904794216156006, "num_chars": 2}, {"sum_logits": -1.4646809101104736, "num_tokens": 1, "num_tokens_all": 795, "is_greedy": false, "logits_per_token": -1.4646809101104736, "logits_per_char": -0.7323404550552368, "num_chars": 2}, {"sum_logits": -1.0981886386871338, "num_tokens": 1, "num_tokens_all": 795, "is_greedy": true, "logits_per_token": -1.0981886386871338, "logits_per_char": -0.5490943193435669, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3737820386886597, "incorrect_loss_raw": 1.4126639763514202, "correct_loss_per_char": 0.6868910193443298, "incorrect_loss_per_char": 0.7063319881757101, "correct_loss_per_token": 1.3737820386886597, "incorrect_loss_per_token": 1.4126639763514202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.402832269668579, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.402832269668579, "logits_per_char": -0.7014161348342896, "num_chars": 2}, {"sum_logits": -1.3742074966430664, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.3742074966430664, "logits_per_char": -0.6871037483215332, "num_chars": 2}, {"sum_logits": -1.4609521627426147, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.4609521627426147, "logits_per_char": -0.7304760813713074, "num_chars": 2}, {"sum_logits": -1.3737820386886597, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -1.3737820386886597, "logits_per_char": -0.6868910193443298, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4878181219100952, "incorrect_loss_raw": 1.3675498565038045, "correct_loss_per_char": 0.7439090609550476, "incorrect_loss_per_char": 0.6837749282519022, "correct_loss_per_token": 1.4878181219100952, "incorrect_loss_per_token": 1.3675498565038045, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3128985166549683, "num_tokens": 1, "num_tokens_all": 804, "is_greedy": false, "logits_per_token": -1.3128985166549683, "logits_per_char": -0.6564492583274841, "num_chars": 2}, {"sum_logits": -1.493280053138733, "num_tokens": 1, "num_tokens_all": 804, "is_greedy": false, "logits_per_token": -1.493280053138733, "logits_per_char": -0.7466400265693665, "num_chars": 2}, {"sum_logits": -1.4878181219100952, "num_tokens": 1, "num_tokens_all": 804, "is_greedy": false, "logits_per_token": -1.4878181219100952, "logits_per_char": -0.7439090609550476, "num_chars": 2}, {"sum_logits": -1.2964709997177124, "num_tokens": 1, "num_tokens_all": 804, "is_greedy": true, "logits_per_token": -1.2964709997177124, "logits_per_char": -0.6482354998588562, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3914148807525635, "incorrect_loss_raw": 1.4014288584391277, "correct_loss_per_char": 0.6957074403762817, "incorrect_loss_per_char": 0.7007144292195638, "correct_loss_per_token": 1.3914148807525635, "incorrect_loss_per_token": 1.4014288584391277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.464536428451538, "num_tokens": 1, "num_tokens_all": 800, "is_greedy": false, "logits_per_token": -1.464536428451538, "logits_per_char": -0.732268214225769, "num_chars": 2}, {"sum_logits": -1.5220696926116943, "num_tokens": 1, "num_tokens_all": 800, "is_greedy": false, "logits_per_token": -1.5220696926116943, "logits_per_char": -0.7610348463058472, "num_chars": 2}, {"sum_logits": -1.3914148807525635, "num_tokens": 1, "num_tokens_all": 800, "is_greedy": false, "logits_per_token": -1.3914148807525635, "logits_per_char": -0.6957074403762817, "num_chars": 2}, {"sum_logits": -1.2176804542541504, "num_tokens": 1, "num_tokens_all": 800, "is_greedy": true, "logits_per_token": -1.2176804542541504, "logits_per_char": -0.6088402271270752, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0789644718170166, "incorrect_loss_raw": 1.5348869959513347, "correct_loss_per_char": 0.5394822359085083, "incorrect_loss_per_char": 0.7674434979756674, "correct_loss_per_token": 1.0789644718170166, "incorrect_loss_per_token": 1.5348869959513347, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.598425030708313, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.598425030708313, "logits_per_char": -0.7992125153541565, "num_chars": 2}, {"sum_logits": -1.4772758483886719, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.4772758483886719, "logits_per_char": -0.7386379241943359, "num_chars": 2}, {"sum_logits": -1.528960108757019, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.528960108757019, "logits_per_char": -0.7644800543785095, "num_chars": 2}, {"sum_logits": -1.0789644718170166, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -1.0789644718170166, "logits_per_char": -0.5394822359085083, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2060874700546265, "incorrect_loss_raw": 1.4650142987569172, "correct_loss_per_char": 0.6030437350273132, "incorrect_loss_per_char": 0.7325071493784586, "correct_loss_per_token": 1.2060874700546265, "incorrect_loss_per_token": 1.4650142987569172, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4237781763076782, "num_tokens": 1, "num_tokens_all": 820, "is_greedy": false, "logits_per_token": -1.4237781763076782, "logits_per_char": -0.7118890881538391, "num_chars": 2}, {"sum_logits": -1.5952179431915283, "num_tokens": 1, "num_tokens_all": 820, "is_greedy": false, "logits_per_token": -1.5952179431915283, "logits_per_char": -0.7976089715957642, "num_chars": 2}, {"sum_logits": -1.3760467767715454, "num_tokens": 1, "num_tokens_all": 820, "is_greedy": false, "logits_per_token": -1.3760467767715454, "logits_per_char": -0.6880233883857727, "num_chars": 2}, {"sum_logits": -1.2060874700546265, "num_tokens": 1, "num_tokens_all": 820, "is_greedy": true, "logits_per_token": -1.2060874700546265, "logits_per_char": -0.6030437350273132, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3742064237594604, "incorrect_loss_raw": 1.4188178777694702, "correct_loss_per_char": 0.6871032118797302, "incorrect_loss_per_char": 0.7094089388847351, "correct_loss_per_token": 1.3742064237594604, "incorrect_loss_per_token": 1.4188178777694702, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3742064237594604, "num_tokens": 1, "num_tokens_all": 821, "is_greedy": false, "logits_per_token": -1.3742064237594604, "logits_per_char": -0.6871032118797302, "num_chars": 2}, {"sum_logits": -1.427352786064148, "num_tokens": 1, "num_tokens_all": 821, "is_greedy": false, "logits_per_token": -1.427352786064148, "logits_per_char": -0.713676393032074, "num_chars": 2}, {"sum_logits": -1.494112491607666, "num_tokens": 1, "num_tokens_all": 821, "is_greedy": false, "logits_per_token": -1.494112491607666, "logits_per_char": -0.747056245803833, "num_chars": 2}, {"sum_logits": -1.3349883556365967, "num_tokens": 1, "num_tokens_all": 821, "is_greedy": true, "logits_per_token": -1.3349883556365967, "logits_per_char": -0.6674941778182983, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.461963176727295, "incorrect_loss_raw": 1.3707365989685059, "correct_loss_per_char": 0.7309815883636475, "incorrect_loss_per_char": 0.6853682994842529, "correct_loss_per_token": 1.461963176727295, "incorrect_loss_per_token": 1.3707365989685059, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3340778350830078, "num_tokens": 1, "num_tokens_all": 793, "is_greedy": true, "logits_per_token": -1.3340778350830078, "logits_per_char": -0.6670389175415039, "num_chars": 2}, {"sum_logits": -1.461963176727295, "num_tokens": 1, "num_tokens_all": 793, "is_greedy": false, "logits_per_token": -1.461963176727295, "logits_per_char": -0.7309815883636475, "num_chars": 2}, {"sum_logits": -1.4129267930984497, "num_tokens": 1, "num_tokens_all": 793, "is_greedy": false, "logits_per_token": -1.4129267930984497, "logits_per_char": -0.7064633965492249, "num_chars": 2}, {"sum_logits": -1.36520516872406, "num_tokens": 1, "num_tokens_all": 793, "is_greedy": false, "logits_per_token": -1.36520516872406, "logits_per_char": -0.68260258436203, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.503011703491211, "incorrect_loss_raw": 1.3578650554021199, "correct_loss_per_char": 0.7515058517456055, "incorrect_loss_per_char": 0.6789325277010599, "correct_loss_per_token": 1.503011703491211, "incorrect_loss_per_token": 1.3578650554021199, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3654553890228271, "num_tokens": 1, "num_tokens_all": 805, "is_greedy": false, "logits_per_token": -1.3654553890228271, "logits_per_char": -0.6827276945114136, "num_chars": 2}, {"sum_logits": -1.3960294723510742, "num_tokens": 1, "num_tokens_all": 805, "is_greedy": false, "logits_per_token": -1.3960294723510742, "logits_per_char": -0.6980147361755371, "num_chars": 2}, {"sum_logits": -1.503011703491211, "num_tokens": 1, "num_tokens_all": 805, "is_greedy": false, "logits_per_token": -1.503011703491211, "logits_per_char": -0.7515058517456055, "num_chars": 2}, {"sum_logits": -1.3121103048324585, "num_tokens": 1, "num_tokens_all": 805, "is_greedy": true, "logits_per_token": -1.3121103048324585, "logits_per_char": -0.6560551524162292, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7405314445495605, "incorrect_loss_raw": 1.329370339711507, "correct_loss_per_char": 0.8702657222747803, "incorrect_loss_per_char": 0.6646851698557535, "correct_loss_per_token": 1.7405314445495605, "incorrect_loss_per_token": 1.329370339711507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4758000373840332, "num_tokens": 1, "num_tokens_all": 786, "is_greedy": false, "logits_per_token": -1.4758000373840332, "logits_per_char": -0.7379000186920166, "num_chars": 2}, {"sum_logits": -1.510998010635376, "num_tokens": 1, "num_tokens_all": 786, "is_greedy": false, "logits_per_token": -1.510998010635376, "logits_per_char": -0.755499005317688, "num_chars": 2}, {"sum_logits": -1.7405314445495605, "num_tokens": 1, "num_tokens_all": 786, "is_greedy": false, "logits_per_token": -1.7405314445495605, "logits_per_char": -0.8702657222747803, "num_chars": 2}, {"sum_logits": -1.0013129711151123, "num_tokens": 1, "num_tokens_all": 786, "is_greedy": true, "logits_per_token": -1.0013129711151123, "logits_per_char": -0.5006564855575562, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2229924201965332, "incorrect_loss_raw": 1.4538158178329468, "correct_loss_per_char": 0.6114962100982666, "incorrect_loss_per_char": 0.7269079089164734, "correct_loss_per_token": 1.2229924201965332, "incorrect_loss_per_token": 1.4538158178329468, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4684866666793823, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": false, "logits_per_token": -1.4684866666793823, "logits_per_char": -0.7342433333396912, "num_chars": 2}, {"sum_logits": -1.3771154880523682, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": false, "logits_per_token": -1.3771154880523682, "logits_per_char": -0.6885577440261841, "num_chars": 2}, {"sum_logits": -1.5158452987670898, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": false, "logits_per_token": -1.5158452987670898, "logits_per_char": -0.7579226493835449, "num_chars": 2}, {"sum_logits": -1.2229924201965332, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": true, "logits_per_token": -1.2229924201965332, "logits_per_char": -0.6114962100982666, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.052984595298767, "incorrect_loss_raw": 1.5362304051717122, "correct_loss_per_char": 0.5264922976493835, "incorrect_loss_per_char": 0.7681152025858561, "correct_loss_per_token": 1.052984595298767, "incorrect_loss_per_token": 1.5362304051717122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4735229015350342, "num_tokens": 1, "num_tokens_all": 817, "is_greedy": false, "logits_per_token": -1.4735229015350342, "logits_per_char": -0.7367614507675171, "num_chars": 2}, {"sum_logits": -1.6209027767181396, "num_tokens": 1, "num_tokens_all": 817, "is_greedy": false, "logits_per_token": -1.6209027767181396, "logits_per_char": -0.8104513883590698, "num_chars": 2}, {"sum_logits": -1.514265537261963, "num_tokens": 1, "num_tokens_all": 817, "is_greedy": false, "logits_per_token": -1.514265537261963, "logits_per_char": -0.7571327686309814, "num_chars": 2}, {"sum_logits": -1.052984595298767, "num_tokens": 1, "num_tokens_all": 817, "is_greedy": true, "logits_per_token": -1.052984595298767, "logits_per_char": -0.5264922976493835, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.333178997039795, "incorrect_loss_raw": 1.4147001107533772, "correct_loss_per_char": 0.6665894985198975, "incorrect_loss_per_char": 0.7073500553766886, "correct_loss_per_token": 1.333178997039795, "incorrect_loss_per_token": 1.4147001107533772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5086270570755005, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": false, "logits_per_token": -1.5086270570755005, "logits_per_char": -0.7543135285377502, "num_chars": 2}, {"sum_logits": -1.333178997039795, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": false, "logits_per_token": -1.333178997039795, "logits_per_char": -0.6665894985198975, "num_chars": 2}, {"sum_logits": -1.426323413848877, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": false, "logits_per_token": -1.426323413848877, "logits_per_char": -0.7131617069244385, "num_chars": 2}, {"sum_logits": -1.3091498613357544, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": true, "logits_per_token": -1.3091498613357544, "logits_per_char": -0.6545749306678772, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4407294988632202, "incorrect_loss_raw": 1.3867477575937908, "correct_loss_per_char": 0.7203647494316101, "incorrect_loss_per_char": 0.6933738787968954, "correct_loss_per_token": 1.4407294988632202, "incorrect_loss_per_token": 1.3867477575937908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4407294988632202, "num_tokens": 1, "num_tokens_all": 824, "is_greedy": false, "logits_per_token": -1.4407294988632202, "logits_per_char": -0.7203647494316101, "num_chars": 2}, {"sum_logits": -1.3938792943954468, "num_tokens": 1, "num_tokens_all": 824, "is_greedy": false, "logits_per_token": -1.3938792943954468, "logits_per_char": -0.6969396471977234, "num_chars": 2}, {"sum_logits": -1.5724910497665405, "num_tokens": 1, "num_tokens_all": 824, "is_greedy": false, "logits_per_token": -1.5724910497665405, "logits_per_char": -0.7862455248832703, "num_chars": 2}, {"sum_logits": -1.1938729286193848, "num_tokens": 1, "num_tokens_all": 824, "is_greedy": true, "logits_per_token": -1.1938729286193848, "logits_per_char": -0.5969364643096924, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.118024468421936, "incorrect_loss_raw": 1.5019547939300537, "correct_loss_per_char": 0.559012234210968, "incorrect_loss_per_char": 0.7509773969650269, "correct_loss_per_token": 1.118024468421936, "incorrect_loss_per_token": 1.5019547939300537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5215486288070679, "num_tokens": 1, "num_tokens_all": 809, "is_greedy": false, "logits_per_token": -1.5215486288070679, "logits_per_char": -0.7607743144035339, "num_chars": 2}, {"sum_logits": -1.5304793119430542, "num_tokens": 1, "num_tokens_all": 809, "is_greedy": false, "logits_per_token": -1.5304793119430542, "logits_per_char": -0.7652396559715271, "num_chars": 2}, {"sum_logits": -1.453836441040039, "num_tokens": 1, "num_tokens_all": 809, "is_greedy": false, "logits_per_token": -1.453836441040039, "logits_per_char": -0.7269182205200195, "num_chars": 2}, {"sum_logits": -1.118024468421936, "num_tokens": 1, "num_tokens_all": 809, "is_greedy": true, "logits_per_token": -1.118024468421936, "logits_per_char": -0.559012234210968, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.515108585357666, "incorrect_loss_raw": 1.3796897729237874, "correct_loss_per_char": 0.757554292678833, "incorrect_loss_per_char": 0.6898448864618937, "correct_loss_per_token": 1.515108585357666, "incorrect_loss_per_token": 1.3796897729237874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.515108585357666, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": false, "logits_per_token": -1.515108585357666, "logits_per_char": -0.757554292678833, "num_chars": 2}, {"sum_logits": -1.4334896802902222, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": false, "logits_per_token": -1.4334896802902222, "logits_per_char": -0.7167448401451111, "num_chars": 2}, {"sum_logits": -1.5250056982040405, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": false, "logits_per_token": -1.5250056982040405, "logits_per_char": -0.7625028491020203, "num_chars": 2}, {"sum_logits": -1.1805739402770996, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": true, "logits_per_token": -1.1805739402770996, "logits_per_char": -0.5902869701385498, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1475353240966797, "incorrect_loss_raw": 1.4928377469380696, "correct_loss_per_char": 0.5737676620483398, "incorrect_loss_per_char": 0.7464188734690348, "correct_loss_per_token": 1.1475353240966797, "incorrect_loss_per_token": 1.4928377469380696, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5086829662322998, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.5086829662322998, "logits_per_char": -0.7543414831161499, "num_chars": 2}, {"sum_logits": -1.397999882698059, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.397999882698059, "logits_per_char": -0.6989999413490295, "num_chars": 2}, {"sum_logits": -1.57183039188385, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.57183039188385, "logits_per_char": -0.785915195941925, "num_chars": 2}, {"sum_logits": -1.1475353240966797, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -1.1475353240966797, "logits_per_char": -0.5737676620483398, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.153324007987976, "incorrect_loss_raw": 1.514971176783244, "correct_loss_per_char": 0.576662003993988, "incorrect_loss_per_char": 0.757485588391622, "correct_loss_per_token": 1.153324007987976, "incorrect_loss_per_token": 1.514971176783244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.630492925643921, "num_tokens": 1, "num_tokens_all": 824, "is_greedy": false, "logits_per_token": -1.630492925643921, "logits_per_char": -0.8152464628219604, "num_chars": 2}, {"sum_logits": -1.4130651950836182, "num_tokens": 1, "num_tokens_all": 824, "is_greedy": false, "logits_per_token": -1.4130651950836182, "logits_per_char": -0.7065325975418091, "num_chars": 2}, {"sum_logits": -1.5013554096221924, "num_tokens": 1, "num_tokens_all": 824, "is_greedy": false, "logits_per_token": -1.5013554096221924, "logits_per_char": -0.7506777048110962, "num_chars": 2}, {"sum_logits": -1.153324007987976, "num_tokens": 1, "num_tokens_all": 824, "is_greedy": true, "logits_per_token": -1.153324007987976, "logits_per_char": -0.576662003993988, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4308931827545166, "incorrect_loss_raw": 1.3913478056589763, "correct_loss_per_char": 0.7154465913772583, "incorrect_loss_per_char": 0.6956739028294882, "correct_loss_per_token": 1.4308931827545166, "incorrect_loss_per_token": 1.3913478056589763, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4806758165359497, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -1.4806758165359497, "logits_per_char": -0.7403379082679749, "num_chars": 2}, {"sum_logits": -1.415672779083252, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -1.415672779083252, "logits_per_char": -0.707836389541626, "num_chars": 2}, {"sum_logits": -1.4308931827545166, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -1.4308931827545166, "logits_per_char": -0.7154465913772583, "num_chars": 2}, {"sum_logits": -1.277694821357727, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": true, "logits_per_token": -1.277694821357727, "logits_per_char": -0.6388474106788635, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1543350219726562, "incorrect_loss_raw": 1.489290674527486, "correct_loss_per_char": 0.5771675109863281, "incorrect_loss_per_char": 0.744645337263743, "correct_loss_per_token": 1.1543350219726562, "incorrect_loss_per_token": 1.489290674527486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5573101043701172, "num_tokens": 1, "num_tokens_all": 777, "is_greedy": false, "logits_per_token": -1.5573101043701172, "logits_per_char": -0.7786550521850586, "num_chars": 2}, {"sum_logits": -1.5608124732971191, "num_tokens": 1, "num_tokens_all": 777, "is_greedy": false, "logits_per_token": -1.5608124732971191, "logits_per_char": -0.7804062366485596, "num_chars": 2}, {"sum_logits": -1.3497494459152222, "num_tokens": 1, "num_tokens_all": 777, "is_greedy": false, "logits_per_token": -1.3497494459152222, "logits_per_char": -0.6748747229576111, "num_chars": 2}, {"sum_logits": -1.1543350219726562, "num_tokens": 1, "num_tokens_all": 777, "is_greedy": true, "logits_per_token": -1.1543350219726562, "logits_per_char": -0.5771675109863281, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3718205690383911, "incorrect_loss_raw": 1.3989921808242798, "correct_loss_per_char": 0.6859102845191956, "incorrect_loss_per_char": 0.6994960904121399, "correct_loss_per_token": 1.3718205690383911, "incorrect_loss_per_token": 1.3989921808242798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3718205690383911, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -1.3718205690383911, "logits_per_char": -0.6859102845191956, "num_chars": 2}, {"sum_logits": -1.4153971672058105, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.4153971672058105, "logits_per_char": -0.7076985836029053, "num_chars": 2}, {"sum_logits": -1.3761787414550781, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.3761787414550781, "logits_per_char": -0.6880893707275391, "num_chars": 2}, {"sum_logits": -1.4054006338119507, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.4054006338119507, "logits_per_char": -0.7027003169059753, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6818146705627441, "incorrect_loss_raw": 1.3138258854548137, "correct_loss_per_char": 0.8409073352813721, "incorrect_loss_per_char": 0.6569129427274069, "correct_loss_per_token": 1.6818146705627441, "incorrect_loss_per_token": 1.3138258854548137, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6818146705627441, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.6818146705627441, "logits_per_char": -0.8409073352813721, "num_chars": 2}, {"sum_logits": -1.417531967163086, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.417531967163086, "logits_per_char": -0.708765983581543, "num_chars": 2}, {"sum_logits": -1.3331856727600098, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.3331856727600098, "logits_per_char": -0.6665928363800049, "num_chars": 2}, {"sum_logits": -1.1907600164413452, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -1.1907600164413452, "logits_per_char": -0.5953800082206726, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.135303258895874, "incorrect_loss_raw": 1.5424792766571045, "correct_loss_per_char": 0.567651629447937, "incorrect_loss_per_char": 0.7712396383285522, "correct_loss_per_token": 1.135303258895874, "incorrect_loss_per_token": 1.5424792766571045, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6059048175811768, "num_tokens": 1, "num_tokens_all": 809, "is_greedy": false, "logits_per_token": -1.6059048175811768, "logits_per_char": -0.8029524087905884, "num_chars": 2}, {"sum_logits": -1.5583771467208862, "num_tokens": 1, "num_tokens_all": 809, "is_greedy": false, "logits_per_token": -1.5583771467208862, "logits_per_char": -0.7791885733604431, "num_chars": 2}, {"sum_logits": -1.4631558656692505, "num_tokens": 1, "num_tokens_all": 809, "is_greedy": false, "logits_per_token": -1.4631558656692505, "logits_per_char": -0.7315779328346252, "num_chars": 2}, {"sum_logits": -1.135303258895874, "num_tokens": 1, "num_tokens_all": 809, "is_greedy": true, "logits_per_token": -1.135303258895874, "logits_per_char": -0.567651629447937, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1309173107147217, "incorrect_loss_raw": 1.495942234992981, "correct_loss_per_char": 0.5654586553573608, "incorrect_loss_per_char": 0.7479711174964905, "correct_loss_per_token": 1.1309173107147217, "incorrect_loss_per_token": 1.495942234992981, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.576733946800232, "num_tokens": 1, "num_tokens_all": 816, "is_greedy": false, "logits_per_token": -1.576733946800232, "logits_per_char": -0.788366973400116, "num_chars": 2}, {"sum_logits": -1.4405617713928223, "num_tokens": 1, "num_tokens_all": 816, "is_greedy": false, "logits_per_token": -1.4405617713928223, "logits_per_char": -0.7202808856964111, "num_chars": 2}, {"sum_logits": -1.4705309867858887, "num_tokens": 1, "num_tokens_all": 816, "is_greedy": false, "logits_per_token": -1.4705309867858887, "logits_per_char": -0.7352654933929443, "num_chars": 2}, {"sum_logits": -1.1309173107147217, "num_tokens": 1, "num_tokens_all": 816, "is_greedy": true, "logits_per_token": -1.1309173107147217, "logits_per_char": -0.5654586553573608, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4589747190475464, "incorrect_loss_raw": 1.388464371363322, "correct_loss_per_char": 0.7294873595237732, "incorrect_loss_per_char": 0.694232185681661, "correct_loss_per_token": 1.4589747190475464, "incorrect_loss_per_token": 1.388464371363322, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5137345790863037, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": false, "logits_per_token": -1.5137345790863037, "logits_per_char": -0.7568672895431519, "num_chars": 2}, {"sum_logits": -1.4589747190475464, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": false, "logits_per_token": -1.4589747190475464, "logits_per_char": -0.7294873595237732, "num_chars": 2}, {"sum_logits": -1.4381005764007568, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": false, "logits_per_token": -1.4381005764007568, "logits_per_char": -0.7190502882003784, "num_chars": 2}, {"sum_logits": -1.2135579586029053, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": true, "logits_per_token": -1.2135579586029053, "logits_per_char": -0.6067789793014526, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.53923499584198, "incorrect_loss_raw": 1.3710025151570637, "correct_loss_per_char": 0.76961749792099, "incorrect_loss_per_char": 0.6855012575785319, "correct_loss_per_token": 1.53923499584198, "incorrect_loss_per_token": 1.3710025151570637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5954476594924927, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.5954476594924927, "logits_per_char": -0.7977238297462463, "num_chars": 2}, {"sum_logits": -1.4533586502075195, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.4533586502075195, "logits_per_char": -0.7266793251037598, "num_chars": 2}, {"sum_logits": -1.53923499584198, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.53923499584198, "logits_per_char": -0.76961749792099, "num_chars": 2}, {"sum_logits": -1.0642012357711792, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -1.0642012357711792, "logits_per_char": -0.5321006178855896, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5763680934906006, "incorrect_loss_raw": 1.371099591255188, "correct_loss_per_char": 0.7881840467453003, "incorrect_loss_per_char": 0.685549795627594, "correct_loss_per_token": 1.5763680934906006, "incorrect_loss_per_token": 1.371099591255188, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5444917678833008, "num_tokens": 1, "num_tokens_all": 786, "is_greedy": false, "logits_per_token": -1.5444917678833008, "logits_per_char": -0.7722458839416504, "num_chars": 2}, {"sum_logits": -1.5523788928985596, "num_tokens": 1, "num_tokens_all": 786, "is_greedy": false, "logits_per_token": -1.5523788928985596, "logits_per_char": -0.7761894464492798, "num_chars": 2}, {"sum_logits": -1.5763680934906006, "num_tokens": 1, "num_tokens_all": 786, "is_greedy": false, "logits_per_token": -1.5763680934906006, "logits_per_char": -0.7881840467453003, "num_chars": 2}, {"sum_logits": -1.0164281129837036, "num_tokens": 1, "num_tokens_all": 786, "is_greedy": true, "logits_per_token": -1.0164281129837036, "logits_per_char": -0.5082140564918518, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3448612689971924, "incorrect_loss_raw": 1.4160287380218506, "correct_loss_per_char": 0.6724306344985962, "incorrect_loss_per_char": 0.7080143690109253, "correct_loss_per_token": 1.3448612689971924, "incorrect_loss_per_token": 1.4160287380218506, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3448612689971924, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": true, "logits_per_token": -1.3448612689971924, "logits_per_char": -0.6724306344985962, "num_chars": 2}, {"sum_logits": -1.4224263429641724, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": false, "logits_per_token": -1.4224263429641724, "logits_per_char": -0.7112131714820862, "num_chars": 2}, {"sum_logits": -1.4772670269012451, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": false, "logits_per_token": -1.4772670269012451, "logits_per_char": -0.7386335134506226, "num_chars": 2}, {"sum_logits": -1.3483928442001343, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": false, "logits_per_token": -1.3483928442001343, "logits_per_char": -0.6741964221000671, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4271554946899414, "incorrect_loss_raw": 1.4301058451334636, "correct_loss_per_char": 0.7135777473449707, "incorrect_loss_per_char": 0.7150529225667318, "correct_loss_per_token": 1.4271554946899414, "incorrect_loss_per_token": 1.4301058451334636, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.711273193359375, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -1.711273193359375, "logits_per_char": -0.8556365966796875, "num_chars": 2}, {"sum_logits": -1.5419187545776367, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -1.5419187545776367, "logits_per_char": -0.7709593772888184, "num_chars": 2}, {"sum_logits": -1.4271554946899414, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -1.4271554946899414, "logits_per_char": -0.7135777473449707, "num_chars": 2}, {"sum_logits": -1.037125587463379, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": true, "logits_per_token": -1.037125587463379, "logits_per_char": -0.5185627937316895, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4517114162445068, "incorrect_loss_raw": 1.4225432872772217, "correct_loss_per_char": 0.7258557081222534, "incorrect_loss_per_char": 0.7112716436386108, "correct_loss_per_token": 1.4517114162445068, "incorrect_loss_per_token": 1.4225432872772217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.210269808769226, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": true, "logits_per_token": -1.210269808769226, "logits_per_char": -0.605134904384613, "num_chars": 2}, {"sum_logits": -1.4551382064819336, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": false, "logits_per_token": -1.4551382064819336, "logits_per_char": -0.7275691032409668, "num_chars": 2}, {"sum_logits": -1.6022218465805054, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": false, "logits_per_token": -1.6022218465805054, "logits_per_char": -0.8011109232902527, "num_chars": 2}, {"sum_logits": -1.4517114162445068, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": false, "logits_per_token": -1.4517114162445068, "logits_per_char": -0.7258557081222534, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2223260402679443, "incorrect_loss_raw": 1.4585837125778198, "correct_loss_per_char": 0.6111630201339722, "incorrect_loss_per_char": 0.7292918562889099, "correct_loss_per_token": 1.2223260402679443, "incorrect_loss_per_token": 1.4585837125778198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4235235452651978, "num_tokens": 1, "num_tokens_all": 805, "is_greedy": false, "logits_per_token": -1.4235235452651978, "logits_per_char": -0.7117617726325989, "num_chars": 2}, {"sum_logits": -1.5581949949264526, "num_tokens": 1, "num_tokens_all": 805, "is_greedy": false, "logits_per_token": -1.5581949949264526, "logits_per_char": -0.7790974974632263, "num_chars": 2}, {"sum_logits": -1.394032597541809, "num_tokens": 1, "num_tokens_all": 805, "is_greedy": false, "logits_per_token": -1.394032597541809, "logits_per_char": -0.6970162987709045, "num_chars": 2}, {"sum_logits": -1.2223260402679443, "num_tokens": 1, "num_tokens_all": 805, "is_greedy": true, "logits_per_token": -1.2223260402679443, "logits_per_char": -0.6111630201339722, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.422203540802002, "incorrect_loss_raw": 1.4012306133906047, "correct_loss_per_char": 0.711101770401001, "incorrect_loss_per_char": 0.7006153066953024, "correct_loss_per_token": 1.422203540802002, "incorrect_loss_per_token": 1.4012306133906047, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5489343404769897, "num_tokens": 1, "num_tokens_all": 832, "is_greedy": false, "logits_per_token": -1.5489343404769897, "logits_per_char": -0.7744671702384949, "num_chars": 2}, {"sum_logits": -1.422203540802002, "num_tokens": 1, "num_tokens_all": 832, "is_greedy": false, "logits_per_token": -1.422203540802002, "logits_per_char": -0.711101770401001, "num_chars": 2}, {"sum_logits": -1.5327775478363037, "num_tokens": 1, "num_tokens_all": 832, "is_greedy": false, "logits_per_token": -1.5327775478363037, "logits_per_char": -0.7663887739181519, "num_chars": 2}, {"sum_logits": -1.1219799518585205, "num_tokens": 1, "num_tokens_all": 832, "is_greedy": true, "logits_per_token": -1.1219799518585205, "logits_per_char": -0.5609899759292603, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2057687044143677, "incorrect_loss_raw": 1.4888558387756348, "correct_loss_per_char": 0.6028843522071838, "incorrect_loss_per_char": 0.7444279193878174, "correct_loss_per_token": 1.2057687044143677, "incorrect_loss_per_token": 1.4888558387756348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5106886625289917, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": false, "logits_per_token": -1.5106886625289917, "logits_per_char": -0.7553443312644958, "num_chars": 2}, {"sum_logits": -1.4565348625183105, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": false, "logits_per_token": -1.4565348625183105, "logits_per_char": -0.7282674312591553, "num_chars": 2}, {"sum_logits": -1.499343991279602, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": false, "logits_per_token": -1.499343991279602, "logits_per_char": -0.749671995639801, "num_chars": 2}, {"sum_logits": -1.2057687044143677, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": true, "logits_per_token": -1.2057687044143677, "logits_per_char": -0.6028843522071838, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3036898374557495, "incorrect_loss_raw": 1.4441472291946411, "correct_loss_per_char": 0.6518449187278748, "incorrect_loss_per_char": 0.7220736145973206, "correct_loss_per_token": 1.3036898374557495, "incorrect_loss_per_token": 1.4441472291946411, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5573281049728394, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.5573281049728394, "logits_per_char": -0.7786640524864197, "num_chars": 2}, {"sum_logits": -1.3269639015197754, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.3269639015197754, "logits_per_char": -0.6634819507598877, "num_chars": 2}, {"sum_logits": -1.4481496810913086, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.4481496810913086, "logits_per_char": -0.7240748405456543, "num_chars": 2}, {"sum_logits": -1.3036898374557495, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -1.3036898374557495, "logits_per_char": -0.6518449187278748, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.512614369392395, "incorrect_loss_raw": 1.3581509192784627, "correct_loss_per_char": 0.7563071846961975, "incorrect_loss_per_char": 0.6790754596392313, "correct_loss_per_token": 1.512614369392395, "incorrect_loss_per_token": 1.3581509192784627, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4543017148971558, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": false, "logits_per_token": -1.4543017148971558, "logits_per_char": -0.7271508574485779, "num_chars": 2}, {"sum_logits": -1.512614369392395, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": false, "logits_per_token": -1.512614369392395, "logits_per_char": -0.7563071846961975, "num_chars": 2}, {"sum_logits": -1.3853105306625366, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": false, "logits_per_token": -1.3853105306625366, "logits_per_char": -0.6926552653312683, "num_chars": 2}, {"sum_logits": -1.2348405122756958, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": true, "logits_per_token": -1.2348405122756958, "logits_per_char": -0.6174202561378479, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1386258602142334, "incorrect_loss_raw": 1.492449680964152, "correct_loss_per_char": 0.5693129301071167, "incorrect_loss_per_char": 0.746224840482076, "correct_loss_per_token": 1.1386258602142334, "incorrect_loss_per_token": 1.492449680964152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5685930252075195, "num_tokens": 1, "num_tokens_all": 813, "is_greedy": false, "logits_per_token": -1.5685930252075195, "logits_per_char": -0.7842965126037598, "num_chars": 2}, {"sum_logits": -1.4446685314178467, "num_tokens": 1, "num_tokens_all": 813, "is_greedy": false, "logits_per_token": -1.4446685314178467, "logits_per_char": -0.7223342657089233, "num_chars": 2}, {"sum_logits": -1.4640874862670898, "num_tokens": 1, "num_tokens_all": 813, "is_greedy": false, "logits_per_token": -1.4640874862670898, "logits_per_char": -0.7320437431335449, "num_chars": 2}, {"sum_logits": -1.1386258602142334, "num_tokens": 1, "num_tokens_all": 813, "is_greedy": true, "logits_per_token": -1.1386258602142334, "logits_per_char": -0.5693129301071167, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2952173948287964, "incorrect_loss_raw": 1.433815638224284, "correct_loss_per_char": 0.6476086974143982, "incorrect_loss_per_char": 0.716907819112142, "correct_loss_per_token": 1.2952173948287964, "incorrect_loss_per_token": 1.433815638224284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2952173948287964, "num_tokens": 1, "num_tokens_all": 787, "is_greedy": false, "logits_per_token": -1.2952173948287964, "logits_per_char": -0.6476086974143982, "num_chars": 2}, {"sum_logits": -1.5119563341140747, "num_tokens": 1, "num_tokens_all": 787, "is_greedy": false, "logits_per_token": -1.5119563341140747, "logits_per_char": -0.7559781670570374, "num_chars": 2}, {"sum_logits": -1.5193915367126465, "num_tokens": 1, "num_tokens_all": 787, "is_greedy": false, "logits_per_token": -1.5193915367126465, "logits_per_char": -0.7596957683563232, "num_chars": 2}, {"sum_logits": -1.2700990438461304, "num_tokens": 1, "num_tokens_all": 787, "is_greedy": true, "logits_per_token": -1.2700990438461304, "logits_per_char": -0.6350495219230652, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6976981163024902, "incorrect_loss_raw": 1.3496475617090862, "correct_loss_per_char": 0.8488490581512451, "incorrect_loss_per_char": 0.6748237808545431, "correct_loss_per_token": 1.6976981163024902, "incorrect_loss_per_token": 1.3496475617090862, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6976981163024902, "num_tokens": 1, "num_tokens_all": 801, "is_greedy": false, "logits_per_token": -1.6976981163024902, "logits_per_char": -0.8488490581512451, "num_chars": 2}, {"sum_logits": -1.6268011331558228, "num_tokens": 1, "num_tokens_all": 801, "is_greedy": false, "logits_per_token": -1.6268011331558228, "logits_per_char": -0.8134005665779114, "num_chars": 2}, {"sum_logits": -1.4304792881011963, "num_tokens": 1, "num_tokens_all": 801, "is_greedy": false, "logits_per_token": -1.4304792881011963, "logits_per_char": -0.7152396440505981, "num_chars": 2}, {"sum_logits": -0.9916622638702393, "num_tokens": 1, "num_tokens_all": 801, "is_greedy": true, "logits_per_token": -0.9916622638702393, "logits_per_char": -0.49583113193511963, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6386563777923584, "incorrect_loss_raw": 1.3559431234995525, "correct_loss_per_char": 0.8193281888961792, "incorrect_loss_per_char": 0.6779715617497762, "correct_loss_per_token": 1.6386563777923584, "incorrect_loss_per_token": 1.3559431234995525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5002789497375488, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": false, "logits_per_token": -1.5002789497375488, "logits_per_char": -0.7501394748687744, "num_chars": 2}, {"sum_logits": -1.5817756652832031, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": false, "logits_per_token": -1.5817756652832031, "logits_per_char": -0.7908878326416016, "num_chars": 2}, {"sum_logits": -1.6386563777923584, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": false, "logits_per_token": -1.6386563777923584, "logits_per_char": -0.8193281888961792, "num_chars": 2}, {"sum_logits": -0.9857747554779053, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": true, "logits_per_token": -0.9857747554779053, "logits_per_char": -0.49288737773895264, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3268215656280518, "incorrect_loss_raw": 1.446496566136678, "correct_loss_per_char": 0.6634107828140259, "incorrect_loss_per_char": 0.723248283068339, "correct_loss_per_token": 1.3268215656280518, "incorrect_loss_per_token": 1.446496566136678, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3268215656280518, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -1.3268215656280518, "logits_per_char": -0.6634107828140259, "num_chars": 2}, {"sum_logits": -1.4464099407196045, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -1.4464099407196045, "logits_per_char": -0.7232049703598022, "num_chars": 2}, {"sum_logits": -1.6293021440505981, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -1.6293021440505981, "logits_per_char": -0.8146510720252991, "num_chars": 2}, {"sum_logits": -1.2637776136398315, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": true, "logits_per_token": -1.2637776136398315, "logits_per_char": -0.6318888068199158, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4242362976074219, "incorrect_loss_raw": 1.4054079453150432, "correct_loss_per_char": 0.7121181488037109, "incorrect_loss_per_char": 0.7027039726575216, "correct_loss_per_token": 1.4242362976074219, "incorrect_loss_per_token": 1.4054079453150432, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.54135000705719, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": false, "logits_per_token": -1.54135000705719, "logits_per_char": -0.770675003528595, "num_chars": 2}, {"sum_logits": -1.4242362976074219, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": false, "logits_per_token": -1.4242362976074219, "logits_per_char": -0.7121181488037109, "num_chars": 2}, {"sum_logits": -1.4833128452301025, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": false, "logits_per_token": -1.4833128452301025, "logits_per_char": -0.7416564226150513, "num_chars": 2}, {"sum_logits": -1.191560983657837, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": true, "logits_per_token": -1.191560983657837, "logits_per_char": -0.5957804918289185, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.585404396057129, "incorrect_loss_raw": 1.376249631245931, "correct_loss_per_char": 0.7927021980285645, "incorrect_loss_per_char": 0.6881248156229655, "correct_loss_per_token": 1.585404396057129, "incorrect_loss_per_token": 1.376249631245931, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.675856113433838, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.675856113433838, "logits_per_char": -0.837928056716919, "num_chars": 2}, {"sum_logits": -1.585404396057129, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.585404396057129, "logits_per_char": -0.7927021980285645, "num_chars": 2}, {"sum_logits": -1.4648983478546143, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.4648983478546143, "logits_per_char": -0.7324491739273071, "num_chars": 2}, {"sum_logits": -0.9879944324493408, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.9879944324493408, "logits_per_char": -0.4939972162246704, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.643787145614624, "incorrect_loss_raw": 1.323006272315979, "correct_loss_per_char": 0.821893572807312, "incorrect_loss_per_char": 0.6615031361579895, "correct_loss_per_token": 1.643787145614624, "incorrect_loss_per_token": 1.323006272315979, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.643787145614624, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.643787145614624, "logits_per_char": -0.821893572807312, "num_chars": 2}, {"sum_logits": -1.4058516025543213, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.4058516025543213, "logits_per_char": -0.7029258012771606, "num_chars": 2}, {"sum_logits": -1.3680593967437744, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.3680593967437744, "logits_per_char": -0.6840296983718872, "num_chars": 2}, {"sum_logits": -1.1951078176498413, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -1.1951078176498413, "logits_per_char": -0.5975539088249207, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.180134892463684, "incorrect_loss_raw": 1.4834574460983276, "correct_loss_per_char": 0.590067446231842, "incorrect_loss_per_char": 0.7417287230491638, "correct_loss_per_token": 1.180134892463684, "incorrect_loss_per_token": 1.4834574460983276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5017766952514648, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": false, "logits_per_token": -1.5017766952514648, "logits_per_char": -0.7508883476257324, "num_chars": 2}, {"sum_logits": -1.414771318435669, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": false, "logits_per_token": -1.414771318435669, "logits_per_char": -0.7073856592178345, "num_chars": 2}, {"sum_logits": -1.5338243246078491, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": false, "logits_per_token": -1.5338243246078491, "logits_per_char": -0.7669121623039246, "num_chars": 2}, {"sum_logits": -1.180134892463684, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": true, "logits_per_token": -1.180134892463684, "logits_per_char": -0.590067446231842, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5421690940856934, "incorrect_loss_raw": 1.359784444173177, "correct_loss_per_char": 0.7710845470428467, "incorrect_loss_per_char": 0.6798922220865885, "correct_loss_per_token": 1.5421690940856934, "incorrect_loss_per_token": 1.359784444173177, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5421690940856934, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.5421690940856934, "logits_per_char": -0.7710845470428467, "num_chars": 2}, {"sum_logits": -1.316366195678711, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -1.316366195678711, "logits_per_char": -0.6581830978393555, "num_chars": 2}, {"sum_logits": -1.4381918907165527, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.4381918907165527, "logits_per_char": -0.7190959453582764, "num_chars": 2}, {"sum_logits": -1.3247952461242676, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.3247952461242676, "logits_per_char": -0.6623976230621338, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3218976259231567, "incorrect_loss_raw": 1.4198810656865437, "correct_loss_per_char": 0.6609488129615784, "incorrect_loss_per_char": 0.7099405328432719, "correct_loss_per_token": 1.3218976259231567, "incorrect_loss_per_token": 1.4198810656865437, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2973779439926147, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": true, "logits_per_token": -1.2973779439926147, "logits_per_char": -0.6486889719963074, "num_chars": 2}, {"sum_logits": -1.4712103605270386, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": false, "logits_per_token": -1.4712103605270386, "logits_per_char": -0.7356051802635193, "num_chars": 2}, {"sum_logits": -1.491054892539978, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": false, "logits_per_token": -1.491054892539978, "logits_per_char": -0.745527446269989, "num_chars": 2}, {"sum_logits": -1.3218976259231567, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": false, "logits_per_token": -1.3218976259231567, "logits_per_char": -0.6609488129615784, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5844889879226685, "incorrect_loss_raw": 1.3388923406600952, "correct_loss_per_char": 0.7922444939613342, "incorrect_loss_per_char": 0.6694461703300476, "correct_loss_per_token": 1.5844889879226685, "incorrect_loss_per_token": 1.3388923406600952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4600214958190918, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": false, "logits_per_token": -1.4600214958190918, "logits_per_char": -0.7300107479095459, "num_chars": 2}, {"sum_logits": -1.3695640563964844, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": false, "logits_per_token": -1.3695640563964844, "logits_per_char": -0.6847820281982422, "num_chars": 2}, {"sum_logits": -1.5844889879226685, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": false, "logits_per_token": -1.5844889879226685, "logits_per_char": -0.7922444939613342, "num_chars": 2}, {"sum_logits": -1.1870914697647095, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": true, "logits_per_token": -1.1870914697647095, "logits_per_char": -0.5935457348823547, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2029609680175781, "incorrect_loss_raw": 1.4838759899139404, "correct_loss_per_char": 0.6014804840087891, "incorrect_loss_per_char": 0.7419379949569702, "correct_loss_per_token": 1.2029609680175781, "incorrect_loss_per_token": 1.4838759899139404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5346695184707642, "num_tokens": 1, "num_tokens_all": 822, "is_greedy": false, "logits_per_token": -1.5346695184707642, "logits_per_char": -0.7673347592353821, "num_chars": 2}, {"sum_logits": -1.489293098449707, "num_tokens": 1, "num_tokens_all": 822, "is_greedy": false, "logits_per_token": -1.489293098449707, "logits_per_char": -0.7446465492248535, "num_chars": 2}, {"sum_logits": -1.42766535282135, "num_tokens": 1, "num_tokens_all": 822, "is_greedy": false, "logits_per_token": -1.42766535282135, "logits_per_char": -0.713832676410675, "num_chars": 2}, {"sum_logits": -1.2029609680175781, "num_tokens": 1, "num_tokens_all": 822, "is_greedy": true, "logits_per_token": -1.2029609680175781, "logits_per_char": -0.6014804840087891, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5354843139648438, "incorrect_loss_raw": 1.3567068974177043, "correct_loss_per_char": 0.7677421569824219, "incorrect_loss_per_char": 0.6783534487088522, "correct_loss_per_token": 1.5354843139648438, "incorrect_loss_per_token": 1.3567068974177043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2595572471618652, "num_tokens": 1, "num_tokens_all": 848, "is_greedy": true, "logits_per_token": -1.2595572471618652, "logits_per_char": -0.6297786235809326, "num_chars": 2}, {"sum_logits": -1.439346432685852, "num_tokens": 1, "num_tokens_all": 848, "is_greedy": false, "logits_per_token": -1.439346432685852, "logits_per_char": -0.719673216342926, "num_chars": 2}, {"sum_logits": -1.5354843139648438, "num_tokens": 1, "num_tokens_all": 848, "is_greedy": false, "logits_per_token": -1.5354843139648438, "logits_per_char": -0.7677421569824219, "num_chars": 2}, {"sum_logits": -1.3712170124053955, "num_tokens": 1, "num_tokens_all": 848, "is_greedy": false, "logits_per_token": -1.3712170124053955, "logits_per_char": -0.6856085062026978, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5734330415725708, "incorrect_loss_raw": 1.368969241778056, "correct_loss_per_char": 0.7867165207862854, "incorrect_loss_per_char": 0.684484620889028, "correct_loss_per_token": 1.5734330415725708, "incorrect_loss_per_token": 1.368969241778056, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5734330415725708, "num_tokens": 1, "num_tokens_all": 804, "is_greedy": false, "logits_per_token": -1.5734330415725708, "logits_per_char": -0.7867165207862854, "num_chars": 2}, {"sum_logits": -1.6805202960968018, "num_tokens": 1, "num_tokens_all": 804, "is_greedy": false, "logits_per_token": -1.6805202960968018, "logits_per_char": -0.8402601480484009, "num_chars": 2}, {"sum_logits": -1.3680695295333862, "num_tokens": 1, "num_tokens_all": 804, "is_greedy": false, "logits_per_token": -1.3680695295333862, "logits_per_char": -0.6840347647666931, "num_chars": 2}, {"sum_logits": -1.0583178997039795, "num_tokens": 1, "num_tokens_all": 804, "is_greedy": true, "logits_per_token": -1.0583178997039795, "logits_per_char": -0.5291589498519897, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5141057968139648, "incorrect_loss_raw": 1.3688228130340576, "correct_loss_per_char": 0.7570528984069824, "incorrect_loss_per_char": 0.6844114065170288, "correct_loss_per_token": 1.5141057968139648, "incorrect_loss_per_token": 1.3688228130340576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5344953536987305, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.5344953536987305, "logits_per_char": -0.7672476768493652, "num_chars": 2}, {"sum_logits": -1.5141057968139648, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.5141057968139648, "logits_per_char": -0.7570528984069824, "num_chars": 2}, {"sum_logits": -1.4511675834655762, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.4511675834655762, "logits_per_char": -0.7255837917327881, "num_chars": 2}, {"sum_logits": -1.1208055019378662, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -1.1208055019378662, "logits_per_char": -0.5604027509689331, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4371631145477295, "incorrect_loss_raw": 1.3847684860229492, "correct_loss_per_char": 0.7185815572738647, "incorrect_loss_per_char": 0.6923842430114746, "correct_loss_per_token": 1.4371631145477295, "incorrect_loss_per_token": 1.3847684860229492, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5652377605438232, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.5652377605438232, "logits_per_char": -0.7826188802719116, "num_chars": 2}, {"sum_logits": -1.2658289670944214, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -1.2658289670944214, "logits_per_char": -0.6329144835472107, "num_chars": 2}, {"sum_logits": -1.4371631145477295, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.4371631145477295, "logits_per_char": -0.7185815572738647, "num_chars": 2}, {"sum_logits": -1.323238730430603, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.323238730430603, "logits_per_char": -0.6616193652153015, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2606768608093262, "incorrect_loss_raw": 1.4380739132563274, "correct_loss_per_char": 0.6303384304046631, "incorrect_loss_per_char": 0.7190369566281637, "correct_loss_per_token": 1.2606768608093262, "incorrect_loss_per_token": 1.4380739132563274, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4265130758285522, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": false, "logits_per_token": -1.4265130758285522, "logits_per_char": -0.7132565379142761, "num_chars": 2}, {"sum_logits": -1.3787206411361694, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": false, "logits_per_token": -1.3787206411361694, "logits_per_char": -0.6893603205680847, "num_chars": 2}, {"sum_logits": -1.5089880228042603, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": false, "logits_per_token": -1.5089880228042603, "logits_per_char": -0.7544940114021301, "num_chars": 2}, {"sum_logits": -1.2606768608093262, "num_tokens": 1, "num_tokens_all": 827, "is_greedy": true, "logits_per_token": -1.2606768608093262, "logits_per_char": -0.6303384304046631, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5289331674575806, "incorrect_loss_raw": 1.3813124895095825, "correct_loss_per_char": 0.7644665837287903, "incorrect_loss_per_char": 0.6906562447547913, "correct_loss_per_token": 1.5289331674575806, "incorrect_loss_per_token": 1.3813124895095825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.561622142791748, "num_tokens": 1, "num_tokens_all": 821, "is_greedy": false, "logits_per_token": -1.561622142791748, "logits_per_char": -0.780811071395874, "num_chars": 2}, {"sum_logits": -1.5289331674575806, "num_tokens": 1, "num_tokens_all": 821, "is_greedy": false, "logits_per_token": -1.5289331674575806, "logits_per_char": -0.7644665837287903, "num_chars": 2}, {"sum_logits": -1.4382452964782715, "num_tokens": 1, "num_tokens_all": 821, "is_greedy": false, "logits_per_token": -1.4382452964782715, "logits_per_char": -0.7191226482391357, "num_chars": 2}, {"sum_logits": -1.144070029258728, "num_tokens": 1, "num_tokens_all": 821, "is_greedy": true, "logits_per_token": -1.144070029258728, "logits_per_char": -0.572035014629364, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.478810429573059, "incorrect_loss_raw": 1.37456218401591, "correct_loss_per_char": 0.7394052147865295, "incorrect_loss_per_char": 0.687281092007955, "correct_loss_per_token": 1.478810429573059, "incorrect_loss_per_token": 1.37456218401591, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1949151754379272, "num_tokens": 1, "num_tokens_all": 781, "is_greedy": true, "logits_per_token": -1.1949151754379272, "logits_per_char": -0.5974575877189636, "num_chars": 2}, {"sum_logits": -1.4670342206954956, "num_tokens": 1, "num_tokens_all": 781, "is_greedy": false, "logits_per_token": -1.4670342206954956, "logits_per_char": -0.7335171103477478, "num_chars": 2}, {"sum_logits": -1.478810429573059, "num_tokens": 1, "num_tokens_all": 781, "is_greedy": false, "logits_per_token": -1.478810429573059, "logits_per_char": -0.7394052147865295, "num_chars": 2}, {"sum_logits": -1.4617371559143066, "num_tokens": 1, "num_tokens_all": 781, "is_greedy": false, "logits_per_token": -1.4617371559143066, "logits_per_char": -0.7308685779571533, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1568976640701294, "incorrect_loss_raw": 1.5024501880009968, "correct_loss_per_char": 0.5784488320350647, "incorrect_loss_per_char": 0.7512250940004984, "correct_loss_per_token": 1.1568976640701294, "incorrect_loss_per_token": 1.5024501880009968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7110556364059448, "num_tokens": 1, "num_tokens_all": 846, "is_greedy": false, "logits_per_token": -1.7110556364059448, "logits_per_char": -0.8555278182029724, "num_chars": 2}, {"sum_logits": -1.3688924312591553, "num_tokens": 1, "num_tokens_all": 846, "is_greedy": false, "logits_per_token": -1.3688924312591553, "logits_per_char": -0.6844462156295776, "num_chars": 2}, {"sum_logits": -1.4274024963378906, "num_tokens": 1, "num_tokens_all": 846, "is_greedy": false, "logits_per_token": -1.4274024963378906, "logits_per_char": -0.7137012481689453, "num_chars": 2}, {"sum_logits": -1.1568976640701294, "num_tokens": 1, "num_tokens_all": 846, "is_greedy": true, "logits_per_token": -1.1568976640701294, "logits_per_char": -0.5784488320350647, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4889638423919678, "incorrect_loss_raw": 1.387243350346883, "correct_loss_per_char": 0.7444819211959839, "incorrect_loss_per_char": 0.6936216751734415, "correct_loss_per_token": 1.4889638423919678, "incorrect_loss_per_token": 1.387243350346883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4889638423919678, "num_tokens": 1, "num_tokens_all": 799, "is_greedy": false, "logits_per_token": -1.4889638423919678, "logits_per_char": -0.7444819211959839, "num_chars": 2}, {"sum_logits": -1.5925886631011963, "num_tokens": 1, "num_tokens_all": 799, "is_greedy": false, "logits_per_token": -1.5925886631011963, "logits_per_char": -0.7962943315505981, "num_chars": 2}, {"sum_logits": -1.487170934677124, "num_tokens": 1, "num_tokens_all": 799, "is_greedy": false, "logits_per_token": -1.487170934677124, "logits_per_char": -0.743585467338562, "num_chars": 2}, {"sum_logits": -1.081970453262329, "num_tokens": 1, "num_tokens_all": 799, "is_greedy": true, "logits_per_token": -1.081970453262329, "logits_per_char": -0.5409852266311646, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9800381660461426, "incorrect_loss_raw": 1.5772896607716878, "correct_loss_per_char": 0.4900190830230713, "incorrect_loss_per_char": 0.7886448303858439, "correct_loss_per_token": 0.9800381660461426, "incorrect_loss_per_token": 1.5772896607716878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5699386596679688, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": false, "logits_per_token": -1.5699386596679688, "logits_per_char": -0.7849693298339844, "num_chars": 2}, {"sum_logits": -1.5824589729309082, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": false, "logits_per_token": -1.5824589729309082, "logits_per_char": -0.7912294864654541, "num_chars": 2}, {"sum_logits": -1.5794713497161865, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": false, "logits_per_token": -1.5794713497161865, "logits_per_char": -0.7897356748580933, "num_chars": 2}, {"sum_logits": -0.9800381660461426, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": true, "logits_per_token": -0.9800381660461426, "logits_per_char": -0.4900190830230713, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2055540084838867, "incorrect_loss_raw": 1.4646455844243367, "correct_loss_per_char": 0.6027770042419434, "incorrect_loss_per_char": 0.7323227922121683, "correct_loss_per_token": 1.2055540084838867, "incorrect_loss_per_token": 1.4646455844243367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5740272998809814, "num_tokens": 1, "num_tokens_all": 831, "is_greedy": false, "logits_per_token": -1.5740272998809814, "logits_per_char": -0.7870136499404907, "num_chars": 2}, {"sum_logits": -1.3741992712020874, "num_tokens": 1, "num_tokens_all": 831, "is_greedy": false, "logits_per_token": -1.3741992712020874, "logits_per_char": -0.6870996356010437, "num_chars": 2}, {"sum_logits": -1.4457101821899414, "num_tokens": 1, "num_tokens_all": 831, "is_greedy": false, "logits_per_token": -1.4457101821899414, "logits_per_char": -0.7228550910949707, "num_chars": 2}, {"sum_logits": -1.2055540084838867, "num_tokens": 1, "num_tokens_all": 831, "is_greedy": true, "logits_per_token": -1.2055540084838867, "logits_per_char": -0.6027770042419434, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3733854293823242, "incorrect_loss_raw": 1.4237927595774333, "correct_loss_per_char": 0.6866927146911621, "incorrect_loss_per_char": 0.7118963797887167, "correct_loss_per_token": 1.3733854293823242, "incorrect_loss_per_token": 1.4237927595774333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3733854293823242, "num_tokens": 1, "num_tokens_all": 832, "is_greedy": false, "logits_per_token": -1.3733854293823242, "logits_per_char": -0.6866927146911621, "num_chars": 2}, {"sum_logits": -1.4685767889022827, "num_tokens": 1, "num_tokens_all": 832, "is_greedy": false, "logits_per_token": -1.4685767889022827, "logits_per_char": -0.7342883944511414, "num_chars": 2}, {"sum_logits": -1.5666009187698364, "num_tokens": 1, "num_tokens_all": 832, "is_greedy": false, "logits_per_token": -1.5666009187698364, "logits_per_char": -0.7833004593849182, "num_chars": 2}, {"sum_logits": -1.2362005710601807, "num_tokens": 1, "num_tokens_all": 832, "is_greedy": true, "logits_per_token": -1.2362005710601807, "logits_per_char": -0.6181002855300903, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3870961666107178, "incorrect_loss_raw": 1.408495545387268, "correct_loss_per_char": 0.6935480833053589, "incorrect_loss_per_char": 0.704247772693634, "correct_loss_per_token": 1.3870961666107178, "incorrect_loss_per_token": 1.408495545387268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3730732202529907, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -1.3730732202529907, "logits_per_char": -0.6865366101264954, "num_chars": 2}, {"sum_logits": -1.3870961666107178, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -1.3870961666107178, "logits_per_char": -0.6935480833053589, "num_chars": 2}, {"sum_logits": -1.5452758073806763, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -1.5452758073806763, "logits_per_char": -0.7726379036903381, "num_chars": 2}, {"sum_logits": -1.3071376085281372, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -1.3071376085281372, "logits_per_char": -0.6535688042640686, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3050365447998047, "incorrect_loss_raw": 1.4558753569920857, "correct_loss_per_char": 0.6525182723999023, "incorrect_loss_per_char": 0.7279376784960429, "correct_loss_per_token": 1.3050365447998047, "incorrect_loss_per_token": 1.4558753569920857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7235276699066162, "num_tokens": 1, "num_tokens_all": 820, "is_greedy": false, "logits_per_token": -1.7235276699066162, "logits_per_char": -0.8617638349533081, "num_chars": 2}, {"sum_logits": -1.5355474948883057, "num_tokens": 1, "num_tokens_all": 820, "is_greedy": false, "logits_per_token": -1.5355474948883057, "logits_per_char": -0.7677737474441528, "num_chars": 2}, {"sum_logits": -1.3050365447998047, "num_tokens": 1, "num_tokens_all": 820, "is_greedy": false, "logits_per_token": -1.3050365447998047, "logits_per_char": -0.6525182723999023, "num_chars": 2}, {"sum_logits": -1.1085509061813354, "num_tokens": 1, "num_tokens_all": 820, "is_greedy": true, "logits_per_token": -1.1085509061813354, "logits_per_char": -0.5542754530906677, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5360232591629028, "incorrect_loss_raw": 1.355705698331197, "correct_loss_per_char": 0.7680116295814514, "incorrect_loss_per_char": 0.6778528491655985, "correct_loss_per_token": 1.5360232591629028, "incorrect_loss_per_token": 1.355705698331197, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5360232591629028, "num_tokens": 1, "num_tokens_all": 807, "is_greedy": false, "logits_per_token": -1.5360232591629028, "logits_per_char": -0.7680116295814514, "num_chars": 2}, {"sum_logits": -1.4047611951828003, "num_tokens": 1, "num_tokens_all": 807, "is_greedy": false, "logits_per_token": -1.4047611951828003, "logits_per_char": -0.7023805975914001, "num_chars": 2}, {"sum_logits": -1.4944943189620972, "num_tokens": 1, "num_tokens_all": 807, "is_greedy": false, "logits_per_token": -1.4944943189620972, "logits_per_char": -0.7472471594810486, "num_chars": 2}, {"sum_logits": -1.1678615808486938, "num_tokens": 1, "num_tokens_all": 807, "is_greedy": true, "logits_per_token": -1.1678615808486938, "logits_per_char": -0.5839307904243469, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3238914012908936, "incorrect_loss_raw": 1.4333434502283733, "correct_loss_per_char": 0.6619457006454468, "incorrect_loss_per_char": 0.7166717251141866, "correct_loss_per_token": 1.3238914012908936, "incorrect_loss_per_token": 1.4333434502283733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6528198719024658, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.6528198719024658, "logits_per_char": -0.8264099359512329, "num_chars": 2}, {"sum_logits": -1.4731483459472656, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.4731483459472656, "logits_per_char": -0.7365741729736328, "num_chars": 2}, {"sum_logits": -1.3238914012908936, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.3238914012908936, "logits_per_char": -0.6619457006454468, "num_chars": 2}, {"sum_logits": -1.1740621328353882, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -1.1740621328353882, "logits_per_char": -0.5870310664176941, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3703511953353882, "incorrect_loss_raw": 1.4087841510772705, "correct_loss_per_char": 0.6851755976676941, "incorrect_loss_per_char": 0.7043920755386353, "correct_loss_per_token": 1.3703511953353882, "incorrect_loss_per_token": 1.4087841510772705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4796087741851807, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.4796087741851807, "logits_per_char": -0.7398043870925903, "num_chars": 2}, {"sum_logits": -1.5437126159667969, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.5437126159667969, "logits_per_char": -0.7718563079833984, "num_chars": 2}, {"sum_logits": -1.3703511953353882, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.3703511953353882, "logits_per_char": -0.6851755976676941, "num_chars": 2}, {"sum_logits": -1.203031063079834, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": true, "logits_per_token": -1.203031063079834, "logits_per_char": -0.601515531539917, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.283405065536499, "incorrect_loss_raw": 1.4412036736806233, "correct_loss_per_char": 0.6417025327682495, "incorrect_loss_per_char": 0.7206018368403116, "correct_loss_per_token": 1.283405065536499, "incorrect_loss_per_token": 1.4412036736806233, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4454185962677002, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.4454185962677002, "logits_per_char": -0.7227092981338501, "num_chars": 2}, {"sum_logits": -1.283405065536499, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.283405065536499, "logits_per_char": -0.6417025327682495, "num_chars": 2}, {"sum_logits": -1.607741355895996, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.607741355895996, "logits_per_char": -0.803870677947998, "num_chars": 2}, {"sum_logits": -1.2704510688781738, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -1.2704510688781738, "logits_per_char": -0.6352255344390869, "num_chars": 2}], "label": 1, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5540034770965576, "incorrect_loss_raw": 1.3622373739878337, "correct_loss_per_char": 0.7770017385482788, "incorrect_loss_per_char": 0.6811186869939169, "correct_loss_per_token": 1.5540034770965576, "incorrect_loss_per_token": 1.3622373739878337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5589951276779175, "num_tokens": 1, "num_tokens_all": 829, "is_greedy": false, "logits_per_token": -1.5589951276779175, "logits_per_char": -0.7794975638389587, "num_chars": 2}, {"sum_logits": -1.4453372955322266, "num_tokens": 1, "num_tokens_all": 829, "is_greedy": false, "logits_per_token": -1.4453372955322266, "logits_per_char": -0.7226686477661133, "num_chars": 2}, {"sum_logits": -1.5540034770965576, "num_tokens": 1, "num_tokens_all": 829, "is_greedy": false, "logits_per_token": -1.5540034770965576, "logits_per_char": -0.7770017385482788, "num_chars": 2}, {"sum_logits": -1.082379698753357, "num_tokens": 1, "num_tokens_all": 829, "is_greedy": true, "logits_per_token": -1.082379698753357, "logits_per_char": -0.5411898493766785, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2188091278076172, "incorrect_loss_raw": 1.4689091046651204, "correct_loss_per_char": 0.6094045639038086, "incorrect_loss_per_char": 0.7344545523325602, "correct_loss_per_token": 1.2188091278076172, "incorrect_loss_per_token": 1.4689091046651204, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3736752271652222, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -1.3736752271652222, "logits_per_char": -0.6868376135826111, "num_chars": 2}, {"sum_logits": -1.4677085876464844, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -1.4677085876464844, "logits_per_char": -0.7338542938232422, "num_chars": 2}, {"sum_logits": -1.5653434991836548, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -1.5653434991836548, "logits_per_char": -0.7826717495918274, "num_chars": 2}, {"sum_logits": -1.2188091278076172, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -1.2188091278076172, "logits_per_char": -0.6094045639038086, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5947998762130737, "incorrect_loss_raw": 1.3451499144236247, "correct_loss_per_char": 0.7973999381065369, "incorrect_loss_per_char": 0.6725749572118124, "correct_loss_per_token": 1.5947998762130737, "incorrect_loss_per_token": 1.3451499144236247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5947998762130737, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.5947998762130737, "logits_per_char": -0.7973999381065369, "num_chars": 2}, {"sum_logits": -1.405689001083374, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.405689001083374, "logits_per_char": -0.702844500541687, "num_chars": 2}, {"sum_logits": -1.51186203956604, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": false, "logits_per_token": -1.51186203956604, "logits_per_char": -0.75593101978302, "num_chars": 2}, {"sum_logits": -1.11789870262146, "num_tokens": 1, "num_tokens_all": 808, "is_greedy": true, "logits_per_token": -1.11789870262146, "logits_per_char": -0.55894935131073, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.54805588722229, "incorrect_loss_raw": 1.3435568412144978, "correct_loss_per_char": 0.774027943611145, "incorrect_loss_per_char": 0.6717784206072489, "correct_loss_per_token": 1.54805588722229, "incorrect_loss_per_token": 1.3435568412144978, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.54805588722229, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": false, "logits_per_token": -1.54805588722229, "logits_per_char": -0.774027943611145, "num_chars": 2}, {"sum_logits": -1.3724919557571411, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": false, "logits_per_token": -1.3724919557571411, "logits_per_char": -0.6862459778785706, "num_chars": 2}, {"sum_logits": -1.3882824182510376, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": false, "logits_per_token": -1.3882824182510376, "logits_per_char": -0.6941412091255188, "num_chars": 2}, {"sum_logits": -1.269896149635315, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": true, "logits_per_token": -1.269896149635315, "logits_per_char": -0.6349480748176575, "num_chars": 2}], "label": 0, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2188825607299805, "incorrect_loss_raw": 1.4771347045898438, "correct_loss_per_char": 0.6094412803649902, "incorrect_loss_per_char": 0.7385673522949219, "correct_loss_per_token": 1.2188825607299805, "incorrect_loss_per_token": 1.4771347045898438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5048342943191528, "num_tokens": 1, "num_tokens_all": 837, "is_greedy": false, "logits_per_token": -1.5048342943191528, "logits_per_char": -0.7524171471595764, "num_chars": 2}, {"sum_logits": -1.4465515613555908, "num_tokens": 1, "num_tokens_all": 837, "is_greedy": false, "logits_per_token": -1.4465515613555908, "logits_per_char": -0.7232757806777954, "num_chars": 2}, {"sum_logits": -1.4800182580947876, "num_tokens": 1, "num_tokens_all": 837, "is_greedy": false, "logits_per_token": -1.4800182580947876, "logits_per_char": -0.7400091290473938, "num_chars": 2}, {"sum_logits": -1.2188825607299805, "num_tokens": 1, "num_tokens_all": 837, "is_greedy": true, "logits_per_token": -1.2188825607299805, "logits_per_char": -0.6094412803649902, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4602962732315063, "incorrect_loss_raw": 1.3864477078119914, "correct_loss_per_char": 0.7301481366157532, "incorrect_loss_per_char": 0.6932238539059957, "correct_loss_per_token": 1.4602962732315063, "incorrect_loss_per_token": 1.3864477078119914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6676814556121826, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.6676814556121826, "logits_per_char": -0.8338407278060913, "num_chars": 2}, {"sum_logits": -1.2196279764175415, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -1.2196279764175415, "logits_per_char": -0.6098139882087708, "num_chars": 2}, {"sum_logits": -1.4602962732315063, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.4602962732315063, "logits_per_char": -0.7301481366157532, "num_chars": 2}, {"sum_logits": -1.27203369140625, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.27203369140625, "logits_per_char": -0.636016845703125, "num_chars": 2}], "label": 2, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1996573209762573, "incorrect_loss_raw": 1.466852068901062, "correct_loss_per_char": 0.5998286604881287, "incorrect_loss_per_char": 0.733426034450531, "correct_loss_per_token": 1.1996573209762573, "incorrect_loss_per_token": 1.466852068901062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4941363334655762, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": false, "logits_per_token": -1.4941363334655762, "logits_per_char": -0.7470681667327881, "num_chars": 2}, {"sum_logits": -1.5440876483917236, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": false, "logits_per_token": -1.5440876483917236, "logits_per_char": -0.7720438241958618, "num_chars": 2}, {"sum_logits": -1.3623322248458862, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": false, "logits_per_token": -1.3623322248458862, "logits_per_char": -0.6811661124229431, "num_chars": 2}, {"sum_logits": -1.1996573209762573, "num_tokens": 1, "num_tokens_all": 803, "is_greedy": true, "logits_per_token": -1.1996573209762573, "logits_per_char": -0.5998286604881287, "num_chars": 2}], "label": 3, "task_hash": "9d5570c603bbcb33a0727904a22ef997", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
|