|
{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.624301791191101, "incorrect_loss_raw": 1.370020071665446, "correct_loss_per_char": 0.8121508955955505, "incorrect_loss_per_char": 0.685010035832723, "correct_loss_per_token": 1.624301791191101, "incorrect_loss_per_token": 1.370020071665446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6919183731079102, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.6919183731079102, "logits_per_char": -0.8459591865539551, "num_chars": 2}, {"sum_logits": -1.624301791191101, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.624301791191101, "logits_per_char": -0.8121508955955505, "num_chars": 2}, {"sum_logits": -1.4454092979431152, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.4454092979431152, "logits_per_char": -0.7227046489715576, "num_chars": 2}, {"sum_logits": -0.9727325439453125, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": true, "logits_per_token": -0.9727325439453125, "logits_per_char": -0.48636627197265625, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2437372207641602, "incorrect_loss_raw": 1.4559073050816853, "correct_loss_per_char": 0.6218686103820801, "incorrect_loss_per_char": 0.7279536525408427, "correct_loss_per_token": 1.2437372207641602, "incorrect_loss_per_token": 1.4559073050816853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5731456279754639, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.5731456279754639, "logits_per_char": -0.7865728139877319, "num_chars": 2}, {"sum_logits": -1.469448208808899, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.469448208808899, "logits_per_char": -0.7347241044044495, "num_chars": 2}, {"sum_logits": -1.3251280784606934, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.3251280784606934, "logits_per_char": -0.6625640392303467, "num_chars": 2}, {"sum_logits": -1.2437372207641602, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": true, "logits_per_token": -1.2437372207641602, "logits_per_char": -0.6218686103820801, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8926581740379333, "incorrect_loss_raw": 1.64110533396403, "correct_loss_per_char": 0.4463290870189667, "incorrect_loss_per_char": 0.820552666982015, "correct_loss_per_token": 0.8926581740379333, "incorrect_loss_per_token": 1.64110533396403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.8046376705169678, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.8046376705169678, "logits_per_char": -0.9023188352584839, "num_chars": 2}, {"sum_logits": -1.5777955055236816, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.5777955055236816, "logits_per_char": -0.7888977527618408, "num_chars": 2}, {"sum_logits": -1.5408828258514404, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.5408828258514404, "logits_per_char": -0.7704414129257202, "num_chars": 2}, {"sum_logits": -0.8926581740379333, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": true, "logits_per_token": -0.8926581740379333, "logits_per_char": -0.4463290870189667, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5324008464813232, "incorrect_loss_raw": 1.3647078673044841, "correct_loss_per_char": 0.7662004232406616, "incorrect_loss_per_char": 0.6823539336522421, "correct_loss_per_token": 1.5324008464813232, "incorrect_loss_per_token": 1.3647078673044841, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5324008464813232, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.5324008464813232, "logits_per_char": -0.7662004232406616, "num_chars": 2}, {"sum_logits": -1.459259033203125, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.459259033203125, "logits_per_char": -0.7296295166015625, "num_chars": 2}, {"sum_logits": -1.4939212799072266, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.4939212799072266, "logits_per_char": -0.7469606399536133, "num_chars": 2}, {"sum_logits": -1.1409432888031006, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": true, "logits_per_token": -1.1409432888031006, "logits_per_char": -0.5704716444015503, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4739269018173218, "incorrect_loss_raw": 1.3958348830540974, "correct_loss_per_char": 0.7369634509086609, "incorrect_loss_per_char": 0.6979174415270487, "correct_loss_per_token": 1.4739269018173218, "incorrect_loss_per_token": 1.3958348830540974, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5326752662658691, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.5326752662658691, "logits_per_char": -0.7663376331329346, "num_chars": 2}, {"sum_logits": -1.5355744361877441, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.5355744361877441, "logits_per_char": -0.7677872180938721, "num_chars": 2}, {"sum_logits": -1.4739269018173218, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.4739269018173218, "logits_per_char": -0.7369634509086609, "num_chars": 2}, {"sum_logits": -1.1192549467086792, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -1.1192549467086792, "logits_per_char": -0.5596274733543396, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.331106185913086, "incorrect_loss_raw": 1.421077013015747, "correct_loss_per_char": 0.665553092956543, "incorrect_loss_per_char": 0.7105385065078735, "correct_loss_per_token": 1.331106185913086, "incorrect_loss_per_token": 1.421077013015747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3275480270385742, "num_tokens": 1, "num_tokens_all": 643, "is_greedy": true, "logits_per_token": -1.3275480270385742, "logits_per_char": -0.6637740135192871, "num_chars": 2}, {"sum_logits": -1.421217679977417, "num_tokens": 1, "num_tokens_all": 643, "is_greedy": false, "logits_per_token": -1.421217679977417, "logits_per_char": -0.7106088399887085, "num_chars": 2}, {"sum_logits": -1.51446533203125, "num_tokens": 1, "num_tokens_all": 643, "is_greedy": false, "logits_per_token": -1.51446533203125, "logits_per_char": -0.757232666015625, "num_chars": 2}, {"sum_logits": -1.331106185913086, "num_tokens": 1, "num_tokens_all": 643, "is_greedy": false, "logits_per_token": -1.331106185913086, "logits_per_char": -0.665553092956543, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5203938484191895, "incorrect_loss_raw": 1.3790362278620403, "correct_loss_per_char": 0.7601969242095947, "incorrect_loss_per_char": 0.6895181139310201, "correct_loss_per_token": 1.5203938484191895, "incorrect_loss_per_token": 1.3790362278620403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5770018100738525, "num_tokens": 1, "num_tokens_all": 639, "is_greedy": false, "logits_per_token": -1.5770018100738525, "logits_per_char": -0.7885009050369263, "num_chars": 2}, {"sum_logits": -1.4941197633743286, "num_tokens": 1, "num_tokens_all": 639, "is_greedy": false, "logits_per_token": -1.4941197633743286, "logits_per_char": -0.7470598816871643, "num_chars": 2}, {"sum_logits": -1.5203938484191895, "num_tokens": 1, "num_tokens_all": 639, "is_greedy": false, "logits_per_token": -1.5203938484191895, "logits_per_char": -0.7601969242095947, "num_chars": 2}, {"sum_logits": -1.0659871101379395, "num_tokens": 1, "num_tokens_all": 639, "is_greedy": true, "logits_per_token": -1.0659871101379395, "logits_per_char": -0.5329935550689697, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2736409902572632, "incorrect_loss_raw": 1.4843804836273193, "correct_loss_per_char": 0.6368204951286316, "incorrect_loss_per_char": 0.7421902418136597, "correct_loss_per_token": 1.2736409902572632, "incorrect_loss_per_token": 1.4843804836273193, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6185097694396973, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.6185097694396973, "logits_per_char": -0.8092548847198486, "num_chars": 2}, {"sum_logits": -1.7710518836975098, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.7710518836975098, "logits_per_char": -0.8855259418487549, "num_chars": 2}, {"sum_logits": -1.2736409902572632, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.2736409902572632, "logits_per_char": -0.6368204951286316, "num_chars": 2}, {"sum_logits": -1.063579797744751, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": true, "logits_per_token": -1.063579797744751, "logits_per_char": -0.5317898988723755, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3534789085388184, "incorrect_loss_raw": 1.4488972425460815, "correct_loss_per_char": 0.6767394542694092, "incorrect_loss_per_char": 0.7244486212730408, "correct_loss_per_token": 1.3534789085388184, "incorrect_loss_per_token": 1.4488972425460815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5963433980941772, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.5963433980941772, "logits_per_char": -0.7981716990470886, "num_chars": 2}, {"sum_logits": -1.430017113685608, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.430017113685608, "logits_per_char": -0.715008556842804, "num_chars": 2}, {"sum_logits": -1.3534789085388184, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.3534789085388184, "logits_per_char": -0.6767394542694092, "num_chars": 2}, {"sum_logits": -1.3203312158584595, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": true, "logits_per_token": -1.3203312158584595, "logits_per_char": -0.6601656079292297, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0880969762802124, "incorrect_loss_raw": 1.57316788037618, "correct_loss_per_char": 0.5440484881401062, "incorrect_loss_per_char": 0.78658394018809, "correct_loss_per_token": 1.0880969762802124, "incorrect_loss_per_token": 1.57316788037618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0880969762802124, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": true, "logits_per_token": -1.0880969762802124, "logits_per_char": -0.5440484881401062, "num_chars": 2}, {"sum_logits": -1.5191669464111328, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.5191669464111328, "logits_per_char": -0.7595834732055664, "num_chars": 2}, {"sum_logits": -1.6540820598602295, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.6540820598602295, "logits_per_char": -0.8270410299301147, "num_chars": 2}, {"sum_logits": -1.5462546348571777, "num_tokens": 1, "num_tokens_all": 568, "is_greedy": false, "logits_per_token": -1.5462546348571777, "logits_per_char": -0.7731273174285889, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1128487586975098, "incorrect_loss_raw": 1.5105475187301636, "correct_loss_per_char": 0.5564243793487549, "incorrect_loss_per_char": 0.7552737593650818, "correct_loss_per_token": 1.1128487586975098, "incorrect_loss_per_token": 1.5105475187301636, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.569892406463623, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.569892406463623, "logits_per_char": -0.7849462032318115, "num_chars": 2}, {"sum_logits": -1.5705609321594238, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.5705609321594238, "logits_per_char": -0.7852804660797119, "num_chars": 2}, {"sum_logits": -1.3911892175674438, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": false, "logits_per_token": -1.3911892175674438, "logits_per_char": -0.6955946087837219, "num_chars": 2}, {"sum_logits": -1.1128487586975098, "num_tokens": 1, "num_tokens_all": 566, "is_greedy": true, "logits_per_token": -1.1128487586975098, "logits_per_char": -0.5564243793487549, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5144808292388916, "incorrect_loss_raw": 1.3731839656829834, "correct_loss_per_char": 0.7572404146194458, "incorrect_loss_per_char": 0.6865919828414917, "correct_loss_per_token": 1.5144808292388916, "incorrect_loss_per_token": 1.3731839656829834, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.550523042678833, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.550523042678833, "logits_per_char": -0.7752615213394165, "num_chars": 2}, {"sum_logits": -1.5144808292388916, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.5144808292388916, "logits_per_char": -0.7572404146194458, "num_chars": 2}, {"sum_logits": -1.4538233280181885, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.4538233280181885, "logits_per_char": -0.7269116640090942, "num_chars": 2}, {"sum_logits": -1.1152055263519287, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": true, "logits_per_token": -1.1152055263519287, "logits_per_char": -0.5576027631759644, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.460127830505371, "incorrect_loss_raw": 1.3782985607783, "correct_loss_per_char": 0.7300639152526855, "incorrect_loss_per_char": 0.68914928038915, "correct_loss_per_token": 1.460127830505371, "incorrect_loss_per_token": 1.3782985607783, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.460127830505371, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.460127830505371, "logits_per_char": -0.7300639152526855, "num_chars": 2}, {"sum_logits": -1.5307056903839111, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.5307056903839111, "logits_per_char": -0.7653528451919556, "num_chars": 2}, {"sum_logits": -1.2882620096206665, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": true, "logits_per_token": -1.2882620096206665, "logits_per_char": -0.6441310048103333, "num_chars": 2}, {"sum_logits": -1.3159279823303223, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.3159279823303223, "logits_per_char": -0.6579639911651611, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4660282135009766, "incorrect_loss_raw": 1.3943302631378174, "correct_loss_per_char": 0.7330141067504883, "incorrect_loss_per_char": 0.6971651315689087, "correct_loss_per_token": 1.4660282135009766, "incorrect_loss_per_token": 1.3943302631378174, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6791956424713135, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.6791956424713135, "logits_per_char": -0.8395978212356567, "num_chars": 2}, {"sum_logits": -1.4660282135009766, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.4660282135009766, "logits_per_char": -0.7330141067504883, "num_chars": 2}, {"sum_logits": -1.3844572305679321, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.3844572305679321, "logits_per_char": -0.6922286152839661, "num_chars": 2}, {"sum_logits": -1.1193379163742065, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": true, "logits_per_token": -1.1193379163742065, "logits_per_char": -0.5596689581871033, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0707602500915527, "incorrect_loss_raw": 1.5325297514597576, "correct_loss_per_char": 0.5353801250457764, "incorrect_loss_per_char": 0.7662648757298788, "correct_loss_per_token": 1.0707602500915527, "incorrect_loss_per_token": 1.5325297514597576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5478709936141968, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5478709936141968, "logits_per_char": -0.7739354968070984, "num_chars": 2}, {"sum_logits": -1.6520849466323853, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.6520849466323853, "logits_per_char": -0.8260424733161926, "num_chars": 2}, {"sum_logits": -1.3976333141326904, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.3976333141326904, "logits_per_char": -0.6988166570663452, "num_chars": 2}, {"sum_logits": -1.0707602500915527, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -1.0707602500915527, "logits_per_char": -0.5353801250457764, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.449288010597229, "incorrect_loss_raw": 1.4307501316070557, "correct_loss_per_char": 0.7246440052986145, "incorrect_loss_per_char": 0.7153750658035278, "correct_loss_per_token": 1.449288010597229, "incorrect_loss_per_token": 1.4307501316070557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4417370557785034, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.4417370557785034, "logits_per_char": -0.7208685278892517, "num_chars": 2}, {"sum_logits": -1.2915161848068237, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": true, "logits_per_token": -1.2915161848068237, "logits_per_char": -0.6457580924034119, "num_chars": 2}, {"sum_logits": -1.5589971542358398, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.5589971542358398, "logits_per_char": -0.7794985771179199, "num_chars": 2}, {"sum_logits": -1.449288010597229, "num_tokens": 1, "num_tokens_all": 625, "is_greedy": false, "logits_per_token": -1.449288010597229, "logits_per_char": -0.7246440052986145, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3632850646972656, "incorrect_loss_raw": 1.4810266892115276, "correct_loss_per_char": 0.6816425323486328, "incorrect_loss_per_char": 0.7405133446057638, "correct_loss_per_token": 1.3632850646972656, "incorrect_loss_per_token": 1.4810266892115276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4938551187515259, "num_tokens": 1, "num_tokens_all": 668, "is_greedy": false, "logits_per_token": -1.4938551187515259, "logits_per_char": -0.7469275593757629, "num_chars": 2}, {"sum_logits": -1.3632850646972656, "num_tokens": 1, "num_tokens_all": 668, "is_greedy": true, "logits_per_token": -1.3632850646972656, "logits_per_char": -0.6816425323486328, "num_chars": 2}, {"sum_logits": -1.5590633153915405, "num_tokens": 1, "num_tokens_all": 668, "is_greedy": false, "logits_per_token": -1.5590633153915405, "logits_per_char": -0.7795316576957703, "num_chars": 2}, {"sum_logits": -1.3901616334915161, "num_tokens": 1, "num_tokens_all": 668, "is_greedy": false, "logits_per_token": -1.3901616334915161, "logits_per_char": -0.6950808167457581, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.462314486503601, "incorrect_loss_raw": 1.415928880373637, "correct_loss_per_char": 0.7311572432518005, "incorrect_loss_per_char": 0.7079644401868185, "correct_loss_per_token": 1.462314486503601, "incorrect_loss_per_token": 1.415928880373637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6644082069396973, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.6644082069396973, "logits_per_char": -0.8322041034698486, "num_chars": 2}, {"sum_logits": -1.587465524673462, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.587465524673462, "logits_per_char": -0.793732762336731, "num_chars": 2}, {"sum_logits": -1.462314486503601, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": false, "logits_per_token": -1.462314486503601, "logits_per_char": -0.7311572432518005, "num_chars": 2}, {"sum_logits": -0.9959129095077515, "num_tokens": 1, "num_tokens_all": 616, "is_greedy": true, "logits_per_token": -0.9959129095077515, "logits_per_char": -0.49795645475387573, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.084719181060791, "incorrect_loss_raw": 1.5981276035308838, "correct_loss_per_char": 0.5423595905303955, "incorrect_loss_per_char": 0.7990638017654419, "correct_loss_per_token": 1.084719181060791, "incorrect_loss_per_token": 1.5981276035308838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.598576307296753, "num_tokens": 1, "num_tokens_all": 657, "is_greedy": false, "logits_per_token": -1.598576307296753, "logits_per_char": -0.7992881536483765, "num_chars": 2}, {"sum_logits": -1.6549718379974365, "num_tokens": 1, "num_tokens_all": 657, "is_greedy": false, "logits_per_token": -1.6549718379974365, "logits_per_char": -0.8274859189987183, "num_chars": 2}, {"sum_logits": -1.540834665298462, "num_tokens": 1, "num_tokens_all": 657, "is_greedy": false, "logits_per_token": -1.540834665298462, "logits_per_char": -0.770417332649231, "num_chars": 2}, {"sum_logits": -1.084719181060791, "num_tokens": 1, "num_tokens_all": 657, "is_greedy": true, "logits_per_token": -1.084719181060791, "logits_per_char": -0.5423595905303955, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4349744319915771, "incorrect_loss_raw": 1.4004321893056233, "correct_loss_per_char": 0.7174872159957886, "incorrect_loss_per_char": 0.7002160946528116, "correct_loss_per_token": 1.4349744319915771, "incorrect_loss_per_token": 1.4004321893056233, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4349744319915771, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4349744319915771, "logits_per_char": -0.7174872159957886, "num_chars": 2}, {"sum_logits": -1.4403913021087646, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.4403913021087646, "logits_per_char": -0.7201956510543823, "num_chars": 2}, {"sum_logits": -1.6106836795806885, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": false, "logits_per_token": -1.6106836795806885, "logits_per_char": -0.8053418397903442, "num_chars": 2}, {"sum_logits": -1.150221586227417, "num_tokens": 1, "num_tokens_all": 588, "is_greedy": true, "logits_per_token": -1.150221586227417, "logits_per_char": -0.5751107931137085, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.314552903175354, "incorrect_loss_raw": 1.4756632645924885, "correct_loss_per_char": 0.657276451587677, "incorrect_loss_per_char": 0.7378316322962443, "correct_loss_per_token": 1.314552903175354, "incorrect_loss_per_token": 1.4756632645924885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4850599765777588, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.4850599765777588, "logits_per_char": -0.7425299882888794, "num_chars": 2}, {"sum_logits": -1.3787808418273926, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.3787808418273926, "logits_per_char": -0.6893904209136963, "num_chars": 2}, {"sum_logits": -1.5631489753723145, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.5631489753723145, "logits_per_char": -0.7815744876861572, "num_chars": 2}, {"sum_logits": -1.314552903175354, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": true, "logits_per_token": -1.314552903175354, "logits_per_char": -0.657276451587677, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1586753129959106, "incorrect_loss_raw": 1.4975978136062622, "correct_loss_per_char": 0.5793376564979553, "incorrect_loss_per_char": 0.7487989068031311, "correct_loss_per_token": 1.1586753129959106, "incorrect_loss_per_token": 1.4975978136062622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.478515863418579, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.478515863418579, "logits_per_char": -0.7392579317092896, "num_chars": 2}, {"sum_logits": -1.619194746017456, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.619194746017456, "logits_per_char": -0.809597373008728, "num_chars": 2}, {"sum_logits": -1.3950828313827515, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": false, "logits_per_token": -1.3950828313827515, "logits_per_char": -0.6975414156913757, "num_chars": 2}, {"sum_logits": -1.1586753129959106, "num_tokens": 1, "num_tokens_all": 560, "is_greedy": true, "logits_per_token": -1.1586753129959106, "logits_per_char": -0.5793376564979553, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3811547756195068, "incorrect_loss_raw": 1.4531841675440471, "correct_loss_per_char": 0.6905773878097534, "incorrect_loss_per_char": 0.7265920837720236, "correct_loss_per_token": 1.3811547756195068, "incorrect_loss_per_token": 1.4531841675440471, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.527726411819458, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": false, "logits_per_token": -1.527726411819458, "logits_per_char": -0.763863205909729, "num_chars": 2}, {"sum_logits": -1.3811547756195068, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": false, "logits_per_token": -1.3811547756195068, "logits_per_char": -0.6905773878097534, "num_chars": 2}, {"sum_logits": -1.4575387239456177, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": false, "logits_per_token": -1.4575387239456177, "logits_per_char": -0.7287693619728088, "num_chars": 2}, {"sum_logits": -1.3742873668670654, "num_tokens": 1, "num_tokens_all": 645, "is_greedy": true, "logits_per_token": -1.3742873668670654, "logits_per_char": -0.6871436834335327, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5496962070465088, "incorrect_loss_raw": 1.3627674579620361, "correct_loss_per_char": 0.7748481035232544, "incorrect_loss_per_char": 0.6813837289810181, "correct_loss_per_token": 1.5496962070465088, "incorrect_loss_per_token": 1.3627674579620361, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5496962070465088, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.5496962070465088, "logits_per_char": -0.7748481035232544, "num_chars": 2}, {"sum_logits": -1.4853593111038208, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.4853593111038208, "logits_per_char": -0.7426796555519104, "num_chars": 2}, {"sum_logits": -1.4971193075180054, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": false, "logits_per_token": -1.4971193075180054, "logits_per_char": -0.7485596537590027, "num_chars": 2}, {"sum_logits": -1.1058237552642822, "num_tokens": 1, "num_tokens_all": 615, "is_greedy": true, "logits_per_token": -1.1058237552642822, "logits_per_char": -0.5529118776321411, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3254444599151611, "incorrect_loss_raw": 1.4763342539469402, "correct_loss_per_char": 0.6627222299575806, "incorrect_loss_per_char": 0.7381671269734701, "correct_loss_per_token": 1.3254444599151611, "incorrect_loss_per_token": 1.4763342539469402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2773655652999878, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": true, "logits_per_token": -1.2773655652999878, "logits_per_char": -0.6386827826499939, "num_chars": 2}, {"sum_logits": -1.3254444599151611, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.3254444599151611, "logits_per_char": -0.6627222299575806, "num_chars": 2}, {"sum_logits": -1.7295054197311401, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.7295054197311401, "logits_per_char": -0.8647527098655701, "num_chars": 2}, {"sum_logits": -1.4221317768096924, "num_tokens": 1, "num_tokens_all": 622, "is_greedy": false, "logits_per_token": -1.4221317768096924, "logits_per_char": -0.7110658884048462, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2125036716461182, "incorrect_loss_raw": 1.499484380086263, "correct_loss_per_char": 0.6062518358230591, "incorrect_loss_per_char": 0.7497421900431315, "correct_loss_per_token": 1.2125036716461182, "incorrect_loss_per_token": 1.499484380086263, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5736305713653564, "num_tokens": 1, "num_tokens_all": 653, "is_greedy": false, "logits_per_token": -1.5736305713653564, "logits_per_char": -0.7868152856826782, "num_chars": 2}, {"sum_logits": -1.4837048053741455, "num_tokens": 1, "num_tokens_all": 653, "is_greedy": false, "logits_per_token": -1.4837048053741455, "logits_per_char": -0.7418524026870728, "num_chars": 2}, {"sum_logits": -1.441117763519287, "num_tokens": 1, "num_tokens_all": 653, "is_greedy": false, "logits_per_token": -1.441117763519287, "logits_per_char": -0.7205588817596436, "num_chars": 2}, {"sum_logits": -1.2125036716461182, "num_tokens": 1, "num_tokens_all": 653, "is_greedy": true, "logits_per_token": -1.2125036716461182, "logits_per_char": -0.6062518358230591, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0564258098602295, "incorrect_loss_raw": 1.5367084344228108, "correct_loss_per_char": 0.5282129049301147, "incorrect_loss_per_char": 0.7683542172114054, "correct_loss_per_token": 1.0564258098602295, "incorrect_loss_per_token": 1.5367084344228108, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.491121530532837, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.491121530532837, "logits_per_char": -0.7455607652664185, "num_chars": 2}, {"sum_logits": -1.545720100402832, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.545720100402832, "logits_per_char": -0.772860050201416, "num_chars": 2}, {"sum_logits": -1.5732836723327637, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.5732836723327637, "logits_per_char": -0.7866418361663818, "num_chars": 2}, {"sum_logits": -1.0564258098602295, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": true, "logits_per_token": -1.0564258098602295, "logits_per_char": -0.5282129049301147, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4919530153274536, "incorrect_loss_raw": 1.3937248786290486, "correct_loss_per_char": 0.7459765076637268, "incorrect_loss_per_char": 0.6968624393145243, "correct_loss_per_token": 1.4919530153274536, "incorrect_loss_per_token": 1.3937248786290486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4919530153274536, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.4919530153274536, "logits_per_char": -0.7459765076637268, "num_chars": 2}, {"sum_logits": -1.5726683139801025, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.5726683139801025, "logits_per_char": -0.7863341569900513, "num_chars": 2}, {"sum_logits": -1.5649590492248535, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": false, "logits_per_token": -1.5649590492248535, "logits_per_char": -0.7824795246124268, "num_chars": 2}, {"sum_logits": -1.04354727268219, "num_tokens": 1, "num_tokens_all": 574, "is_greedy": true, "logits_per_token": -1.04354727268219, "logits_per_char": -0.521773636341095, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4055027961730957, "incorrect_loss_raw": 1.3963087797164917, "correct_loss_per_char": 0.7027513980865479, "incorrect_loss_per_char": 0.6981543898582458, "correct_loss_per_token": 1.4055027961730957, "incorrect_loss_per_token": 1.3963087797164917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4055027961730957, "num_tokens": 1, "num_tokens_all": 628, "is_greedy": false, "logits_per_token": -1.4055027961730957, "logits_per_char": -0.7027513980865479, "num_chars": 2}, {"sum_logits": -1.472132921218872, "num_tokens": 1, "num_tokens_all": 628, "is_greedy": false, "logits_per_token": -1.472132921218872, "logits_per_char": -0.736066460609436, "num_chars": 2}, {"sum_logits": -1.4889521598815918, "num_tokens": 1, "num_tokens_all": 628, "is_greedy": false, "logits_per_token": -1.4889521598815918, "logits_per_char": -0.7444760799407959, "num_chars": 2}, {"sum_logits": -1.2278412580490112, "num_tokens": 1, "num_tokens_all": 628, "is_greedy": true, "logits_per_token": -1.2278412580490112, "logits_per_char": -0.6139206290245056, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4770699739456177, "incorrect_loss_raw": 1.3699057499567668, "correct_loss_per_char": 0.7385349869728088, "incorrect_loss_per_char": 0.6849528749783834, "correct_loss_per_token": 1.4770699739456177, "incorrect_loss_per_token": 1.3699057499567668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.424542784690857, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.424542784690857, "logits_per_char": -0.7122713923454285, "num_chars": 2}, {"sum_logits": -1.4770699739456177, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.4770699739456177, "logits_per_char": -0.7385349869728088, "num_chars": 2}, {"sum_logits": -1.3885266780853271, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.3885266780853271, "logits_per_char": -0.6942633390426636, "num_chars": 2}, {"sum_logits": -1.2966477870941162, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": true, "logits_per_token": -1.2966477870941162, "logits_per_char": -0.6483238935470581, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.497766375541687, "incorrect_loss_raw": 1.409801443417867, "correct_loss_per_char": 0.7488831877708435, "incorrect_loss_per_char": 0.7049007217089335, "correct_loss_per_token": 1.497766375541687, "incorrect_loss_per_token": 1.409801443417867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.649495005607605, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": false, "logits_per_token": -1.649495005607605, "logits_per_char": -0.8247475028038025, "num_chars": 2}, {"sum_logits": -1.595635175704956, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": false, "logits_per_token": -1.595635175704956, "logits_per_char": -0.797817587852478, "num_chars": 2}, {"sum_logits": -1.497766375541687, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": false, "logits_per_token": -1.497766375541687, "logits_per_char": -0.7488831877708435, "num_chars": 2}, {"sum_logits": -0.98427414894104, "num_tokens": 1, "num_tokens_all": 648, "is_greedy": true, "logits_per_token": -0.98427414894104, "logits_per_char": -0.49213707447052, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5779914855957031, "incorrect_loss_raw": 1.3567177454630535, "correct_loss_per_char": 0.7889957427978516, "incorrect_loss_per_char": 0.6783588727315267, "correct_loss_per_token": 1.5779914855957031, "incorrect_loss_per_token": 1.3567177454630535, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.537717342376709, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.537717342376709, "logits_per_char": -0.7688586711883545, "num_chars": 2}, {"sum_logits": -1.5779914855957031, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.5779914855957031, "logits_per_char": -0.7889957427978516, "num_chars": 2}, {"sum_logits": -1.4214646816253662, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.4214646816253662, "logits_per_char": -0.7107323408126831, "num_chars": 2}, {"sum_logits": -1.110971212387085, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": true, "logits_per_token": -1.110971212387085, "logits_per_char": -0.5554856061935425, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7147741317749023, "incorrect_loss_raw": 1.338906705379486, "correct_loss_per_char": 0.8573870658874512, "incorrect_loss_per_char": 0.669453352689743, "correct_loss_per_token": 1.7147741317749023, "incorrect_loss_per_token": 1.338906705379486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5513947010040283, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.5513947010040283, "logits_per_char": -0.7756973505020142, "num_chars": 2}, {"sum_logits": -1.7147741317749023, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.7147741317749023, "logits_per_char": -0.8573870658874512, "num_chars": 2}, {"sum_logits": -1.4798252582550049, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.4798252582550049, "logits_per_char": -0.7399126291275024, "num_chars": 2}, {"sum_logits": -0.985500156879425, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": true, "logits_per_token": -0.985500156879425, "logits_per_char": -0.4927500784397125, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2321701049804688, "incorrect_loss_raw": 1.4683727820714314, "correct_loss_per_char": 0.6160850524902344, "incorrect_loss_per_char": 0.7341863910357157, "correct_loss_per_token": 1.2321701049804688, "incorrect_loss_per_token": 1.4683727820714314, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.592191457748413, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.592191457748413, "logits_per_char": -0.7960957288742065, "num_chars": 2}, {"sum_logits": -1.591843843460083, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.591843843460083, "logits_per_char": -0.7959219217300415, "num_chars": 2}, {"sum_logits": -1.2321701049804688, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.2321701049804688, "logits_per_char": -0.6160850524902344, "num_chars": 2}, {"sum_logits": -1.2210830450057983, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": true, "logits_per_token": -1.2210830450057983, "logits_per_char": -0.6105415225028992, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2619553804397583, "incorrect_loss_raw": 1.5248535871505737, "correct_loss_per_char": 0.6309776902198792, "incorrect_loss_per_char": 0.7624267935752869, "correct_loss_per_token": 1.2619553804397583, "incorrect_loss_per_token": 1.5248535871505737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4696892499923706, "num_tokens": 1, "num_tokens_all": 629, "is_greedy": false, "logits_per_token": -1.4696892499923706, "logits_per_char": -0.7348446249961853, "num_chars": 2}, {"sum_logits": -1.510127305984497, "num_tokens": 1, "num_tokens_all": 629, "is_greedy": false, "logits_per_token": -1.510127305984497, "logits_per_char": -0.7550636529922485, "num_chars": 2}, {"sum_logits": -1.5947442054748535, "num_tokens": 1, "num_tokens_all": 629, "is_greedy": false, "logits_per_token": -1.5947442054748535, "logits_per_char": -0.7973721027374268, "num_chars": 2}, {"sum_logits": -1.2619553804397583, "num_tokens": 1, "num_tokens_all": 629, "is_greedy": true, "logits_per_token": -1.2619553804397583, "logits_per_char": -0.6309776902198792, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9886537790298462, "incorrect_loss_raw": 1.5758370955785115, "correct_loss_per_char": 0.4943268895149231, "incorrect_loss_per_char": 0.7879185477892557, "correct_loss_per_token": 0.9886537790298462, "incorrect_loss_per_token": 1.5758370955785115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6702021360397339, "num_tokens": 1, "num_tokens_all": 684, "is_greedy": false, "logits_per_token": -1.6702021360397339, "logits_per_char": -0.8351010680198669, "num_chars": 2}, {"sum_logits": -1.5360572338104248, "num_tokens": 1, "num_tokens_all": 684, "is_greedy": false, "logits_per_token": -1.5360572338104248, "logits_per_char": -0.7680286169052124, "num_chars": 2}, {"sum_logits": -1.521251916885376, "num_tokens": 1, "num_tokens_all": 684, "is_greedy": false, "logits_per_token": -1.521251916885376, "logits_per_char": -0.760625958442688, "num_chars": 2}, {"sum_logits": -0.9886537790298462, "num_tokens": 1, "num_tokens_all": 684, "is_greedy": true, "logits_per_token": -0.9886537790298462, "logits_per_char": -0.4943268895149231, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4958820343017578, "incorrect_loss_raw": 1.3801262378692627, "correct_loss_per_char": 0.7479410171508789, "incorrect_loss_per_char": 0.6900631189346313, "correct_loss_per_token": 1.4958820343017578, "incorrect_loss_per_token": 1.3801262378692627, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4958820343017578, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.4958820343017578, "logits_per_char": -0.7479410171508789, "num_chars": 2}, {"sum_logits": -1.5114490985870361, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.5114490985870361, "logits_per_char": -0.7557245492935181, "num_chars": 2}, {"sum_logits": -1.5243017673492432, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.5243017673492432, "logits_per_char": -0.7621508836746216, "num_chars": 2}, {"sum_logits": -1.1046278476715088, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": true, "logits_per_token": -1.1046278476715088, "logits_per_char": -0.5523139238357544, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8355741500854492, "incorrect_loss_raw": 1.3007491032282512, "correct_loss_per_char": 0.9177870750427246, "incorrect_loss_per_char": 0.6503745516141256, "correct_loss_per_token": 1.8355741500854492, "incorrect_loss_per_token": 1.3007491032282512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.8355741500854492, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.8355741500854492, "logits_per_char": -0.9177870750427246, "num_chars": 2}, {"sum_logits": -1.5175566673278809, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.5175566673278809, "logits_per_char": -0.7587783336639404, "num_chars": 2}, {"sum_logits": -1.3692104816436768, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": false, "logits_per_token": -1.3692104816436768, "logits_per_char": -0.6846052408218384, "num_chars": 2}, {"sum_logits": -1.0154801607131958, "num_tokens": 1, "num_tokens_all": 592, "is_greedy": true, "logits_per_token": -1.0154801607131958, "logits_per_char": -0.5077400803565979, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3122040033340454, "incorrect_loss_raw": 1.4837214946746826, "correct_loss_per_char": 0.6561020016670227, "incorrect_loss_per_char": 0.7418607473373413, "correct_loss_per_token": 1.3122040033340454, "incorrect_loss_per_token": 1.4837214946746826, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3122040033340454, "num_tokens": 1, "num_tokens_all": 630, "is_greedy": true, "logits_per_token": -1.3122040033340454, "logits_per_char": -0.6561020016670227, "num_chars": 2}, {"sum_logits": -1.3400113582611084, "num_tokens": 1, "num_tokens_all": 630, "is_greedy": false, "logits_per_token": -1.3400113582611084, "logits_per_char": -0.6700056791305542, "num_chars": 2}, {"sum_logits": -1.6722033023834229, "num_tokens": 1, "num_tokens_all": 630, "is_greedy": false, "logits_per_token": -1.6722033023834229, "logits_per_char": -0.8361016511917114, "num_chars": 2}, {"sum_logits": -1.4389498233795166, "num_tokens": 1, "num_tokens_all": 630, "is_greedy": false, "logits_per_token": -1.4389498233795166, "logits_per_char": -0.7194749116897583, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3543314933776855, "incorrect_loss_raw": 1.426483114560445, "correct_loss_per_char": 0.6771657466888428, "incorrect_loss_per_char": 0.7132415572802225, "correct_loss_per_token": 1.3543314933776855, "incorrect_loss_per_token": 1.426483114560445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6057687997817993, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.6057687997817993, "logits_per_char": -0.8028843998908997, "num_chars": 2}, {"sum_logits": -1.523123025894165, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.523123025894165, "logits_per_char": -0.7615615129470825, "num_chars": 2}, {"sum_logits": -1.3543314933776855, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.3543314933776855, "logits_per_char": -0.6771657466888428, "num_chars": 2}, {"sum_logits": -1.150557518005371, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": true, "logits_per_token": -1.150557518005371, "logits_per_char": -0.5752787590026855, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.438638687133789, "incorrect_loss_raw": 1.3793847560882568, "correct_loss_per_char": 0.7193193435668945, "incorrect_loss_per_char": 0.6896923780441284, "correct_loss_per_token": 1.438638687133789, "incorrect_loss_per_token": 1.3793847560882568, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3788952827453613, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.3788952827453613, "logits_per_char": -0.6894476413726807, "num_chars": 2}, {"sum_logits": -1.438638687133789, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.438638687133789, "logits_per_char": -0.7193193435668945, "num_chars": 2}, {"sum_logits": -1.4258453845977783, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.4258453845977783, "logits_per_char": -0.7129226922988892, "num_chars": 2}, {"sum_logits": -1.3334136009216309, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": true, "logits_per_token": -1.3334136009216309, "logits_per_char": -0.6667068004608154, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0795871019363403, "incorrect_loss_raw": 1.5258604685465496, "correct_loss_per_char": 0.5397935509681702, "incorrect_loss_per_char": 0.7629302342732748, "correct_loss_per_token": 1.0795871019363403, "incorrect_loss_per_token": 1.5258604685465496, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5428946018218994, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.5428946018218994, "logits_per_char": -0.7714473009109497, "num_chars": 2}, {"sum_logits": -1.4371141195297241, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.4371141195297241, "logits_per_char": -0.7185570597648621, "num_chars": 2}, {"sum_logits": -1.597572684288025, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.597572684288025, "logits_per_char": -0.7987863421440125, "num_chars": 2}, {"sum_logits": -1.0795871019363403, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": true, "logits_per_token": -1.0795871019363403, "logits_per_char": -0.5397935509681702, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0924381017684937, "incorrect_loss_raw": 1.5227056741714478, "correct_loss_per_char": 0.5462190508842468, "incorrect_loss_per_char": 0.7613528370857239, "correct_loss_per_token": 1.0924381017684937, "incorrect_loss_per_token": 1.5227056741714478, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6206964254379272, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.6206964254379272, "logits_per_char": -0.8103482127189636, "num_chars": 2}, {"sum_logits": -1.5783069133758545, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.5783069133758545, "logits_per_char": -0.7891534566879272, "num_chars": 2}, {"sum_logits": -1.3691136837005615, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.3691136837005615, "logits_per_char": -0.6845568418502808, "num_chars": 2}, {"sum_logits": -1.0924381017684937, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": true, "logits_per_token": -1.0924381017684937, "logits_per_char": -0.5462190508842468, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3529540300369263, "incorrect_loss_raw": 1.4855645497639973, "correct_loss_per_char": 0.6764770150184631, "incorrect_loss_per_char": 0.7427822748819987, "correct_loss_per_token": 1.3529540300369263, "incorrect_loss_per_token": 1.4855645497639973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3529540300369263, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": true, "logits_per_token": -1.3529540300369263, "logits_per_char": -0.6764770150184631, "num_chars": 2}, {"sum_logits": -1.3912534713745117, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.3912534713745117, "logits_per_char": -0.6956267356872559, "num_chars": 2}, {"sum_logits": -1.5446741580963135, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.5446741580963135, "logits_per_char": -0.7723370790481567, "num_chars": 2}, {"sum_logits": -1.520766019821167, "num_tokens": 1, "num_tokens_all": 614, "is_greedy": false, "logits_per_token": -1.520766019821167, "logits_per_char": -0.7603830099105835, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2010583877563477, "incorrect_loss_raw": 1.4845712979634602, "correct_loss_per_char": 0.6005291938781738, "incorrect_loss_per_char": 0.7422856489817301, "correct_loss_per_token": 1.2010583877563477, "incorrect_loss_per_token": 1.4845712979634602, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6057237386703491, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.6057237386703491, "logits_per_char": -0.8028618693351746, "num_chars": 2}, {"sum_logits": -1.6174103021621704, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.6174103021621704, "logits_per_char": -0.8087051510810852, "num_chars": 2}, {"sum_logits": -1.2305798530578613, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": false, "logits_per_token": -1.2305798530578613, "logits_per_char": -0.6152899265289307, "num_chars": 2}, {"sum_logits": -1.2010583877563477, "num_tokens": 1, "num_tokens_all": 555, "is_greedy": true, "logits_per_token": -1.2010583877563477, "logits_per_char": -0.6005291938781738, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3540796041488647, "incorrect_loss_raw": 1.4665989478429158, "correct_loss_per_char": 0.6770398020744324, "incorrect_loss_per_char": 0.7332994739214579, "correct_loss_per_token": 1.3540796041488647, "incorrect_loss_per_token": 1.4665989478429158, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3540796041488647, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.3540796041488647, "logits_per_char": -0.6770398020744324, "num_chars": 2}, {"sum_logits": -1.3232020139694214, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": true, "logits_per_token": -1.3232020139694214, "logits_per_char": -0.6616010069847107, "num_chars": 2}, {"sum_logits": -1.649431824684143, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.649431824684143, "logits_per_char": -0.8247159123420715, "num_chars": 2}, {"sum_logits": -1.427163004875183, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.427163004875183, "logits_per_char": -0.7135815024375916, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4278696775436401, "incorrect_loss_raw": 1.4006623427073162, "correct_loss_per_char": 0.7139348387718201, "incorrect_loss_per_char": 0.7003311713536581, "correct_loss_per_token": 1.4278696775436401, "incorrect_loss_per_token": 1.4006623427073162, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5402768850326538, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.5402768850326538, "logits_per_char": -0.7701384425163269, "num_chars": 2}, {"sum_logits": -1.4278696775436401, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.4278696775436401, "logits_per_char": -0.7139348387718201, "num_chars": 2}, {"sum_logits": -1.5414670705795288, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.5414670705795288, "logits_per_char": -0.7707335352897644, "num_chars": 2}, {"sum_logits": -1.1202430725097656, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": true, "logits_per_token": -1.1202430725097656, "logits_per_char": -0.5601215362548828, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.421897053718567, "incorrect_loss_raw": 1.4171850283940632, "correct_loss_per_char": 0.7109485268592834, "incorrect_loss_per_char": 0.7085925141970316, "correct_loss_per_token": 1.421897053718567, "incorrect_loss_per_token": 1.4171850283940632, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7453784942626953, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.7453784942626953, "logits_per_char": -0.8726892471313477, "num_chars": 2}, {"sum_logits": -1.421897053718567, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.421897053718567, "logits_per_char": -0.7109485268592834, "num_chars": 2}, {"sum_logits": -1.4189621210098267, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.4189621210098267, "logits_per_char": -0.7094810605049133, "num_chars": 2}, {"sum_logits": -1.087214469909668, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": true, "logits_per_token": -1.087214469909668, "logits_per_char": -0.543607234954834, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.689730167388916, "incorrect_loss_raw": 1.4087353150049846, "correct_loss_per_char": 0.844865083694458, "incorrect_loss_per_char": 0.7043676575024923, "correct_loss_per_token": 1.689730167388916, "incorrect_loss_per_token": 1.4087353150049846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3635143041610718, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.3635143041610718, "logits_per_char": -0.6817571520805359, "num_chars": 2}, {"sum_logits": -1.6694116592407227, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.6694116592407227, "logits_per_char": -0.8347058296203613, "num_chars": 2}, {"sum_logits": -1.689730167388916, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": false, "logits_per_token": -1.689730167388916, "logits_per_char": -0.844865083694458, "num_chars": 2}, {"sum_logits": -1.1932799816131592, "num_tokens": 1, "num_tokens_all": 617, "is_greedy": true, "logits_per_token": -1.1932799816131592, "logits_per_char": -0.5966399908065796, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.19557523727417, "incorrect_loss_raw": 1.4934984842936199, "correct_loss_per_char": 0.597787618637085, "incorrect_loss_per_char": 0.7467492421468099, "correct_loss_per_token": 1.19557523727417, "incorrect_loss_per_token": 1.4934984842936199, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2248889207839966, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.2248889207839966, "logits_per_char": -0.6124444603919983, "num_chars": 2}, {"sum_logits": -1.6070624589920044, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.6070624589920044, "logits_per_char": -0.8035312294960022, "num_chars": 2}, {"sum_logits": -1.6485440731048584, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": false, "logits_per_token": -1.6485440731048584, "logits_per_char": -0.8242720365524292, "num_chars": 2}, {"sum_logits": -1.19557523727417, "num_tokens": 1, "num_tokens_all": 621, "is_greedy": true, "logits_per_token": -1.19557523727417, "logits_per_char": -0.597787618637085, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3603589534759521, "incorrect_loss_raw": 1.413988431294759, "correct_loss_per_char": 0.6801794767379761, "incorrect_loss_per_char": 0.7069942156473795, "correct_loss_per_token": 1.3603589534759521, "incorrect_loss_per_token": 1.413988431294759, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3603589534759521, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.3603589534759521, "logits_per_char": -0.6801794767379761, "num_chars": 2}, {"sum_logits": -1.5239958763122559, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.5239958763122559, "logits_per_char": -0.7619979381561279, "num_chars": 2}, {"sum_logits": -1.498534917831421, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.498534917831421, "logits_per_char": -0.7492674589157104, "num_chars": 2}, {"sum_logits": -1.2194344997406006, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": true, "logits_per_token": -1.2194344997406006, "logits_per_char": -0.6097172498703003, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5515644550323486, "incorrect_loss_raw": 1.367691993713379, "correct_loss_per_char": 0.7757822275161743, "incorrect_loss_per_char": 0.6838459968566895, "correct_loss_per_token": 1.5515644550323486, "incorrect_loss_per_token": 1.367691993713379, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3485461473464966, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.3485461473464966, "logits_per_char": -0.6742730736732483, "num_chars": 2}, {"sum_logits": -1.5515644550323486, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.5515644550323486, "logits_per_char": -0.7757822275161743, "num_chars": 2}, {"sum_logits": -1.5770201683044434, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.5770201683044434, "logits_per_char": -0.7885100841522217, "num_chars": 2}, {"sum_logits": -1.1775096654891968, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": true, "logits_per_token": -1.1775096654891968, "logits_per_char": -0.5887548327445984, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.49067223072052, "incorrect_loss_raw": 1.375020980834961, "correct_loss_per_char": 0.74533611536026, "incorrect_loss_per_char": 0.6875104904174805, "correct_loss_per_token": 1.49067223072052, "incorrect_loss_per_token": 1.375020980834961, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4825429916381836, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.4825429916381836, "logits_per_char": -0.7412714958190918, "num_chars": 2}, {"sum_logits": -1.4888871908187866, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.4888871908187866, "logits_per_char": -0.7444435954093933, "num_chars": 2}, {"sum_logits": -1.49067223072052, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.49067223072052, "logits_per_char": -0.74533611536026, "num_chars": 2}, {"sum_logits": -1.1536327600479126, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": true, "logits_per_token": -1.1536327600479126, "logits_per_char": -0.5768163800239563, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5313986539840698, "incorrect_loss_raw": 1.360748012860616, "correct_loss_per_char": 0.7656993269920349, "incorrect_loss_per_char": 0.680374006430308, "correct_loss_per_token": 1.5313986539840698, "incorrect_loss_per_token": 1.360748012860616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3837671279907227, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.3837671279907227, "logits_per_char": -0.6918835639953613, "num_chars": 2}, {"sum_logits": -1.524585485458374, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.524585485458374, "logits_per_char": -0.762292742729187, "num_chars": 2}, {"sum_logits": -1.5313986539840698, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": false, "logits_per_token": -1.5313986539840698, "logits_per_char": -0.7656993269920349, "num_chars": 2}, {"sum_logits": -1.1738914251327515, "num_tokens": 1, "num_tokens_all": 565, "is_greedy": true, "logits_per_token": -1.1738914251327515, "logits_per_char": -0.5869457125663757, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.454233169555664, "incorrect_loss_raw": 1.3957148392995198, "correct_loss_per_char": 0.727116584777832, "incorrect_loss_per_char": 0.6978574196497599, "correct_loss_per_token": 1.454233169555664, "incorrect_loss_per_token": 1.3957148392995198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.454233169555664, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.454233169555664, "logits_per_char": -0.727116584777832, "num_chars": 2}, {"sum_logits": -1.5924906730651855, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.5924906730651855, "logits_per_char": -0.7962453365325928, "num_chars": 2}, {"sum_logits": -1.4349820613861084, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.4349820613861084, "logits_per_char": -0.7174910306930542, "num_chars": 2}, {"sum_logits": -1.1596717834472656, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": true, "logits_per_token": -1.1596717834472656, "logits_per_char": -0.5798358917236328, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.319035291671753, "incorrect_loss_raw": 1.4247984886169434, "correct_loss_per_char": 0.6595176458358765, "incorrect_loss_per_char": 0.7123992443084717, "correct_loss_per_token": 1.319035291671753, "incorrect_loss_per_token": 1.4247984886169434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3837659358978271, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.3837659358978271, "logits_per_char": -0.6918829679489136, "num_chars": 2}, {"sum_logits": -1.434859037399292, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.434859037399292, "logits_per_char": -0.717429518699646, "num_chars": 2}, {"sum_logits": -1.455770492553711, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.455770492553711, "logits_per_char": -0.7278852462768555, "num_chars": 2}, {"sum_logits": -1.319035291671753, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": true, "logits_per_token": -1.319035291671753, "logits_per_char": -0.6595176458358765, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4578857421875, "incorrect_loss_raw": 1.4047738711039226, "correct_loss_per_char": 0.72894287109375, "incorrect_loss_per_char": 0.7023869355519613, "correct_loss_per_token": 1.4578857421875, "incorrect_loss_per_token": 1.4047738711039226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7220611572265625, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.7220611572265625, "logits_per_char": -0.8610305786132812, "num_chars": 2}, {"sum_logits": -1.4578857421875, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.4578857421875, "logits_per_char": -0.72894287109375, "num_chars": 2}, {"sum_logits": -1.4146451950073242, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.4146451950073242, "logits_per_char": -0.7073225975036621, "num_chars": 2}, {"sum_logits": -1.0776152610778809, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.0776152610778809, "logits_per_char": -0.5388076305389404, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0292022228240967, "incorrect_loss_raw": 1.5517688592274983, "correct_loss_per_char": 0.5146011114120483, "incorrect_loss_per_char": 0.7758844296137491, "correct_loss_per_token": 1.0292022228240967, "incorrect_loss_per_token": 1.5517688592274983, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.49296236038208, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.49296236038208, "logits_per_char": -0.74648118019104, "num_chars": 2}, {"sum_logits": -1.6152474880218506, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.6152474880218506, "logits_per_char": -0.8076237440109253, "num_chars": 2}, {"sum_logits": -1.5470967292785645, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.5470967292785645, "logits_per_char": -0.7735483646392822, "num_chars": 2}, {"sum_logits": -1.0292022228240967, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -1.0292022228240967, "logits_per_char": -0.5146011114120483, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5350538492202759, "incorrect_loss_raw": 1.369343678156535, "correct_loss_per_char": 0.7675269246101379, "incorrect_loss_per_char": 0.6846718390782675, "correct_loss_per_token": 1.5350538492202759, "incorrect_loss_per_token": 1.369343678156535, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5220774412155151, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.5220774412155151, "logits_per_char": -0.7610387206077576, "num_chars": 2}, {"sum_logits": -1.4860056638717651, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.4860056638717651, "logits_per_char": -0.7430028319358826, "num_chars": 2}, {"sum_logits": -1.5350538492202759, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.5350538492202759, "logits_per_char": -0.7675269246101379, "num_chars": 2}, {"sum_logits": -1.0999479293823242, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.0999479293823242, "logits_per_char": -0.5499739646911621, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5121643543243408, "incorrect_loss_raw": 1.3661192655563354, "correct_loss_per_char": 0.7560821771621704, "incorrect_loss_per_char": 0.6830596327781677, "correct_loss_per_token": 1.5121643543243408, "incorrect_loss_per_token": 1.3661192655563354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5121643543243408, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5121643543243408, "logits_per_char": -0.7560821771621704, "num_chars": 2}, {"sum_logits": -1.5615023374557495, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5615023374557495, "logits_per_char": -0.7807511687278748, "num_chars": 2}, {"sum_logits": -1.29710054397583, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.29710054397583, "logits_per_char": -0.648550271987915, "num_chars": 2}, {"sum_logits": -1.2397549152374268, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -1.2397549152374268, "logits_per_char": -0.6198774576187134, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5650696754455566, "incorrect_loss_raw": 1.3484326998392742, "correct_loss_per_char": 0.7825348377227783, "incorrect_loss_per_char": 0.6742163499196371, "correct_loss_per_token": 1.5650696754455566, "incorrect_loss_per_token": 1.3484326998392742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.424521565437317, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.424521565437317, "logits_per_char": -0.7122607827186584, "num_chars": 2}, {"sum_logits": -1.5650696754455566, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.5650696754455566, "logits_per_char": -0.7825348377227783, "num_chars": 2}, {"sum_logits": -1.4182735681533813, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.4182735681533813, "logits_per_char": -0.7091367840766907, "num_chars": 2}, {"sum_logits": -1.202502965927124, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": true, "logits_per_token": -1.202502965927124, "logits_per_char": -0.601251482963562, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6124062538146973, "incorrect_loss_raw": 1.3704328139623005, "correct_loss_per_char": 0.8062031269073486, "incorrect_loss_per_char": 0.6852164069811503, "correct_loss_per_token": 1.6124062538146973, "incorrect_loss_per_token": 1.3704328139623005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6124062538146973, "num_tokens": 1, "num_tokens_all": 631, "is_greedy": false, "logits_per_token": -1.6124062538146973, "logits_per_char": -0.8062031269073486, "num_chars": 2}, {"sum_logits": -1.6188424825668335, "num_tokens": 1, "num_tokens_all": 631, "is_greedy": false, "logits_per_token": -1.6188424825668335, "logits_per_char": -0.8094212412834167, "num_chars": 2}, {"sum_logits": -1.5118991136550903, "num_tokens": 1, "num_tokens_all": 631, "is_greedy": false, "logits_per_token": -1.5118991136550903, "logits_per_char": -0.7559495568275452, "num_chars": 2}, {"sum_logits": -0.980556845664978, "num_tokens": 1, "num_tokens_all": 631, "is_greedy": true, "logits_per_token": -0.980556845664978, "logits_per_char": -0.490278422832489, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2577028274536133, "incorrect_loss_raw": 1.4813332955042522, "correct_loss_per_char": 0.6288514137268066, "incorrect_loss_per_char": 0.7406666477521261, "correct_loss_per_token": 1.2577028274536133, "incorrect_loss_per_token": 1.4813332955042522, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2577028274536133, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": true, "logits_per_token": -1.2577028274536133, "logits_per_char": -0.6288514137268066, "num_chars": 2}, {"sum_logits": -1.4963043928146362, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.4963043928146362, "logits_per_char": -0.7481521964073181, "num_chars": 2}, {"sum_logits": -1.688697338104248, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.688697338104248, "logits_per_char": -0.844348669052124, "num_chars": 2}, {"sum_logits": -1.258998155593872, "num_tokens": 1, "num_tokens_all": 618, "is_greedy": false, "logits_per_token": -1.258998155593872, "logits_per_char": -0.629499077796936, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.185119867324829, "incorrect_loss_raw": 1.4755691289901733, "correct_loss_per_char": 0.5925599336624146, "incorrect_loss_per_char": 0.7377845644950867, "correct_loss_per_token": 1.185119867324829, "incorrect_loss_per_token": 1.4755691289901733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5750176906585693, "num_tokens": 1, "num_tokens_all": 583, "is_greedy": false, "logits_per_token": -1.5750176906585693, "logits_per_char": -0.7875088453292847, "num_chars": 2}, {"sum_logits": -1.4186915159225464, "num_tokens": 1, "num_tokens_all": 583, "is_greedy": false, "logits_per_token": -1.4186915159225464, "logits_per_char": -0.7093457579612732, "num_chars": 2}, {"sum_logits": -1.4329981803894043, "num_tokens": 1, "num_tokens_all": 583, "is_greedy": false, "logits_per_token": -1.4329981803894043, "logits_per_char": -0.7164990901947021, "num_chars": 2}, {"sum_logits": -1.185119867324829, "num_tokens": 1, "num_tokens_all": 583, "is_greedy": true, "logits_per_token": -1.185119867324829, "logits_per_char": -0.5925599336624146, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4268944263458252, "incorrect_loss_raw": 1.388120134671529, "correct_loss_per_char": 0.7134472131729126, "incorrect_loss_per_char": 0.6940600673357645, "correct_loss_per_token": 1.4268944263458252, "incorrect_loss_per_token": 1.388120134671529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3914153575897217, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.3914153575897217, "logits_per_char": -0.6957076787948608, "num_chars": 2}, {"sum_logits": -1.4268944263458252, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.4268944263458252, "logits_per_char": -0.7134472131729126, "num_chars": 2}, {"sum_logits": -1.4877722263336182, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.4877722263336182, "logits_per_char": -0.7438861131668091, "num_chars": 2}, {"sum_logits": -1.2851728200912476, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": true, "logits_per_token": -1.2851728200912476, "logits_per_char": -0.6425864100456238, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.067579984664917, "incorrect_loss_raw": 1.5281474590301514, "correct_loss_per_char": 0.5337899923324585, "incorrect_loss_per_char": 0.7640737295150757, "correct_loss_per_token": 1.067579984664917, "incorrect_loss_per_token": 1.5281474590301514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5160518884658813, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.5160518884658813, "logits_per_char": -0.7580259442329407, "num_chars": 2}, {"sum_logits": -1.547403335571289, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.547403335571289, "logits_per_char": -0.7737016677856445, "num_chars": 2}, {"sum_logits": -1.5209871530532837, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.5209871530532837, "logits_per_char": -0.7604935765266418, "num_chars": 2}, {"sum_logits": -1.067579984664917, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": true, "logits_per_token": -1.067579984664917, "logits_per_char": -0.5337899923324585, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.550965428352356, "incorrect_loss_raw": 1.3680099248886108, "correct_loss_per_char": 0.775482714176178, "incorrect_loss_per_char": 0.6840049624443054, "correct_loss_per_token": 1.550965428352356, "incorrect_loss_per_token": 1.3680099248886108, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4295482635498047, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.4295482635498047, "logits_per_char": -0.7147741317749023, "num_chars": 2}, {"sum_logits": -1.5920264720916748, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.5920264720916748, "logits_per_char": -0.7960132360458374, "num_chars": 2}, {"sum_logits": -1.550965428352356, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.550965428352356, "logits_per_char": -0.775482714176178, "num_chars": 2}, {"sum_logits": -1.082455039024353, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": true, "logits_per_token": -1.082455039024353, "logits_per_char": -0.5412275195121765, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3254528045654297, "incorrect_loss_raw": 1.5052764415740967, "correct_loss_per_char": 0.6627264022827148, "incorrect_loss_per_char": 0.7526382207870483, "correct_loss_per_token": 1.3254528045654297, "incorrect_loss_per_token": 1.5052764415740967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.423733115196228, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.423733115196228, "logits_per_char": -0.711866557598114, "num_chars": 2}, {"sum_logits": -1.5532029867172241, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.5532029867172241, "logits_per_char": -0.7766014933586121, "num_chars": 2}, {"sum_logits": -1.538893222808838, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": false, "logits_per_token": -1.538893222808838, "logits_per_char": -0.769446611404419, "num_chars": 2}, {"sum_logits": -1.3254528045654297, "num_tokens": 1, "num_tokens_all": 626, "is_greedy": true, "logits_per_token": -1.3254528045654297, "logits_per_char": -0.6627264022827148, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6646168231964111, "incorrect_loss_raw": 1.3347963094711304, "correct_loss_per_char": 0.8323084115982056, "incorrect_loss_per_char": 0.6673981547355652, "correct_loss_per_token": 1.6646168231964111, "incorrect_loss_per_token": 1.3347963094711304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6646168231964111, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.6646168231964111, "logits_per_char": -0.8323084115982056, "num_chars": 2}, {"sum_logits": -1.508988857269287, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.508988857269287, "logits_per_char": -0.7544944286346436, "num_chars": 2}, {"sum_logits": -1.4257818460464478, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.4257818460464478, "logits_per_char": -0.7128909230232239, "num_chars": 2}, {"sum_logits": -1.0696182250976562, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -1.0696182250976562, "logits_per_char": -0.5348091125488281, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4877266883850098, "incorrect_loss_raw": 1.38109290599823, "correct_loss_per_char": 0.7438633441925049, "incorrect_loss_per_char": 0.690546452999115, "correct_loss_per_token": 1.4877266883850098, "incorrect_loss_per_token": 1.38109290599823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4877266883850098, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.4877266883850098, "logits_per_char": -0.7438633441925049, "num_chars": 2}, {"sum_logits": -1.4539384841918945, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.4539384841918945, "logits_per_char": -0.7269692420959473, "num_chars": 2}, {"sum_logits": -1.491166591644287, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.491166591644287, "logits_per_char": -0.7455832958221436, "num_chars": 2}, {"sum_logits": -1.1981736421585083, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": true, "logits_per_token": -1.1981736421585083, "logits_per_char": -0.5990868210792542, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0060228109359741, "incorrect_loss_raw": 1.5692507425944011, "correct_loss_per_char": 0.5030114054679871, "incorrect_loss_per_char": 0.7846253712972006, "correct_loss_per_token": 1.0060228109359741, "incorrect_loss_per_token": 1.5692507425944011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.728401780128479, "num_tokens": 1, "num_tokens_all": 672, "is_greedy": false, "logits_per_token": -1.728401780128479, "logits_per_char": -0.8642008900642395, "num_chars": 2}, {"sum_logits": -1.4412472248077393, "num_tokens": 1, "num_tokens_all": 672, "is_greedy": false, "logits_per_token": -1.4412472248077393, "logits_per_char": -0.7206236124038696, "num_chars": 2}, {"sum_logits": -1.5381032228469849, "num_tokens": 1, "num_tokens_all": 672, "is_greedy": false, "logits_per_token": -1.5381032228469849, "logits_per_char": -0.7690516114234924, "num_chars": 2}, {"sum_logits": -1.0060228109359741, "num_tokens": 1, "num_tokens_all": 672, "is_greedy": true, "logits_per_token": -1.0060228109359741, "logits_per_char": -0.5030114054679871, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.280587077140808, "incorrect_loss_raw": 1.4429818789164226, "correct_loss_per_char": 0.640293538570404, "incorrect_loss_per_char": 0.7214909394582113, "correct_loss_per_token": 1.280587077140808, "incorrect_loss_per_token": 1.4429818789164226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3517557382583618, "num_tokens": 1, "num_tokens_all": 662, "is_greedy": false, "logits_per_token": -1.3517557382583618, "logits_per_char": -0.6758778691291809, "num_chars": 2}, {"sum_logits": -1.407200574874878, "num_tokens": 1, "num_tokens_all": 662, "is_greedy": false, "logits_per_token": -1.407200574874878, "logits_per_char": -0.703600287437439, "num_chars": 2}, {"sum_logits": -1.5699893236160278, "num_tokens": 1, "num_tokens_all": 662, "is_greedy": false, "logits_per_token": -1.5699893236160278, "logits_per_char": -0.7849946618080139, "num_chars": 2}, {"sum_logits": -1.280587077140808, "num_tokens": 1, "num_tokens_all": 662, "is_greedy": true, "logits_per_token": -1.280587077140808, "logits_per_char": -0.640293538570404, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1448376178741455, "incorrect_loss_raw": 1.4985803763071697, "correct_loss_per_char": 0.5724188089370728, "incorrect_loss_per_char": 0.7492901881535848, "correct_loss_per_token": 1.1448376178741455, "incorrect_loss_per_token": 1.4985803763071697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3509974479675293, "num_tokens": 1, "num_tokens_all": 583, "is_greedy": false, "logits_per_token": -1.3509974479675293, "logits_per_char": -0.6754987239837646, "num_chars": 2}, {"sum_logits": -1.4841179847717285, "num_tokens": 1, "num_tokens_all": 583, "is_greedy": false, "logits_per_token": -1.4841179847717285, "logits_per_char": -0.7420589923858643, "num_chars": 2}, {"sum_logits": -1.660625696182251, "num_tokens": 1, "num_tokens_all": 583, "is_greedy": false, "logits_per_token": -1.660625696182251, "logits_per_char": -0.8303128480911255, "num_chars": 2}, {"sum_logits": -1.1448376178741455, "num_tokens": 1, "num_tokens_all": 583, "is_greedy": true, "logits_per_token": -1.1448376178741455, "logits_per_char": -0.5724188089370728, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4713082313537598, "incorrect_loss_raw": 1.4213557243347168, "correct_loss_per_char": 0.7356541156768799, "incorrect_loss_per_char": 0.7106778621673584, "correct_loss_per_token": 1.4713082313537598, "incorrect_loss_per_token": 1.4213557243347168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4646556377410889, "num_tokens": 1, "num_tokens_all": 630, "is_greedy": false, "logits_per_token": -1.4646556377410889, "logits_per_char": -0.7323278188705444, "num_chars": 2}, {"sum_logits": -1.2940874099731445, "num_tokens": 1, "num_tokens_all": 630, "is_greedy": true, "logits_per_token": -1.2940874099731445, "logits_per_char": -0.6470437049865723, "num_chars": 2}, {"sum_logits": -1.505324125289917, "num_tokens": 1, "num_tokens_all": 630, "is_greedy": false, "logits_per_token": -1.505324125289917, "logits_per_char": -0.7526620626449585, "num_chars": 2}, {"sum_logits": -1.4713082313537598, "num_tokens": 1, "num_tokens_all": 630, "is_greedy": false, "logits_per_token": -1.4713082313537598, "logits_per_char": -0.7356541156768799, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.53496515750885, "incorrect_loss_raw": 1.4142486651738484, "correct_loss_per_char": 0.767482578754425, "incorrect_loss_per_char": 0.7071243325869242, "correct_loss_per_token": 1.53496515750885, "incorrect_loss_per_token": 1.4142486651738484, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6926274299621582, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.6926274299621582, "logits_per_char": -0.8463137149810791, "num_chars": 2}, {"sum_logits": -1.62904953956604, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.62904953956604, "logits_per_char": -0.81452476978302, "num_chars": 2}, {"sum_logits": -1.53496515750885, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": false, "logits_per_token": -1.53496515750885, "logits_per_char": -0.767482578754425, "num_chars": 2}, {"sum_logits": -0.9210690259933472, "num_tokens": 1, "num_tokens_all": 595, "is_greedy": true, "logits_per_token": -0.9210690259933472, "logits_per_char": -0.4605345129966736, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9993283748626709, "incorrect_loss_raw": 1.5719621181488037, "correct_loss_per_char": 0.49966418743133545, "incorrect_loss_per_char": 0.7859810590744019, "correct_loss_per_token": 0.9993283748626709, "incorrect_loss_per_token": 1.5719621181488037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6451750993728638, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": false, "logits_per_token": -1.6451750993728638, "logits_per_char": -0.8225875496864319, "num_chars": 2}, {"sum_logits": -1.5495893955230713, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": false, "logits_per_token": -1.5495893955230713, "logits_per_char": -0.7747946977615356, "num_chars": 2}, {"sum_logits": -1.521121859550476, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": false, "logits_per_token": -1.521121859550476, "logits_per_char": -0.760560929775238, "num_chars": 2}, {"sum_logits": -0.9993283748626709, "num_tokens": 1, "num_tokens_all": 600, "is_greedy": true, "logits_per_token": -0.9993283748626709, "logits_per_char": -0.49966418743133545, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.590400218963623, "incorrect_loss_raw": 1.3937475283940632, "correct_loss_per_char": 0.7952001094818115, "incorrect_loss_per_char": 0.6968737641970316, "correct_loss_per_token": 1.590400218963623, "incorrect_loss_per_token": 1.3937475283940632, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1711970567703247, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": true, "logits_per_token": -1.1711970567703247, "logits_per_char": -0.5855985283851624, "num_chars": 2}, {"sum_logits": -1.3698704242706299, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.3698704242706299, "logits_per_char": -0.6849352121353149, "num_chars": 2}, {"sum_logits": -1.6401751041412354, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.6401751041412354, "logits_per_char": -0.8200875520706177, "num_chars": 2}, {"sum_logits": -1.590400218963623, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.590400218963623, "logits_per_char": -0.7952001094818115, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.459788203239441, "incorrect_loss_raw": 1.3832767407099407, "correct_loss_per_char": 0.7298941016197205, "incorrect_loss_per_char": 0.6916383703549703, "correct_loss_per_token": 1.459788203239441, "incorrect_loss_per_token": 1.3832767407099407, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6001167297363281, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.6001167297363281, "logits_per_char": -0.8000583648681641, "num_chars": 2}, {"sum_logits": -1.2213703393936157, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.2213703393936157, "logits_per_char": -0.6106851696968079, "num_chars": 2}, {"sum_logits": -1.459788203239441, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.459788203239441, "logits_per_char": -0.7298941016197205, "num_chars": 2}, {"sum_logits": -1.328343152999878, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.328343152999878, "logits_per_char": -0.664171576499939, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4900130033493042, "incorrect_loss_raw": 1.38792089621226, "correct_loss_per_char": 0.7450065016746521, "incorrect_loss_per_char": 0.69396044810613, "correct_loss_per_token": 1.4900130033493042, "incorrect_loss_per_token": 1.38792089621226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6574945449829102, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.6574945449829102, "logits_per_char": -0.8287472724914551, "num_chars": 2}, {"sum_logits": -1.4900130033493042, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.4900130033493042, "logits_per_char": -0.7450065016746521, "num_chars": 2}, {"sum_logits": -1.4197008609771729, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.4197008609771729, "logits_per_char": -0.7098504304885864, "num_chars": 2}, {"sum_logits": -1.0865672826766968, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": true, "logits_per_token": -1.0865672826766968, "logits_per_char": -0.5432836413383484, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3012245893478394, "incorrect_loss_raw": 1.4700160026550293, "correct_loss_per_char": 0.6506122946739197, "incorrect_loss_per_char": 0.7350080013275146, "correct_loss_per_token": 1.3012245893478394, "incorrect_loss_per_token": 1.4700160026550293, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4108946323394775, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.4108946323394775, "logits_per_char": -0.7054473161697388, "num_chars": 2}, {"sum_logits": -1.3012245893478394, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": true, "logits_per_token": -1.3012245893478394, "logits_per_char": -0.6506122946739197, "num_chars": 2}, {"sum_logits": -1.5596182346343994, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.5596182346343994, "logits_per_char": -0.7798091173171997, "num_chars": 2}, {"sum_logits": -1.439535140991211, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.439535140991211, "logits_per_char": -0.7197675704956055, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.364126205444336, "incorrect_loss_raw": 1.4271435737609863, "correct_loss_per_char": 0.682063102722168, "incorrect_loss_per_char": 0.7135717868804932, "correct_loss_per_token": 1.364126205444336, "incorrect_loss_per_token": 1.4271435737609863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5123696327209473, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.5123696327209473, "logits_per_char": -0.7561848163604736, "num_chars": 2}, {"sum_logits": -1.623898983001709, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.623898983001709, "logits_per_char": -0.8119494915008545, "num_chars": 2}, {"sum_logits": -1.364126205444336, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": false, "logits_per_token": -1.364126205444336, "logits_per_char": -0.682063102722168, "num_chars": 2}, {"sum_logits": -1.1451621055603027, "num_tokens": 1, "num_tokens_all": 575, "is_greedy": true, "logits_per_token": -1.1451621055603027, "logits_per_char": -0.5725810527801514, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.46134352684021, "incorrect_loss_raw": 1.3963718016942341, "correct_loss_per_char": 0.730671763420105, "incorrect_loss_per_char": 0.6981859008471171, "correct_loss_per_token": 1.46134352684021, "incorrect_loss_per_token": 1.3963718016942341, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5475749969482422, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.5475749969482422, "logits_per_char": -0.7737874984741211, "num_chars": 2}, {"sum_logits": -1.46134352684021, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.46134352684021, "logits_per_char": -0.730671763420105, "num_chars": 2}, {"sum_logits": -1.5407949686050415, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": false, "logits_per_token": -1.5407949686050415, "logits_per_char": -0.7703974843025208, "num_chars": 2}, {"sum_logits": -1.100745439529419, "num_tokens": 1, "num_tokens_all": 586, "is_greedy": true, "logits_per_token": -1.100745439529419, "logits_per_char": -0.5503727197647095, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5682497024536133, "incorrect_loss_raw": 1.3669018745422363, "correct_loss_per_char": 0.7841248512268066, "incorrect_loss_per_char": 0.6834509372711182, "correct_loss_per_token": 1.5682497024536133, "incorrect_loss_per_token": 1.3669018745422363, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.657970905303955, "num_tokens": 1, "num_tokens_all": 549, "is_greedy": false, "logits_per_token": -1.657970905303955, "logits_per_char": -0.8289854526519775, "num_chars": 2}, {"sum_logits": -1.5682497024536133, "num_tokens": 1, "num_tokens_all": 549, "is_greedy": false, "logits_per_token": -1.5682497024536133, "logits_per_char": -0.7841248512268066, "num_chars": 2}, {"sum_logits": -1.2713687419891357, "num_tokens": 1, "num_tokens_all": 549, "is_greedy": false, "logits_per_token": -1.2713687419891357, "logits_per_char": -0.6356843709945679, "num_chars": 2}, {"sum_logits": -1.1713659763336182, "num_tokens": 1, "num_tokens_all": 549, "is_greedy": true, "logits_per_token": -1.1713659763336182, "logits_per_char": -0.5856829881668091, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4775726795196533, "incorrect_loss_raw": 1.3782779773076375, "correct_loss_per_char": 0.7387863397598267, "incorrect_loss_per_char": 0.6891389886538187, "correct_loss_per_token": 1.4775726795196533, "incorrect_loss_per_token": 1.3782779773076375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4386162757873535, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.4386162757873535, "logits_per_char": -0.7193081378936768, "num_chars": 2}, {"sum_logits": -1.5037143230438232, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.5037143230438232, "logits_per_char": -0.7518571615219116, "num_chars": 2}, {"sum_logits": -1.4775726795196533, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.4775726795196533, "logits_per_char": -0.7387863397598267, "num_chars": 2}, {"sum_logits": -1.1925033330917358, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": true, "logits_per_token": -1.1925033330917358, "logits_per_char": -0.5962516665458679, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3869562149047852, "incorrect_loss_raw": 1.404180367787679, "correct_loss_per_char": 0.6934781074523926, "incorrect_loss_per_char": 0.7020901838938395, "correct_loss_per_token": 1.3869562149047852, "incorrect_loss_per_token": 1.404180367787679, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4746434688568115, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.4746434688568115, "logits_per_char": -0.7373217344284058, "num_chars": 2}, {"sum_logits": -1.5370144844055176, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.5370144844055176, "logits_per_char": -0.7685072422027588, "num_chars": 2}, {"sum_logits": -1.3869562149047852, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": false, "logits_per_token": -1.3869562149047852, "logits_per_char": -0.6934781074523926, "num_chars": 2}, {"sum_logits": -1.200883150100708, "num_tokens": 1, "num_tokens_all": 606, "is_greedy": true, "logits_per_token": -1.200883150100708, "logits_per_char": -0.600441575050354, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4887721538543701, "incorrect_loss_raw": 1.369397481282552, "correct_loss_per_char": 0.7443860769271851, "incorrect_loss_per_char": 0.684698740641276, "correct_loss_per_token": 1.4887721538543701, "incorrect_loss_per_token": 1.369397481282552, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.476232647895813, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.476232647895813, "logits_per_char": -0.7381163239479065, "num_chars": 2}, {"sum_logits": -1.4887721538543701, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.4887721538543701, "logits_per_char": -0.7443860769271851, "num_chars": 2}, {"sum_logits": -1.4253367185592651, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.4253367185592651, "logits_per_char": -0.7126683592796326, "num_chars": 2}, {"sum_logits": -1.2066230773925781, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": true, "logits_per_token": -1.2066230773925781, "logits_per_char": -0.6033115386962891, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1984262466430664, "incorrect_loss_raw": 1.4847374359766643, "correct_loss_per_char": 0.5992131233215332, "incorrect_loss_per_char": 0.7423687179883321, "correct_loss_per_token": 1.1984262466430664, "incorrect_loss_per_token": 1.4847374359766643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1984262466430664, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": true, "logits_per_token": -1.1984262466430664, "logits_per_char": -0.5992131233215332, "num_chars": 2}, {"sum_logits": -1.5704214572906494, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.5704214572906494, "logits_per_char": -0.7852107286453247, "num_chars": 2}, {"sum_logits": -1.613593339920044, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.613593339920044, "logits_per_char": -0.806796669960022, "num_chars": 2}, {"sum_logits": -1.2701975107192993, "num_tokens": 1, "num_tokens_all": 640, "is_greedy": false, "logits_per_token": -1.2701975107192993, "logits_per_char": -0.6350987553596497, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4612687826156616, "incorrect_loss_raw": 1.391373634338379, "correct_loss_per_char": 0.7306343913078308, "incorrect_loss_per_char": 0.6956868171691895, "correct_loss_per_token": 1.4612687826156616, "incorrect_loss_per_token": 1.391373634338379, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.445948839187622, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.445948839187622, "logits_per_char": -0.722974419593811, "num_chars": 2}, {"sum_logits": -1.5801808834075928, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.5801808834075928, "logits_per_char": -0.7900904417037964, "num_chars": 2}, {"sum_logits": -1.4612687826156616, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.4612687826156616, "logits_per_char": -0.7306343913078308, "num_chars": 2}, {"sum_logits": -1.1479911804199219, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": true, "logits_per_token": -1.1479911804199219, "logits_per_char": -0.5739955902099609, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5267378091812134, "incorrect_loss_raw": 1.3827938636144002, "correct_loss_per_char": 0.7633689045906067, "incorrect_loss_per_char": 0.6913969318072001, "correct_loss_per_token": 1.5267378091812134, "incorrect_loss_per_token": 1.3827938636144002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5399878025054932, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.5399878025054932, "logits_per_char": -0.7699939012527466, "num_chars": 2}, {"sum_logits": -1.5616666078567505, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.5616666078567505, "logits_per_char": -0.7808333039283752, "num_chars": 2}, {"sum_logits": -1.5267378091812134, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": false, "logits_per_token": -1.5267378091812134, "logits_per_char": -0.7633689045906067, "num_chars": 2}, {"sum_logits": -1.046727180480957, "num_tokens": 1, "num_tokens_all": 598, "is_greedy": true, "logits_per_token": -1.046727180480957, "logits_per_char": -0.5233635902404785, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7188162803649902, "incorrect_loss_raw": 1.3046249151229858, "correct_loss_per_char": 0.8594081401824951, "incorrect_loss_per_char": 0.6523124575614929, "correct_loss_per_token": 1.7188162803649902, "incorrect_loss_per_token": 1.3046249151229858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3143815994262695, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.3143815994262695, "logits_per_char": -0.6571907997131348, "num_chars": 2}, {"sum_logits": -1.3096837997436523, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.3096837997436523, "logits_per_char": -0.6548418998718262, "num_chars": 2}, {"sum_logits": -1.7188162803649902, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.7188162803649902, "logits_per_char": -0.8594081401824951, "num_chars": 2}, {"sum_logits": -1.2898093461990356, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": true, "logits_per_token": -1.2898093461990356, "logits_per_char": -0.6449046730995178, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5708088874816895, "incorrect_loss_raw": 1.361920714378357, "correct_loss_per_char": 0.7854044437408447, "incorrect_loss_per_char": 0.6809603571891785, "correct_loss_per_token": 1.5708088874816895, "incorrect_loss_per_token": 1.361920714378357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6658121347427368, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.6658121347427368, "logits_per_char": -0.8329060673713684, "num_chars": 2}, {"sum_logits": -1.5708088874816895, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.5708088874816895, "logits_per_char": -0.7854044437408447, "num_chars": 2}, {"sum_logits": -1.2760694026947021, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.2760694026947021, "logits_per_char": -0.6380347013473511, "num_chars": 2}, {"sum_logits": -1.1438806056976318, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": true, "logits_per_token": -1.1438806056976318, "logits_per_char": -0.5719403028488159, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4627349376678467, "incorrect_loss_raw": 1.4108856916427612, "correct_loss_per_char": 0.7313674688339233, "incorrect_loss_per_char": 0.7054428458213806, "correct_loss_per_token": 1.4627349376678467, "incorrect_loss_per_token": 1.4108856916427612, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4135825634002686, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.4135825634002686, "logits_per_char": -0.7067912817001343, "num_chars": 2}, {"sum_logits": -1.3967981338500977, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": true, "logits_per_token": -1.3967981338500977, "logits_per_char": -0.6983990669250488, "num_chars": 2}, {"sum_logits": -1.4627349376678467, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.4627349376678467, "logits_per_char": -0.7313674688339233, "num_chars": 2}, {"sum_logits": -1.4222763776779175, "num_tokens": 1, "num_tokens_all": 608, "is_greedy": false, "logits_per_token": -1.4222763776779175, "logits_per_char": -0.7111381888389587, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4396347999572754, "incorrect_loss_raw": 1.4292124509811401, "correct_loss_per_char": 0.7198173999786377, "incorrect_loss_per_char": 0.7146062254905701, "correct_loss_per_token": 1.4396347999572754, "incorrect_loss_per_token": 1.4292124509811401, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7899354696273804, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.7899354696273804, "logits_per_char": -0.8949677348136902, "num_chars": 2}, {"sum_logits": -1.4827319383621216, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.4827319383621216, "logits_per_char": -0.7413659691810608, "num_chars": 2}, {"sum_logits": -1.4396347999572754, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": false, "logits_per_token": -1.4396347999572754, "logits_per_char": -0.7198173999786377, "num_chars": 2}, {"sum_logits": -1.0149699449539185, "num_tokens": 1, "num_tokens_all": 589, "is_greedy": true, "logits_per_token": -1.0149699449539185, "logits_per_char": -0.5074849724769592, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4849529266357422, "incorrect_loss_raw": 1.398340900739034, "correct_loss_per_char": 0.7424764633178711, "incorrect_loss_per_char": 0.699170450369517, "correct_loss_per_token": 1.4849529266357422, "incorrect_loss_per_token": 1.398340900739034, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5774612426757812, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.5774612426757812, "logits_per_char": -0.7887306213378906, "num_chars": 2}, {"sum_logits": -1.4849529266357422, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.4849529266357422, "logits_per_char": -0.7424764633178711, "num_chars": 2}, {"sum_logits": -1.5883840322494507, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": false, "logits_per_token": -1.5883840322494507, "logits_per_char": -0.7941920161247253, "num_chars": 2}, {"sum_logits": -1.0291774272918701, "num_tokens": 1, "num_tokens_all": 603, "is_greedy": true, "logits_per_token": -1.0291774272918701, "logits_per_char": -0.5145887136459351, "num_chars": 2}], "label": 1, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.436528205871582, "incorrect_loss_raw": 1.4145605166753132, "correct_loss_per_char": 0.718264102935791, "incorrect_loss_per_char": 0.7072802583376566, "correct_loss_per_token": 1.436528205871582, "incorrect_loss_per_token": 1.4145605166753132, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2782996892929077, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": true, "logits_per_token": -1.2782996892929077, "logits_per_char": -0.6391498446464539, "num_chars": 2}, {"sum_logits": -1.5323596000671387, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.5323596000671387, "logits_per_char": -0.7661798000335693, "num_chars": 2}, {"sum_logits": -1.4330222606658936, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.4330222606658936, "logits_per_char": -0.7165111303329468, "num_chars": 2}, {"sum_logits": -1.436528205871582, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.436528205871582, "logits_per_char": -0.718264102935791, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0240424871444702, "incorrect_loss_raw": 1.5596694548924763, "correct_loss_per_char": 0.5120212435722351, "incorrect_loss_per_char": 0.7798347274462382, "correct_loss_per_token": 1.0240424871444702, "incorrect_loss_per_token": 1.5596694548924763, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.48166823387146, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.48166823387146, "logits_per_char": -0.74083411693573, "num_chars": 2}, {"sum_logits": -1.7097042798995972, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.7097042798995972, "logits_per_char": -0.8548521399497986, "num_chars": 2}, {"sum_logits": -1.487635850906372, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.487635850906372, "logits_per_char": -0.743817925453186, "num_chars": 2}, {"sum_logits": -1.0240424871444702, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": true, "logits_per_token": -1.0240424871444702, "logits_per_char": -0.5120212435722351, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5142946243286133, "incorrect_loss_raw": 1.3584380149841309, "correct_loss_per_char": 0.7571473121643066, "incorrect_loss_per_char": 0.6792190074920654, "correct_loss_per_token": 1.5142946243286133, "incorrect_loss_per_token": 1.3584380149841309, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3544631004333496, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.3544631004333496, "logits_per_char": -0.6772315502166748, "num_chars": 2}, {"sum_logits": -1.470578908920288, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.470578908920288, "logits_per_char": -0.735289454460144, "num_chars": 2}, {"sum_logits": -1.5142946243286133, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": false, "logits_per_token": -1.5142946243286133, "logits_per_char": -0.7571473121643066, "num_chars": 2}, {"sum_logits": -1.2502720355987549, "num_tokens": 1, "num_tokens_all": 597, "is_greedy": true, "logits_per_token": -1.2502720355987549, "logits_per_char": -0.6251360177993774, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4588491916656494, "incorrect_loss_raw": 1.3875008424123128, "correct_loss_per_char": 0.7294245958328247, "incorrect_loss_per_char": 0.6937504212061564, "correct_loss_per_token": 1.4588491916656494, "incorrect_loss_per_token": 1.3875008424123128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4556483030319214, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.4556483030319214, "logits_per_char": -0.7278241515159607, "num_chars": 2}, {"sum_logits": -1.5533453226089478, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.5533453226089478, "logits_per_char": -0.7766726613044739, "num_chars": 2}, {"sum_logits": -1.4588491916656494, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": false, "logits_per_token": -1.4588491916656494, "logits_per_char": -0.7294245958328247, "num_chars": 2}, {"sum_logits": -1.1535089015960693, "num_tokens": 1, "num_tokens_all": 596, "is_greedy": true, "logits_per_token": -1.1535089015960693, "logits_per_char": -0.5767544507980347, "num_chars": 2}], "label": 2, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.081336259841919, "incorrect_loss_raw": 1.525070031483968, "correct_loss_per_char": 0.5406681299209595, "incorrect_loss_per_char": 0.762535015741984, "correct_loss_per_token": 1.081336259841919, "incorrect_loss_per_token": 1.525070031483968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5369210243225098, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.5369210243225098, "logits_per_char": -0.7684605121612549, "num_chars": 2}, {"sum_logits": -1.569908857345581, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.569908857345581, "logits_per_char": -0.7849544286727905, "num_chars": 2}, {"sum_logits": -1.4683802127838135, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": false, "logits_per_token": -1.4683802127838135, "logits_per_char": -0.7341901063919067, "num_chars": 2}, {"sum_logits": -1.081336259841919, "num_tokens": 1, "num_tokens_all": 585, "is_greedy": true, "logits_per_token": -1.081336259841919, "logits_per_char": -0.5406681299209595, "num_chars": 2}], "label": 3, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5185778141021729, "incorrect_loss_raw": 1.3658406734466553, "correct_loss_per_char": 0.7592889070510864, "incorrect_loss_per_char": 0.6829203367233276, "correct_loss_per_token": 1.5185778141021729, "incorrect_loss_per_token": 1.3658406734466553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5185778141021729, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.5185778141021729, "logits_per_char": -0.7592889070510864, "num_chars": 2}, {"sum_logits": -1.4491069316864014, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.4491069316864014, "logits_per_char": -0.7245534658432007, "num_chars": 2}, {"sum_logits": -1.4797756671905518, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": false, "logits_per_token": -1.4797756671905518, "logits_per_char": -0.7398878335952759, "num_chars": 2}, {"sum_logits": -1.1686394214630127, "num_tokens": 1, "num_tokens_all": 609, "is_greedy": true, "logits_per_token": -1.1686394214630127, "logits_per_char": -0.5843197107315063, "num_chars": 2}], "label": 0, "task_hash": "264fbafdeceacfd7588ca20ca3546113", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
|