|
{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0704638957977295, "incorrect_loss_raw": 1.6039934555689495, "correct_loss_per_char": 0.5352319478988647, "incorrect_loss_per_char": 0.8019967277844747, "correct_loss_per_token": 1.0704638957977295, "incorrect_loss_per_token": 1.6039934555689495, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1079685688018799, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.1079685688018799, "logits_per_char": -0.5539842844009399, "num_chars": 2}, {"sum_logits": -1.0704638957977295, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.0704638957977295, "logits_per_char": -0.5352319478988647, "num_chars": 2}, {"sum_logits": -1.6843065023422241, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.6843065023422241, "logits_per_char": -0.8421532511711121, "num_chars": 2}, {"sum_logits": -2.019705295562744, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -2.019705295562744, "logits_per_char": -1.009852647781372, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5916364192962646, "incorrect_loss_raw": 1.3928387959798176, "correct_loss_per_char": 0.7958182096481323, "incorrect_loss_per_char": 0.6964193979899088, "correct_loss_per_token": 1.5916364192962646, "incorrect_loss_per_token": 1.3928387959798176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1747397184371948, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.1747397184371948, "logits_per_char": -0.5873698592185974, "num_chars": 2}, {"sum_logits": -1.1181073188781738, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.1181073188781738, "logits_per_char": -0.5590536594390869, "num_chars": 2}, {"sum_logits": -1.5916364192962646, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5916364192962646, "logits_per_char": -0.7958182096481323, "num_chars": 2}, {"sum_logits": -1.8856693506240845, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.8856693506240845, "logits_per_char": -0.9428346753120422, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8330495357513428, "incorrect_loss_raw": 1.3096723159154255, "correct_loss_per_char": 0.9165247678756714, "incorrect_loss_per_char": 0.6548361579577128, "correct_loss_per_token": 1.8330495357513428, "incorrect_loss_per_token": 1.3096723159154255, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1896802186965942, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.1896802186965942, "logits_per_char": -0.5948401093482971, "num_chars": 2}, {"sum_logits": -1.1117968559265137, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.1117968559265137, "logits_per_char": -0.5558984279632568, "num_chars": 2}, {"sum_logits": -1.627539873123169, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.627539873123169, "logits_per_char": -0.8137699365615845, "num_chars": 2}, {"sum_logits": -1.8330495357513428, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.8330495357513428, "logits_per_char": -0.9165247678756714, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9271265268325806, "incorrect_loss_raw": 1.7720727920532227, "correct_loss_per_char": 0.4635632634162903, "incorrect_loss_per_char": 0.8860363960266113, "correct_loss_per_token": 0.9271265268325806, "incorrect_loss_per_token": 1.7720727920532227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0660165548324585, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.0660165548324585, "logits_per_char": -0.5330082774162292, "num_chars": 2}, {"sum_logits": -0.9271265268325806, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -0.9271265268325806, "logits_per_char": -0.4635632634162903, "num_chars": 2}, {"sum_logits": -1.77761709690094, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.77761709690094, "logits_per_char": -0.88880854845047, "num_chars": 2}, {"sum_logits": -2.4725847244262695, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -2.4725847244262695, "logits_per_char": -1.2362923622131348, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2145400047302246, "incorrect_loss_raw": 1.4926794370015461, "correct_loss_per_char": 0.6072700023651123, "incorrect_loss_per_char": 0.7463397185007731, "correct_loss_per_token": 1.2145400047302246, "incorrect_loss_per_token": 1.4926794370015461, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1567180156707764, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.1567180156707764, "logits_per_char": -0.5783590078353882, "num_chars": 2}, {"sum_logits": -1.2145400047302246, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.2145400047302246, "logits_per_char": -0.6072700023651123, "num_chars": 2}, {"sum_logits": -1.6150498390197754, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6150498390197754, "logits_per_char": -0.8075249195098877, "num_chars": 2}, {"sum_logits": -1.706270456314087, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.706270456314087, "logits_per_char": -0.8531352281570435, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.986041247844696, "incorrect_loss_raw": 1.7420563697814941, "correct_loss_per_char": 0.493020623922348, "incorrect_loss_per_char": 0.8710281848907471, "correct_loss_per_token": 0.986041247844696, "incorrect_loss_per_token": 1.7420563697814941, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.986041247844696, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -0.986041247844696, "logits_per_char": -0.493020623922348, "num_chars": 2}, {"sum_logits": -1.0006520748138428, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.0006520748138428, "logits_per_char": -0.5003260374069214, "num_chars": 2}, {"sum_logits": -1.8226509094238281, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.8226509094238281, "logits_per_char": -0.9113254547119141, "num_chars": 2}, {"sum_logits": -2.4028661251068115, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -2.4028661251068115, "logits_per_char": -1.2014330625534058, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9664701223373413, "incorrect_loss_raw": 1.8138081232706706, "correct_loss_per_char": 0.48323506116867065, "incorrect_loss_per_char": 0.9069040616353353, "correct_loss_per_token": 0.9664701223373413, "incorrect_loss_per_token": 1.8138081232706706, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9664701223373413, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -0.9664701223373413, "logits_per_char": -0.48323506116867065, "num_chars": 2}, {"sum_logits": -0.9655255079269409, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -0.9655255079269409, "logits_per_char": -0.48276275396347046, "num_chars": 2}, {"sum_logits": -1.824102520942688, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.824102520942688, "logits_per_char": -0.912051260471344, "num_chars": 2}, {"sum_logits": -2.651796340942383, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -2.651796340942383, "logits_per_char": -1.3258981704711914, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.450774908065796, "incorrect_loss_raw": 1.2540128827095032, "correct_loss_per_char": 1.225387454032898, "incorrect_loss_per_char": 0.6270064413547516, "correct_loss_per_token": 2.450774908065796, "incorrect_loss_per_token": 1.2540128827095032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0176279544830322, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.0176279544830322, "logits_per_char": -0.5088139772415161, "num_chars": 2}, {"sum_logits": -0.9856166243553162, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -0.9856166243553162, "logits_per_char": -0.4928083121776581, "num_chars": 2}, {"sum_logits": -1.7587940692901611, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.7587940692901611, "logits_per_char": -0.8793970346450806, "num_chars": 2}, {"sum_logits": -2.450774908065796, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -2.450774908065796, "logits_per_char": -1.225387454032898, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0739060640335083, "incorrect_loss_raw": 1.6141666968663533, "correct_loss_per_char": 0.5369530320167542, "incorrect_loss_per_char": 0.8070833484331766, "correct_loss_per_token": 1.0739060640335083, "incorrect_loss_per_token": 1.6141666968663533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.080434799194336, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.080434799194336, "logits_per_char": -0.540217399597168, "num_chars": 2}, {"sum_logits": -1.0739060640335083, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.0739060640335083, "logits_per_char": -0.5369530320167542, "num_chars": 2}, {"sum_logits": -1.6899603605270386, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.6899603605270386, "logits_per_char": -0.8449801802635193, "num_chars": 2}, {"sum_logits": -2.0721049308776855, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -2.0721049308776855, "logits_per_char": -1.0360524654388428, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6475601196289062, "incorrect_loss_raw": 1.3739102681477864, "correct_loss_per_char": 0.8237800598144531, "incorrect_loss_per_char": 0.6869551340738932, "correct_loss_per_token": 1.6475601196289062, "incorrect_loss_per_token": 1.3739102681477864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.208313226699829, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.208313226699829, "logits_per_char": -0.6041566133499146, "num_chars": 2}, {"sum_logits": -1.0937995910644531, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.0937995910644531, "logits_per_char": -0.5468997955322266, "num_chars": 2}, {"sum_logits": -1.6475601196289062, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.6475601196289062, "logits_per_char": -0.8237800598144531, "num_chars": 2}, {"sum_logits": -1.8196179866790771, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.8196179866790771, "logits_per_char": -0.9098089933395386, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8380343914031982, "incorrect_loss_raw": 1.4982322851816814, "correct_loss_per_char": 0.9190171957015991, "incorrect_loss_per_char": 0.7491161425908407, "correct_loss_per_token": 1.8380343914031982, "incorrect_loss_per_token": 1.4982322851816814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9818638563156128, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -0.9818638563156128, "logits_per_char": -0.4909319281578064, "num_chars": 2}, {"sum_logits": -0.9661353826522827, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -0.9661353826522827, "logits_per_char": -0.48306769132614136, "num_chars": 2}, {"sum_logits": -1.8380343914031982, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.8380343914031982, "logits_per_char": -0.9190171957015991, "num_chars": 2}, {"sum_logits": -2.5466976165771484, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -2.5466976165771484, "logits_per_char": -1.2733488082885742, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5268349647521973, "incorrect_loss_raw": 1.359757900238037, "correct_loss_per_char": 0.7634174823760986, "incorrect_loss_per_char": 0.6798789501190186, "correct_loss_per_token": 1.5268349647521973, "incorrect_loss_per_token": 1.359757900238037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2913157939910889, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2913157939910889, "logits_per_char": -0.6456578969955444, "num_chars": 2}, {"sum_logits": -1.3035130500793457, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3035130500793457, "logits_per_char": -0.6517565250396729, "num_chars": 2}, {"sum_logits": -1.5268349647521973, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5268349647521973, "logits_per_char": -0.7634174823760986, "num_chars": 2}, {"sum_logits": -1.4844448566436768, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4844448566436768, "logits_per_char": -0.7422224283218384, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1716554164886475, "incorrect_loss_raw": 1.5504411856333415, "correct_loss_per_char": 0.5858277082443237, "incorrect_loss_per_char": 0.7752205928166708, "correct_loss_per_token": 1.1716554164886475, "incorrect_loss_per_token": 1.5504411856333415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1716554164886475, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.1716554164886475, "logits_per_char": -0.5858277082443237, "num_chars": 2}, {"sum_logits": -1.0655295848846436, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.0655295848846436, "logits_per_char": -0.5327647924423218, "num_chars": 2}, {"sum_logits": -1.6159887313842773, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.6159887313842773, "logits_per_char": -0.8079943656921387, "num_chars": 2}, {"sum_logits": -1.9698052406311035, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.9698052406311035, "logits_per_char": -0.9849026203155518, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6058878898620605, "incorrect_loss_raw": 1.3371635278065999, "correct_loss_per_char": 0.8029439449310303, "incorrect_loss_per_char": 0.6685817639032999, "correct_loss_per_token": 1.6058878898620605, "incorrect_loss_per_token": 1.3371635278065999, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2673333883285522, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.2673333883285522, "logits_per_char": -0.6336666941642761, "num_chars": 2}, {"sum_logits": -1.287016749382019, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.287016749382019, "logits_per_char": -0.6435083746910095, "num_chars": 2}, {"sum_logits": -1.6058878898620605, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.6058878898620605, "logits_per_char": -0.8029439449310303, "num_chars": 2}, {"sum_logits": -1.4571404457092285, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4571404457092285, "logits_per_char": -0.7285702228546143, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.652392029762268, "incorrect_loss_raw": 1.3851861159006755, "correct_loss_per_char": 0.826196014881134, "incorrect_loss_per_char": 0.6925930579503378, "correct_loss_per_token": 1.652392029762268, "incorrect_loss_per_token": 1.3851861159006755, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0648117065429688, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.0648117065429688, "logits_per_char": -0.5324058532714844, "num_chars": 2}, {"sum_logits": -1.1804707050323486, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.1804707050323486, "logits_per_char": -0.5902353525161743, "num_chars": 2}, {"sum_logits": -1.652392029762268, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.652392029762268, "logits_per_char": -0.826196014881134, "num_chars": 2}, {"sum_logits": -1.910275936126709, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.910275936126709, "logits_per_char": -0.9551379680633545, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9988807439804077, "incorrect_loss_raw": 1.7281997203826904, "correct_loss_per_char": 0.49944037199020386, "incorrect_loss_per_char": 0.8640998601913452, "correct_loss_per_token": 0.9988807439804077, "incorrect_loss_per_token": 1.7281997203826904, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0164225101470947, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.0164225101470947, "logits_per_char": -0.5082112550735474, "num_chars": 2}, {"sum_logits": -0.9988807439804077, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -0.9988807439804077, "logits_per_char": -0.49944037199020386, "num_chars": 2}, {"sum_logits": -1.7602133750915527, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.7602133750915527, "logits_per_char": -0.8801066875457764, "num_chars": 2}, {"sum_logits": -2.407963275909424, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -2.407963275909424, "logits_per_char": -1.203981637954712, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5786490440368652, "incorrect_loss_raw": 1.386834979057312, "correct_loss_per_char": 0.7893245220184326, "incorrect_loss_per_char": 0.693417489528656, "correct_loss_per_token": 1.5786490440368652, "incorrect_loss_per_token": 1.386834979057312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2476109266281128, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.2476109266281128, "logits_per_char": -0.6238054633140564, "num_chars": 2}, {"sum_logits": -1.0823137760162354, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.0823137760162354, "logits_per_char": -0.5411568880081177, "num_chars": 2}, {"sum_logits": -1.5786490440368652, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.5786490440368652, "logits_per_char": -0.7893245220184326, "num_chars": 2}, {"sum_logits": -1.830580234527588, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.830580234527588, "logits_per_char": -0.915290117263794, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5496232509613037, "incorrect_loss_raw": 1.3893550634384155, "correct_loss_per_char": 0.7748116254806519, "incorrect_loss_per_char": 0.6946775317192078, "correct_loss_per_token": 1.5496232509613037, "incorrect_loss_per_token": 1.3893550634384155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1776355504989624, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.1776355504989624, "logits_per_char": -0.5888177752494812, "num_chars": 2}, {"sum_logits": -1.2633435726165771, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.2633435726165771, "logits_per_char": -0.6316717863082886, "num_chars": 2}, {"sum_logits": -1.5496232509613037, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5496232509613037, "logits_per_char": -0.7748116254806519, "num_chars": 2}, {"sum_logits": -1.727086067199707, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.727086067199707, "logits_per_char": -0.8635430335998535, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6524745225906372, "incorrect_loss_raw": 1.3366872469584148, "correct_loss_per_char": 0.8262372612953186, "incorrect_loss_per_char": 0.6683436234792074, "correct_loss_per_token": 1.6524745225906372, "incorrect_loss_per_token": 1.3366872469584148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2332864999771118, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.2332864999771118, "logits_per_char": -0.6166432499885559, "num_chars": 2}, {"sum_logits": -1.194968581199646, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.194968581199646, "logits_per_char": -0.597484290599823, "num_chars": 2}, {"sum_logits": -1.5818066596984863, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5818066596984863, "logits_per_char": -0.7909033298492432, "num_chars": 2}, {"sum_logits": -1.6524745225906372, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.6524745225906372, "logits_per_char": -0.8262372612953186, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9192330837249756, "incorrect_loss_raw": 1.8051008383433025, "correct_loss_per_char": 0.4596165418624878, "incorrect_loss_per_char": 0.9025504191716512, "correct_loss_per_token": 0.9192330837249756, "incorrect_loss_per_token": 1.8051008383433025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9192330837249756, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -0.9192330837249756, "logits_per_char": -0.4596165418624878, "num_chars": 2}, {"sum_logits": -1.0202476978302002, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.0202476978302002, "logits_per_char": -0.5101238489151001, "num_chars": 2}, {"sum_logits": -1.8900208473205566, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.8900208473205566, "logits_per_char": -0.9450104236602783, "num_chars": 2}, {"sum_logits": -2.5050339698791504, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -2.5050339698791504, "logits_per_char": -1.2525169849395752, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0116591453552246, "incorrect_loss_raw": 1.693300763765971, "correct_loss_per_char": 0.5058295726776123, "incorrect_loss_per_char": 0.8466503818829855, "correct_loss_per_token": 1.0116591453552246, "incorrect_loss_per_token": 1.693300763765971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0116591453552246, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.0116591453552246, "logits_per_char": -0.5058295726776123, "num_chars": 2}, {"sum_logits": -1.0341585874557495, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.0341585874557495, "logits_per_char": -0.5170792937278748, "num_chars": 2}, {"sum_logits": -1.7689063549041748, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.7689063549041748, "logits_per_char": -0.8844531774520874, "num_chars": 2}, {"sum_logits": -2.2768373489379883, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -2.2768373489379883, "logits_per_char": -1.1384186744689941, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.161755084991455, "incorrect_loss_raw": 1.5091685851414998, "correct_loss_per_char": 0.5808775424957275, "incorrect_loss_per_char": 0.7545842925707499, "correct_loss_per_token": 1.161755084991455, "incorrect_loss_per_token": 1.5091685851414998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.161755084991455, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.161755084991455, "logits_per_char": -0.5808775424957275, "num_chars": 2}, {"sum_logits": -1.2167623043060303, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.2167623043060303, "logits_per_char": -0.6083811521530151, "num_chars": 2}, {"sum_logits": -1.6048160791397095, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6048160791397095, "logits_per_char": -0.8024080395698547, "num_chars": 2}, {"sum_logits": -1.7059273719787598, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.7059273719787598, "logits_per_char": -0.8529636859893799, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.27819561958313, "incorrect_loss_raw": 1.2678279479344685, "correct_loss_per_char": 1.139097809791565, "incorrect_loss_per_char": 0.6339139739672343, "correct_loss_per_token": 2.27819561958313, "incorrect_loss_per_token": 1.2678279479344685, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0259594917297363, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.0259594917297363, "logits_per_char": -0.5129797458648682, "num_chars": 2}, {"sum_logits": -1.0300630331039429, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.0300630331039429, "logits_per_char": -0.5150315165519714, "num_chars": 2}, {"sum_logits": -1.7474613189697266, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.7474613189697266, "logits_per_char": -0.8737306594848633, "num_chars": 2}, {"sum_logits": -2.27819561958313, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -2.27819561958313, "logits_per_char": -1.139097809791565, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5810750722885132, "incorrect_loss_raw": 1.3474150101343791, "correct_loss_per_char": 0.7905375361442566, "incorrect_loss_per_char": 0.6737075050671896, "correct_loss_per_token": 1.5810750722885132, "incorrect_loss_per_token": 1.3474150101343791, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3052319288253784, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3052319288253784, "logits_per_char": -0.6526159644126892, "num_chars": 2}, {"sum_logits": -1.2733337879180908, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.2733337879180908, "logits_per_char": -0.6366668939590454, "num_chars": 2}, {"sum_logits": -1.463679313659668, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.463679313659668, "logits_per_char": -0.731839656829834, "num_chars": 2}, {"sum_logits": -1.5810750722885132, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.5810750722885132, "logits_per_char": -0.7905375361442566, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8983031511306763, "incorrect_loss_raw": 1.8213875691095989, "correct_loss_per_char": 0.44915157556533813, "incorrect_loss_per_char": 0.9106937845547994, "correct_loss_per_token": 0.8983031511306763, "incorrect_loss_per_token": 1.8213875691095989, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0522905588150024, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.0522905588150024, "logits_per_char": -0.5261452794075012, "num_chars": 2}, {"sum_logits": -0.8983031511306763, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -0.8983031511306763, "logits_per_char": -0.44915157556533813, "num_chars": 2}, {"sum_logits": -1.8219797611236572, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.8219797611236572, "logits_per_char": -0.9109898805618286, "num_chars": 2}, {"sum_logits": -2.5898923873901367, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -2.5898923873901367, "logits_per_char": -1.2949461936950684, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6786293983459473, "incorrect_loss_raw": 1.3251198927561443, "correct_loss_per_char": 0.8393146991729736, "incorrect_loss_per_char": 0.6625599463780721, "correct_loss_per_token": 1.6786293983459473, "incorrect_loss_per_token": 1.3251198927561443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.214713215827942, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.214713215827942, "logits_per_char": -0.607356607913971, "num_chars": 2}, {"sum_logits": -1.280692219734192, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.280692219734192, "logits_per_char": -0.640346109867096, "num_chars": 2}, {"sum_logits": -1.6786293983459473, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.6786293983459473, "logits_per_char": -0.8393146991729736, "num_chars": 2}, {"sum_logits": -1.4799542427062988, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4799542427062988, "logits_per_char": -0.7399771213531494, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8357462882995605, "incorrect_loss_raw": 1.506553093592326, "correct_loss_per_char": 0.9178731441497803, "incorrect_loss_per_char": 0.753276546796163, "correct_loss_per_token": 1.8357462882995605, "incorrect_loss_per_token": 1.506553093592326, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9913680553436279, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -0.9913680553436279, "logits_per_char": -0.49568402767181396, "num_chars": 2}, {"sum_logits": -0.9488911628723145, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -0.9488911628723145, "logits_per_char": -0.4744455814361572, "num_chars": 2}, {"sum_logits": -1.8357462882995605, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.8357462882995605, "logits_per_char": -0.9178731441497803, "num_chars": 2}, {"sum_logits": -2.579400062561035, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -2.579400062561035, "logits_per_char": -1.2897000312805176, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3352330923080444, "incorrect_loss_raw": 1.436557650566101, "correct_loss_per_char": 0.6676165461540222, "incorrect_loss_per_char": 0.7182788252830505, "correct_loss_per_token": 1.3352330923080444, "incorrect_loss_per_token": 1.436557650566101, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4282712936401367, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4282712936401367, "logits_per_char": -0.7141356468200684, "num_chars": 2}, {"sum_logits": -1.3352330923080444, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3352330923080444, "logits_per_char": -0.6676165461540222, "num_chars": 2}, {"sum_logits": -1.6249958276748657, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.6249958276748657, "logits_per_char": -0.8124979138374329, "num_chars": 2}, {"sum_logits": -1.2564058303833008, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2564058303833008, "logits_per_char": -0.6282029151916504, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.562800407409668, "incorrect_loss_raw": 1.2722939650217693, "correct_loss_per_char": 1.281400203704834, "incorrect_loss_per_char": 0.6361469825108846, "correct_loss_per_token": 2.562800407409668, "incorrect_loss_per_token": 1.2722939650217693, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8665155172348022, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -0.8665155172348022, "logits_per_char": -0.4332577586174011, "num_chars": 2}, {"sum_logits": -1.0711874961853027, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.0711874961853027, "logits_per_char": -0.5355937480926514, "num_chars": 2}, {"sum_logits": -1.8791788816452026, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.8791788816452026, "logits_per_char": -0.9395894408226013, "num_chars": 2}, {"sum_logits": -2.562800407409668, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -2.562800407409668, "logits_per_char": -1.281400203704834, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9711793661117554, "incorrect_loss_raw": 1.810327450434367, "correct_loss_per_char": 0.4855896830558777, "incorrect_loss_per_char": 0.9051637252171835, "correct_loss_per_token": 0.9711793661117554, "incorrect_loss_per_token": 1.810327450434367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9711793661117554, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -0.9711793661117554, "logits_per_char": -0.4855896830558777, "num_chars": 2}, {"sum_logits": -0.9618178606033325, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -0.9618178606033325, "logits_per_char": -0.48090893030166626, "num_chars": 2}, {"sum_logits": -1.8216606378555298, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.8216606378555298, "logits_per_char": -0.9108303189277649, "num_chars": 2}, {"sum_logits": -2.6475038528442383, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -2.6475038528442383, "logits_per_char": -1.3237519264221191, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9523808360099792, "incorrect_loss_raw": 1.8140806555747986, "correct_loss_per_char": 0.4761904180049896, "incorrect_loss_per_char": 0.9070403277873993, "correct_loss_per_token": 0.9523808360099792, "incorrect_loss_per_token": 1.8140806555747986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9788387417793274, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -0.9788387417793274, "logits_per_char": -0.4894193708896637, "num_chars": 2}, {"sum_logits": -0.9523808360099792, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -0.9523808360099792, "logits_per_char": -0.4761904180049896, "num_chars": 2}, {"sum_logits": -1.8330612182617188, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.8330612182617188, "logits_per_char": -0.9165306091308594, "num_chars": 2}, {"sum_logits": -2.6303420066833496, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -2.6303420066833496, "logits_per_char": -1.3151710033416748, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3773983716964722, "incorrect_loss_raw": 1.4177532990773518, "correct_loss_per_char": 0.6886991858482361, "incorrect_loss_per_char": 0.7088766495386759, "correct_loss_per_token": 1.3773983716964722, "incorrect_loss_per_token": 1.4177532990773518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2701256275177002, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2701256275177002, "logits_per_char": -0.6350628137588501, "num_chars": 2}, {"sum_logits": -1.3773983716964722, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3773983716964722, "logits_per_char": -0.6886991858482361, "num_chars": 2}, {"sum_logits": -1.552189588546753, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.552189588546753, "logits_per_char": -0.7760947942733765, "num_chars": 2}, {"sum_logits": -1.4309446811676025, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4309446811676025, "logits_per_char": -0.7154723405838013, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0861831903457642, "incorrect_loss_raw": 1.60344131787618, "correct_loss_per_char": 0.5430915951728821, "incorrect_loss_per_char": 0.80172065893809, "correct_loss_per_token": 1.0861831903457642, "incorrect_loss_per_token": 1.60344131787618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0861831903457642, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.0861831903457642, "logits_per_char": -0.5430915951728821, "num_chars": 2}, {"sum_logits": -1.0851075649261475, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.0851075649261475, "logits_per_char": -0.5425537824630737, "num_chars": 2}, {"sum_logits": -1.657015323638916, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.657015323638916, "logits_per_char": -0.828507661819458, "num_chars": 2}, {"sum_logits": -2.0682010650634766, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -2.0682010650634766, "logits_per_char": -1.0341005325317383, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7351765632629395, "incorrect_loss_raw": 1.3125980695088704, "correct_loss_per_char": 0.8675882816314697, "incorrect_loss_per_char": 0.6562990347544352, "correct_loss_per_token": 1.7351765632629395, "incorrect_loss_per_token": 1.3125980695088704, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4024794101715088, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4024794101715088, "logits_per_char": -0.7012397050857544, "num_chars": 2}, {"sum_logits": -1.282960057258606, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.282960057258606, "logits_per_char": -0.641480028629303, "num_chars": 2}, {"sum_logits": -1.7351765632629395, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.7351765632629395, "logits_per_char": -0.8675882816314697, "num_chars": 2}, {"sum_logits": -1.2523547410964966, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.2523547410964966, "logits_per_char": -0.6261773705482483, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9845258593559265, "incorrect_loss_raw": 1.703710714975993, "correct_loss_per_char": 0.49226292967796326, "incorrect_loss_per_char": 0.8518553574879965, "correct_loss_per_token": 0.9845258593559265, "incorrect_loss_per_token": 1.703710714975993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9845258593559265, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -0.9845258593559265, "logits_per_char": -0.49226292967796326, "num_chars": 2}, {"sum_logits": -1.0462400913238525, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.0462400913238525, "logits_per_char": -0.5231200456619263, "num_chars": 2}, {"sum_logits": -1.8303101062774658, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.8303101062774658, "logits_per_char": -0.9151550531387329, "num_chars": 2}, {"sum_logits": -2.23458194732666, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -2.23458194732666, "logits_per_char": -1.11729097366333, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0694843530654907, "incorrect_loss_raw": 1.617441733678182, "correct_loss_per_char": 0.5347421765327454, "incorrect_loss_per_char": 0.808720866839091, "correct_loss_per_token": 1.0694843530654907, "incorrect_loss_per_token": 1.617441733678182, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0817651748657227, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.0817651748657227, "logits_per_char": -0.5408825874328613, "num_chars": 2}, {"sum_logits": -1.0694843530654907, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.0694843530654907, "logits_per_char": -0.5347421765327454, "num_chars": 2}, {"sum_logits": -1.6926004886627197, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.6926004886627197, "logits_per_char": -0.8463002443313599, "num_chars": 2}, {"sum_logits": -2.0779595375061035, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -2.0779595375061035, "logits_per_char": -1.0389797687530518, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.121893882751465, "incorrect_loss_raw": 1.2594435612360637, "correct_loss_per_char": 1.0609469413757324, "incorrect_loss_per_char": 0.6297217806180319, "correct_loss_per_token": 2.121893882751465, "incorrect_loss_per_token": 1.2594435612360637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0956261157989502, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.0956261157989502, "logits_per_char": -0.5478130578994751, "num_chars": 2}, {"sum_logits": -1.125494360923767, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.125494360923767, "logits_per_char": -0.5627471804618835, "num_chars": 2}, {"sum_logits": -1.5572102069854736, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.5572102069854736, "logits_per_char": -0.7786051034927368, "num_chars": 2}, {"sum_logits": -2.121893882751465, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -2.121893882751465, "logits_per_char": -1.0609469413757324, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.297653317451477, "incorrect_loss_raw": 1.4464008808135986, "correct_loss_per_char": 0.6488266587257385, "incorrect_loss_per_char": 0.7232004404067993, "correct_loss_per_token": 1.297653317451477, "incorrect_loss_per_token": 1.4464008808135986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3871102333068848, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3871102333068848, "logits_per_char": -0.6935551166534424, "num_chars": 2}, {"sum_logits": -1.297653317451477, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.297653317451477, "logits_per_char": -0.6488266587257385, "num_chars": 2}, {"sum_logits": -1.6360430717468262, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.6360430717468262, "logits_per_char": -0.8180215358734131, "num_chars": 2}, {"sum_logits": -1.316049337387085, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.316049337387085, "logits_per_char": -0.6580246686935425, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6120312213897705, "incorrect_loss_raw": 1.392020583152771, "correct_loss_per_char": 0.8060156106948853, "incorrect_loss_per_char": 0.6960102915763855, "correct_loss_per_token": 1.6120312213897705, "incorrect_loss_per_token": 1.392020583152771, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1375703811645508, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.1375703811645508, "logits_per_char": -0.5687851905822754, "num_chars": 2}, {"sum_logits": -1.1289374828338623, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.1289374828338623, "logits_per_char": -0.5644687414169312, "num_chars": 2}, {"sum_logits": -1.6120312213897705, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.6120312213897705, "logits_per_char": -0.8060156106948853, "num_chars": 2}, {"sum_logits": -1.9095538854599, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.9095538854599, "logits_per_char": -0.95477694272995, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.7533559799194336, "incorrect_loss_raw": 1.2410456736882527, "correct_loss_per_char": 1.3766779899597168, "incorrect_loss_per_char": 0.6205228368441263, "correct_loss_per_token": 2.7533559799194336, "incorrect_loss_per_token": 1.2410456736882527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9533343315124512, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -0.9533343315124512, "logits_per_char": -0.4766671657562256, "num_chars": 2}, {"sum_logits": -0.9676883220672607, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -0.9676883220672607, "logits_per_char": -0.48384416103363037, "num_chars": 2}, {"sum_logits": -1.8021143674850464, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.8021143674850464, "logits_per_char": -0.9010571837425232, "num_chars": 2}, {"sum_logits": -2.7533559799194336, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -2.7533559799194336, "logits_per_char": -1.3766779899597168, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4747132062911987, "incorrect_loss_raw": 1.4052501916885376, "correct_loss_per_char": 0.7373566031455994, "incorrect_loss_per_char": 0.7026250958442688, "correct_loss_per_token": 1.4747132062911987, "incorrect_loss_per_token": 1.4052501916885376, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2652809619903564, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.2652809619903564, "logits_per_char": -0.6326404809951782, "num_chars": 2}, {"sum_logits": -1.168182373046875, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.168182373046875, "logits_per_char": -0.5840911865234375, "num_chars": 2}, {"sum_logits": -1.4747132062911987, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4747132062911987, "logits_per_char": -0.7373566031455994, "num_chars": 2}, {"sum_logits": -1.7822872400283813, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.7822872400283813, "logits_per_char": -0.8911436200141907, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8539150953292847, "incorrect_loss_raw": 1.8293171723683674, "correct_loss_per_char": 0.42695754766464233, "incorrect_loss_per_char": 0.9146585861841837, "correct_loss_per_token": 0.8539150953292847, "incorrect_loss_per_token": 1.8293171723683674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8539150953292847, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -0.8539150953292847, "logits_per_char": -0.42695754766464233, "num_chars": 2}, {"sum_logits": -1.1072862148284912, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.1072862148284912, "logits_per_char": -0.5536431074142456, "num_chars": 2}, {"sum_logits": -1.853501319885254, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.853501319885254, "logits_per_char": -0.926750659942627, "num_chars": 2}, {"sum_logits": -2.5271639823913574, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -2.5271639823913574, "logits_per_char": -1.2635819911956787, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.085038661956787, "incorrect_loss_raw": 1.695315380891164, "correct_loss_per_char": 0.5425193309783936, "incorrect_loss_per_char": 0.847657690445582, "correct_loss_per_token": 1.085038661956787, "incorrect_loss_per_token": 1.695315380891164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9258542656898499, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -0.9258542656898499, "logits_per_char": -0.4629271328449249, "num_chars": 2}, {"sum_logits": -1.085038661956787, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.085038661956787, "logits_per_char": -0.5425193309783936, "num_chars": 2}, {"sum_logits": -1.8337454795837402, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.8337454795837402, "logits_per_char": -0.9168727397918701, "num_chars": 2}, {"sum_logits": -2.3263463973999023, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -2.3263463973999023, "logits_per_char": -1.1631731986999512, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.837992548942566, "incorrect_loss_raw": 1.5188923279444377, "correct_loss_per_char": 0.918996274471283, "incorrect_loss_per_char": 0.7594461639722189, "correct_loss_per_token": 1.837992548942566, "incorrect_loss_per_token": 1.5188923279444377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9781531095504761, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -0.9781531095504761, "logits_per_char": -0.48907655477523804, "num_chars": 2}, {"sum_logits": -0.9531967639923096, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -0.9531967639923096, "logits_per_char": -0.4765983819961548, "num_chars": 2}, {"sum_logits": -1.837992548942566, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.837992548942566, "logits_per_char": -0.918996274471283, "num_chars": 2}, {"sum_logits": -2.6253271102905273, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -2.6253271102905273, "logits_per_char": -1.3126635551452637, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.766608476638794, "incorrect_loss_raw": 1.380307932694753, "correct_loss_per_char": 0.883304238319397, "incorrect_loss_per_char": 0.6901539663473765, "correct_loss_per_token": 1.766608476638794, "incorrect_loss_per_token": 1.380307932694753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1749436855316162, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.1749436855316162, "logits_per_char": -0.5874718427658081, "num_chars": 2}, {"sum_logits": -0.9840973019599915, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -0.9840973019599915, "logits_per_char": -0.4920486509799957, "num_chars": 2}, {"sum_logits": -1.766608476638794, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.766608476638794, "logits_per_char": -0.883304238319397, "num_chars": 2}, {"sum_logits": -1.9818828105926514, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.9818828105926514, "logits_per_char": -0.9909414052963257, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8197287321090698, "incorrect_loss_raw": 1.5188241402308147, "correct_loss_per_char": 0.9098643660545349, "incorrect_loss_per_char": 0.7594120701154073, "correct_loss_per_token": 1.8197287321090698, "incorrect_loss_per_token": 1.5188241402308147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9644767045974731, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -0.9644767045974731, "logits_per_char": -0.4822383522987366, "num_chars": 2}, {"sum_logits": -0.9766957759857178, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -0.9766957759857178, "logits_per_char": -0.4883478879928589, "num_chars": 2}, {"sum_logits": -1.8197287321090698, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.8197287321090698, "logits_per_char": -0.9098643660545349, "num_chars": 2}, {"sum_logits": -2.615299940109253, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -2.615299940109253, "logits_per_char": -1.3076499700546265, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0100524425506592, "incorrect_loss_raw": 1.7599493861198425, "correct_loss_per_char": 0.5050262212753296, "incorrect_loss_per_char": 0.8799746930599213, "correct_loss_per_token": 1.0100524425506592, "incorrect_loss_per_token": 1.7599493861198425, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9497869610786438, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -0.9497869610786438, "logits_per_char": -0.4748934805393219, "num_chars": 2}, {"sum_logits": -1.0100524425506592, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.0100524425506592, "logits_per_char": -0.5050262212753296, "num_chars": 2}, {"sum_logits": -1.8356130123138428, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.8356130123138428, "logits_per_char": -0.9178065061569214, "num_chars": 2}, {"sum_logits": -2.494448184967041, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -2.494448184967041, "logits_per_char": -1.2472240924835205, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7158464789390564, "incorrect_loss_raw": 1.9122672478357952, "correct_loss_per_char": 0.3579232394695282, "incorrect_loss_per_char": 0.9561336239178976, "correct_loss_per_token": 0.7158464789390564, "incorrect_loss_per_token": 1.9122672478357952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3527573347091675, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3527573347091675, "logits_per_char": -0.6763786673545837, "num_chars": 2}, {"sum_logits": -0.7158464789390564, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -0.7158464789390564, "logits_per_char": -0.3579232394695282, "num_chars": 2}, {"sum_logits": -1.750115156173706, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.750115156173706, "logits_per_char": -0.875057578086853, "num_chars": 2}, {"sum_logits": -2.6339292526245117, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -2.6339292526245117, "logits_per_char": -1.3169646263122559, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8508963584899902, "incorrect_loss_raw": 1.4099294344584148, "correct_loss_per_char": 0.9254481792449951, "incorrect_loss_per_char": 0.7049647172292074, "correct_loss_per_token": 1.8508963584899902, "incorrect_loss_per_token": 1.4099294344584148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0394713878631592, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.0394713878631592, "logits_per_char": -0.5197356939315796, "num_chars": 2}, {"sum_logits": -1.001344919204712, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.001344919204712, "logits_per_char": -0.500672459602356, "num_chars": 2}, {"sum_logits": -1.8508963584899902, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.8508963584899902, "logits_per_char": -0.9254481792449951, "num_chars": 2}, {"sum_logits": -2.188971996307373, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -2.188971996307373, "logits_per_char": -1.0944859981536865, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.168881893157959, "incorrect_loss_raw": 1.5723469257354736, "correct_loss_per_char": 0.5844409465789795, "incorrect_loss_per_char": 0.7861734628677368, "correct_loss_per_token": 1.168881893157959, "incorrect_loss_per_token": 1.5723469257354736, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0149110555648804, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.0149110555648804, "logits_per_char": -0.5074555277824402, "num_chars": 2}, {"sum_logits": -1.168881893157959, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.168881893157959, "logits_per_char": -0.5844409465789795, "num_chars": 2}, {"sum_logits": -1.6983941793441772, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.6983941793441772, "logits_per_char": -0.8491970896720886, "num_chars": 2}, {"sum_logits": -2.0037355422973633, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -2.0037355422973633, "logits_per_char": -1.0018677711486816, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.524623394012451, "incorrect_loss_raw": 1.2574047247568767, "correct_loss_per_char": 1.2623116970062256, "incorrect_loss_per_char": 0.6287023623784384, "correct_loss_per_token": 2.524623394012451, "incorrect_loss_per_token": 1.2574047247568767, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.02899169921875, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.02899169921875, "logits_per_char": -0.514495849609375, "num_chars": 2}, {"sum_logits": -0.9395555257797241, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -0.9395555257797241, "logits_per_char": -0.46977776288986206, "num_chars": 2}, {"sum_logits": -1.8036669492721558, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.8036669492721558, "logits_per_char": -0.9018334746360779, "num_chars": 2}, {"sum_logits": -2.524623394012451, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -2.524623394012451, "logits_per_char": -1.2623116970062256, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7429356575012207, "incorrect_loss_raw": 1.8825831015904744, "correct_loss_per_char": 0.37146782875061035, "incorrect_loss_per_char": 0.9412915507952372, "correct_loss_per_token": 0.7429356575012207, "incorrect_loss_per_token": 1.8825831015904744, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3542616367340088, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3542616367340088, "logits_per_char": -0.6771308183670044, "num_chars": 2}, {"sum_logits": -0.7429356575012207, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -0.7429356575012207, "logits_per_char": -0.37146782875061035, "num_chars": 2}, {"sum_logits": -1.6890588998794556, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.6890588998794556, "logits_per_char": -0.8445294499397278, "num_chars": 2}, {"sum_logits": -2.604428768157959, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -2.604428768157959, "logits_per_char": -1.3022143840789795, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3594329357147217, "incorrect_loss_raw": 1.459100882212321, "correct_loss_per_char": 0.6797164678573608, "incorrect_loss_per_char": 0.7295504411061605, "correct_loss_per_token": 1.3594329357147217, "incorrect_loss_per_token": 1.459100882212321, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3594329357147217, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3594329357147217, "logits_per_char": -0.6797164678573608, "num_chars": 2}, {"sum_logits": -1.0436177253723145, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.0436177253723145, "logits_per_char": -0.5218088626861572, "num_chars": 2}, {"sum_logits": -1.5198794603347778, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5198794603347778, "logits_per_char": -0.7599397301673889, "num_chars": 2}, {"sum_logits": -1.8138054609298706, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.8138054609298706, "logits_per_char": -0.9069027304649353, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.779362440109253, "incorrect_loss_raw": 1.4357098738352458, "correct_loss_per_char": 0.8896812200546265, "incorrect_loss_per_char": 0.7178549369176229, "correct_loss_per_token": 1.779362440109253, "incorrect_loss_per_token": 1.4357098738352458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0439791679382324, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.0439791679382324, "logits_per_char": -0.5219895839691162, "num_chars": 2}, {"sum_logits": -1.0002591609954834, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.0002591609954834, "logits_per_char": -0.5001295804977417, "num_chars": 2}, {"sum_logits": -1.779362440109253, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.779362440109253, "logits_per_char": -0.8896812200546265, "num_chars": 2}, {"sum_logits": -2.2628912925720215, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -2.2628912925720215, "logits_per_char": -1.1314456462860107, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0519213676452637, "incorrect_loss_raw": 1.6268611351648967, "correct_loss_per_char": 0.5259606838226318, "incorrect_loss_per_char": 0.8134305675824484, "correct_loss_per_token": 1.0519213676452637, "incorrect_loss_per_token": 1.6268611351648967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0969295501708984, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.0969295501708984, "logits_per_char": -0.5484647750854492, "num_chars": 2}, {"sum_logits": -1.0519213676452637, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.0519213676452637, "logits_per_char": -0.5259606838226318, "num_chars": 2}, {"sum_logits": -1.6862481832504272, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.6862481832504272, "logits_per_char": -0.8431240916252136, "num_chars": 2}, {"sum_logits": -2.0974056720733643, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -2.0974056720733643, "logits_per_char": -1.0487028360366821, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9905924201011658, "incorrect_loss_raw": 1.7799084782600403, "correct_loss_per_char": 0.4952962100505829, "incorrect_loss_per_char": 0.8899542391300201, "correct_loss_per_token": 0.9905924201011658, "incorrect_loss_per_token": 1.7799084782600403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9905924201011658, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -0.9905924201011658, "logits_per_char": -0.4952962100505829, "num_chars": 2}, {"sum_logits": -0.9670650362968445, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -0.9670650362968445, "logits_per_char": -0.48353251814842224, "num_chars": 2}, {"sum_logits": -1.7952775955200195, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.7952775955200195, "logits_per_char": -0.8976387977600098, "num_chars": 2}, {"sum_logits": -2.577382802963257, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -2.577382802963257, "logits_per_char": -1.2886914014816284, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.404176950454712, "incorrect_loss_raw": 1.2586114605267842, "correct_loss_per_char": 1.202088475227356, "incorrect_loss_per_char": 0.6293057302633921, "correct_loss_per_token": 2.404176950454712, "incorrect_loss_per_token": 1.2586114605267842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9731712937355042, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -0.9731712937355042, "logits_per_char": -0.4865856468677521, "num_chars": 2}, {"sum_logits": -1.0404026508331299, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.0404026508331299, "logits_per_char": -0.5202013254165649, "num_chars": 2}, {"sum_logits": -1.7622604370117188, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.7622604370117188, "logits_per_char": -0.8811302185058594, "num_chars": 2}, {"sum_logits": -2.404176950454712, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -2.404176950454712, "logits_per_char": -1.202088475227356, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1390130519866943, "incorrect_loss_raw": 1.5515845616658528, "correct_loss_per_char": 0.5695065259933472, "incorrect_loss_per_char": 0.7757922808329264, "correct_loss_per_token": 1.1390130519866943, "incorrect_loss_per_token": 1.5515845616658528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1190924644470215, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.1190924644470215, "logits_per_char": -0.5595462322235107, "num_chars": 2}, {"sum_logits": -1.1390130519866943, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.1390130519866943, "logits_per_char": -0.5695065259933472, "num_chars": 2}, {"sum_logits": -1.6233024597167969, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.6233024597167969, "logits_per_char": -0.8116512298583984, "num_chars": 2}, {"sum_logits": -1.9123587608337402, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.9123587608337402, "logits_per_char": -0.9561793804168701, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9802690744400024, "incorrect_loss_raw": 1.7606434027353923, "correct_loss_per_char": 0.4901345372200012, "incorrect_loss_per_char": 0.8803217013676962, "correct_loss_per_token": 0.9802690744400024, "incorrect_loss_per_token": 1.7606434027353923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9802690744400024, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -0.9802690744400024, "logits_per_char": -0.4901345372200012, "num_chars": 2}, {"sum_logits": -1.0047664642333984, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.0047664642333984, "logits_per_char": -0.5023832321166992, "num_chars": 2}, {"sum_logits": -1.7672197818756104, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.7672197818756104, "logits_per_char": -0.8836098909378052, "num_chars": 2}, {"sum_logits": -2.509943962097168, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -2.509943962097168, "logits_per_char": -1.254971981048584, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9990050792694092, "incorrect_loss_raw": 1.7473525603612263, "correct_loss_per_char": 0.4995025396347046, "incorrect_loss_per_char": 0.8736762801806132, "correct_loss_per_token": 0.9990050792694092, "incorrect_loss_per_token": 1.7473525603612263, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9990050792694092, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -0.9990050792694092, "logits_per_char": -0.4995025396347046, "num_chars": 2}, {"sum_logits": -0.9929027557373047, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -0.9929027557373047, "logits_per_char": -0.49645137786865234, "num_chars": 2}, {"sum_logits": -1.7725170850753784, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.7725170850753784, "logits_per_char": -0.8862585425376892, "num_chars": 2}, {"sum_logits": -2.476637840270996, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -2.476637840270996, "logits_per_char": -1.238318920135498, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.5938663482666016, "incorrect_loss_raw": 1.2567834258079529, "correct_loss_per_char": 1.2969331741333008, "incorrect_loss_per_char": 0.6283917129039764, "correct_loss_per_token": 2.5938663482666016, "incorrect_loss_per_token": 1.2567834258079529, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0088915824890137, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.0088915824890137, "logits_per_char": -0.5044457912445068, "num_chars": 2}, {"sum_logits": -0.9339047074317932, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -0.9339047074317932, "logits_per_char": -0.4669523537158966, "num_chars": 2}, {"sum_logits": -1.8275539875030518, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.8275539875030518, "logits_per_char": -0.9137769937515259, "num_chars": 2}, {"sum_logits": -2.5938663482666016, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -2.5938663482666016, "logits_per_char": -1.2969331741333008, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.325117588043213, "incorrect_loss_raw": 1.2688363393147786, "correct_loss_per_char": 1.1625587940216064, "incorrect_loss_per_char": 0.6344181696573893, "correct_loss_per_token": 2.325117588043213, "incorrect_loss_per_token": 1.2688363393147786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.99452805519104, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -0.99452805519104, "logits_per_char": -0.49726402759552, "num_chars": 2}, {"sum_logits": -1.024800419807434, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.024800419807434, "logits_per_char": -0.512400209903717, "num_chars": 2}, {"sum_logits": -1.7871805429458618, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.7871805429458618, "logits_per_char": -0.8935902714729309, "num_chars": 2}, {"sum_logits": -2.325117588043213, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -2.325117588043213, "logits_per_char": -1.1625587940216064, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7941930294036865, "incorrect_loss_raw": 1.477716604868571, "correct_loss_per_char": 0.8970965147018433, "incorrect_loss_per_char": 0.7388583024342855, "correct_loss_per_token": 1.7941930294036865, "incorrect_loss_per_token": 1.477716604868571, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0122979879379272, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.0122979879379272, "logits_per_char": -0.5061489939689636, "num_chars": 2}, {"sum_logits": -0.9777408838272095, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -0.9777408838272095, "logits_per_char": -0.48887044191360474, "num_chars": 2}, {"sum_logits": -1.7941930294036865, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.7941930294036865, "logits_per_char": -0.8970965147018433, "num_chars": 2}, {"sum_logits": -2.443110942840576, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -2.443110942840576, "logits_per_char": -1.221555471420288, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1551048755645752, "incorrect_loss_raw": 1.5068573951721191, "correct_loss_per_char": 0.5775524377822876, "incorrect_loss_per_char": 0.7534286975860596, "correct_loss_per_token": 1.1551048755645752, "incorrect_loss_per_token": 1.5068573951721191, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3527913093566895, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3527913093566895, "logits_per_char": -0.6763956546783447, "num_chars": 2}, {"sum_logits": -1.1551048755645752, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.1551048755645752, "logits_per_char": -0.5775524377822876, "num_chars": 2}, {"sum_logits": -1.621490240097046, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.621490240097046, "logits_per_char": -0.810745120048523, "num_chars": 2}, {"sum_logits": -1.546290636062622, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.546290636062622, "logits_per_char": -0.773145318031311, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2176611423492432, "incorrect_loss_raw": 1.522673765818278, "correct_loss_per_char": 0.6088305711746216, "incorrect_loss_per_char": 0.761336882909139, "correct_loss_per_token": 1.2176611423492432, "incorrect_loss_per_token": 1.522673765818278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0913021564483643, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.0913021564483643, "logits_per_char": -0.5456510782241821, "num_chars": 2}, {"sum_logits": -1.2176611423492432, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.2176611423492432, "logits_per_char": -0.6088305711746216, "num_chars": 2}, {"sum_logits": -1.5840001106262207, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.5840001106262207, "logits_per_char": -0.7920000553131104, "num_chars": 2}, {"sum_logits": -1.892719030380249, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.892719030380249, "logits_per_char": -0.9463595151901245, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.826427698135376, "incorrect_loss_raw": 1.5448882182439168, "correct_loss_per_char": 0.913213849067688, "incorrect_loss_per_char": 0.7724441091219584, "correct_loss_per_token": 1.826427698135376, "incorrect_loss_per_token": 1.5448882182439168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9533250331878662, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -0.9533250331878662, "logits_per_char": -0.4766625165939331, "num_chars": 2}, {"sum_logits": -0.9685095548629761, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -0.9685095548629761, "logits_per_char": -0.48425477743148804, "num_chars": 2}, {"sum_logits": -1.826427698135376, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.826427698135376, "logits_per_char": -0.913213849067688, "num_chars": 2}, {"sum_logits": -2.712830066680908, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -2.712830066680908, "logits_per_char": -1.356415033340454, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.4656429290771484, "incorrect_loss_raw": 1.2768408854802449, "correct_loss_per_char": 1.2328214645385742, "incorrect_loss_per_char": 0.6384204427401224, "correct_loss_per_token": 2.4656429290771484, "incorrect_loss_per_token": 1.2768408854802449, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9545725584030151, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -0.9545725584030151, "logits_per_char": -0.47728627920150757, "num_chars": 2}, {"sum_logits": -1.001200795173645, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.001200795173645, "logits_per_char": -0.5006003975868225, "num_chars": 2}, {"sum_logits": -1.8747493028640747, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.8747493028640747, "logits_per_char": -0.9373746514320374, "num_chars": 2}, {"sum_logits": -2.4656429290771484, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -2.4656429290771484, "logits_per_char": -1.2328214645385742, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.701014518737793, "incorrect_loss_raw": 1.3330239454905193, "correct_loss_per_char": 0.8505072593688965, "incorrect_loss_per_char": 0.6665119727452596, "correct_loss_per_token": 1.701014518737793, "incorrect_loss_per_token": 1.3330239454905193, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1214247941970825, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.1214247941970825, "logits_per_char": -0.5607123970985413, "num_chars": 2}, {"sum_logits": -1.2753772735595703, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.2753772735595703, "logits_per_char": -0.6376886367797852, "num_chars": 2}, {"sum_logits": -1.701014518737793, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.701014518737793, "logits_per_char": -0.8505072593688965, "num_chars": 2}, {"sum_logits": -1.6022697687149048, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.6022697687149048, "logits_per_char": -0.8011348843574524, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2460765838623047, "incorrect_loss_raw": 1.4612527290980022, "correct_loss_per_char": 0.6230382919311523, "incorrect_loss_per_char": 0.7306263645490011, "correct_loss_per_token": 1.2460765838623047, "incorrect_loss_per_token": 1.4612527290980022, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2460765838623047, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2460765838623047, "logits_per_char": -0.6230382919311523, "num_chars": 2}, {"sum_logits": -1.282619595527649, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.282619595527649, "logits_per_char": -0.6413097977638245, "num_chars": 2}, {"sum_logits": -1.6026681661605835, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.6026681661605835, "logits_per_char": -0.8013340830802917, "num_chars": 2}, {"sum_logits": -1.498470425605774, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.498470425605774, "logits_per_char": -0.749235212802887, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9879962205886841, "incorrect_loss_raw": 1.7153171300888062, "correct_loss_per_char": 0.49399811029434204, "incorrect_loss_per_char": 0.8576585650444031, "correct_loss_per_token": 0.9879962205886841, "incorrect_loss_per_token": 1.7153171300888062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9879962205886841, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -0.9879962205886841, "logits_per_char": -0.49399811029434204, "num_chars": 2}, {"sum_logits": -1.043735384941101, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.043735384941101, "logits_per_char": -0.5218676924705505, "num_chars": 2}, {"sum_logits": -1.7648351192474365, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.7648351192474365, "logits_per_char": -0.8824175596237183, "num_chars": 2}, {"sum_logits": -2.337380886077881, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -2.337380886077881, "logits_per_char": -1.1686904430389404, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.61566162109375, "incorrect_loss_raw": 1.3687085310618083, "correct_loss_per_char": 0.807830810546875, "incorrect_loss_per_char": 0.6843542655309042, "correct_loss_per_token": 1.61566162109375, "incorrect_loss_per_token": 1.3687085310618083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3011176586151123, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3011176586151123, "logits_per_char": -0.6505588293075562, "num_chars": 2}, {"sum_logits": -1.197301983833313, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.197301983833313, "logits_per_char": -0.5986509919166565, "num_chars": 2}, {"sum_logits": -1.6077059507369995, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6077059507369995, "logits_per_char": -0.8038529753684998, "num_chars": 2}, {"sum_logits": -1.61566162109375, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.61566162109375, "logits_per_char": -0.807830810546875, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.679209589958191, "incorrect_loss_raw": 1.4070279598236084, "correct_loss_per_char": 0.8396047949790955, "incorrect_loss_per_char": 0.7035139799118042, "correct_loss_per_token": 1.679209589958191, "incorrect_loss_per_token": 1.4070279598236084, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0923681259155273, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.0923681259155273, "logits_per_char": -0.5461840629577637, "num_chars": 2}, {"sum_logits": -1.0761725902557373, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.0761725902557373, "logits_per_char": -0.5380862951278687, "num_chars": 2}, {"sum_logits": -1.679209589958191, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.679209589958191, "logits_per_char": -0.8396047949790955, "num_chars": 2}, {"sum_logits": -2.0525431632995605, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -2.0525431632995605, "logits_per_char": -1.0262715816497803, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.837633728981018, "incorrect_loss_raw": 1.3016709486643474, "correct_loss_per_char": 0.918816864490509, "incorrect_loss_per_char": 0.6508354743321737, "correct_loss_per_token": 1.837633728981018, "incorrect_loss_per_token": 1.3016709486643474, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.165023922920227, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.165023922920227, "logits_per_char": -0.5825119614601135, "num_chars": 2}, {"sum_logits": -1.157849907875061, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.157849907875061, "logits_per_char": -0.5789249539375305, "num_chars": 2}, {"sum_logits": -1.582139015197754, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.582139015197754, "logits_per_char": -0.791069507598877, "num_chars": 2}, {"sum_logits": -1.837633728981018, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.837633728981018, "logits_per_char": -0.918816864490509, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0994303226470947, "incorrect_loss_raw": 1.6788881818453472, "correct_loss_per_char": 0.5497151613235474, "incorrect_loss_per_char": 0.8394440909226736, "correct_loss_per_token": 1.0994303226470947, "incorrect_loss_per_token": 1.6788881818453472, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9406855702400208, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -0.9406855702400208, "logits_per_char": -0.4703427851200104, "num_chars": 2}, {"sum_logits": -1.0994303226470947, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.0994303226470947, "logits_per_char": -0.5497151613235474, "num_chars": 2}, {"sum_logits": -1.7625715732574463, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.7625715732574463, "logits_per_char": -0.8812857866287231, "num_chars": 2}, {"sum_logits": -2.333407402038574, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -2.333407402038574, "logits_per_char": -1.166703701019287, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.449838399887085, "incorrect_loss_raw": 1.253905216852824, "correct_loss_per_char": 1.2249191999435425, "incorrect_loss_per_char": 0.626952608426412, "correct_loss_per_token": 2.449838399887085, "incorrect_loss_per_token": 1.253905216852824, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9382565021514893, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -0.9382565021514893, "logits_per_char": -0.46912825107574463, "num_chars": 2}, {"sum_logits": -1.0671050548553467, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.0671050548553467, "logits_per_char": -0.5335525274276733, "num_chars": 2}, {"sum_logits": -1.7563540935516357, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.7563540935516357, "logits_per_char": -0.8781770467758179, "num_chars": 2}, {"sum_logits": -2.449838399887085, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -2.449838399887085, "logits_per_char": -1.2249191999435425, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9609256982803345, "incorrect_loss_raw": 1.7284382184346516, "correct_loss_per_char": 0.48046284914016724, "incorrect_loss_per_char": 0.8642191092173258, "correct_loss_per_token": 0.9609256982803345, "incorrect_loss_per_token": 1.7284382184346516, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0660815238952637, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.0660815238952637, "logits_per_char": -0.5330407619476318, "num_chars": 2}, {"sum_logits": -0.9609256982803345, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -0.9609256982803345, "logits_per_char": -0.48046284914016724, "num_chars": 2}, {"sum_logits": -1.7718448638916016, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.7718448638916016, "logits_per_char": -0.8859224319458008, "num_chars": 2}, {"sum_logits": -2.34738826751709, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -2.34738826751709, "logits_per_char": -1.173694133758545, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8079307079315186, "incorrect_loss_raw": 1.4068834781646729, "correct_loss_per_char": 0.9039653539657593, "incorrect_loss_per_char": 0.7034417390823364, "correct_loss_per_token": 1.8079307079315186, "incorrect_loss_per_token": 1.4068834781646729, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0516127347946167, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.0516127347946167, "logits_per_char": -0.5258063673973083, "num_chars": 2}, {"sum_logits": -1.0113269090652466, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.0113269090652466, "logits_per_char": -0.5056634545326233, "num_chars": 2}, {"sum_logits": -1.8079307079315186, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.8079307079315186, "logits_per_char": -0.9039653539657593, "num_chars": 2}, {"sum_logits": -2.1577107906341553, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -2.1577107906341553, "logits_per_char": -1.0788553953170776, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7055729031562805, "incorrect_loss_raw": 1.9282135963439941, "correct_loss_per_char": 0.35278645157814026, "incorrect_loss_per_char": 0.9641067981719971, "correct_loss_per_token": 0.7055729031562805, "incorrect_loss_per_token": 1.9282135963439941, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7055729031562805, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -0.7055729031562805, "logits_per_char": -0.35278645157814026, "num_chars": 2}, {"sum_logits": -1.2780942916870117, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.2780942916870117, "logits_per_char": -0.6390471458435059, "num_chars": 2}, {"sum_logits": -1.9482340812683105, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.9482340812683105, "logits_per_char": -0.9741170406341553, "num_chars": 2}, {"sum_logits": -2.55831241607666, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -2.55831241607666, "logits_per_char": -1.27915620803833, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2972309589385986, "incorrect_loss_raw": 1.2697121302286785, "correct_loss_per_char": 1.1486154794692993, "incorrect_loss_per_char": 0.6348560651143392, "correct_loss_per_token": 2.2972309589385986, "incorrect_loss_per_token": 1.2697121302286785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0500777959823608, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.0500777959823608, "logits_per_char": -0.5250388979911804, "num_chars": 2}, {"sum_logits": -0.9876987934112549, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -0.9876987934112549, "logits_per_char": -0.49384939670562744, "num_chars": 2}, {"sum_logits": -1.7713598012924194, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.7713598012924194, "logits_per_char": -0.8856799006462097, "num_chars": 2}, {"sum_logits": -2.2972309589385986, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -2.2972309589385986, "logits_per_char": -1.1486154794692993, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7641754150390625, "incorrect_loss_raw": 1.464380959669749, "correct_loss_per_char": 0.8820877075195312, "incorrect_loss_per_char": 0.7321904798348745, "correct_loss_per_token": 1.7641754150390625, "incorrect_loss_per_token": 1.464380959669749, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0789380073547363, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.0789380073547363, "logits_per_char": -0.5394690036773682, "num_chars": 2}, {"sum_logits": -0.9457051157951355, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -0.9457051157951355, "logits_per_char": -0.47285255789756775, "num_chars": 2}, {"sum_logits": -1.7641754150390625, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.7641754150390625, "logits_per_char": -0.8820877075195312, "num_chars": 2}, {"sum_logits": -2.368499755859375, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -2.368499755859375, "logits_per_char": -1.1842498779296875, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0182316303253174, "incorrect_loss_raw": 1.7420639793078105, "correct_loss_per_char": 0.5091158151626587, "incorrect_loss_per_char": 0.8710319896539053, "correct_loss_per_token": 1.0182316303253174, "incorrect_loss_per_token": 1.7420639793078105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9824482798576355, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -0.9824482798576355, "logits_per_char": -0.49122413992881775, "num_chars": 2}, {"sum_logits": -1.0182316303253174, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.0182316303253174, "logits_per_char": -0.5091158151626587, "num_chars": 2}, {"sum_logits": -1.731065034866333, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.731065034866333, "logits_per_char": -0.8655325174331665, "num_chars": 2}, {"sum_logits": -2.512678623199463, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -2.512678623199463, "logits_per_char": -1.2563393115997314, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.505537986755371, "incorrect_loss_raw": 1.261103669802348, "correct_loss_per_char": 1.2527689933776855, "incorrect_loss_per_char": 0.630551834901174, "correct_loss_per_token": 2.505537986755371, "incorrect_loss_per_token": 1.261103669802348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9635795950889587, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -0.9635795950889587, "logits_per_char": -0.48178979754447937, "num_chars": 2}, {"sum_logits": -0.9980356097221375, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -0.9980356097221375, "logits_per_char": -0.4990178048610687, "num_chars": 2}, {"sum_logits": -1.8216958045959473, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.8216958045959473, "logits_per_char": -0.9108479022979736, "num_chars": 2}, {"sum_logits": -2.505537986755371, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -2.505537986755371, "logits_per_char": -1.2527689933776855, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9597129821777344, "incorrect_loss_raw": 1.7746285200119019, "correct_loss_per_char": 0.4798564910888672, "incorrect_loss_per_char": 0.8873142600059509, "correct_loss_per_token": 0.9597129821777344, "incorrect_loss_per_token": 1.7746285200119019, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9597129821777344, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -0.9597129821777344, "logits_per_char": -0.4798564910888672, "num_chars": 2}, {"sum_logits": -1.0074964761734009, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.0074964761734009, "logits_per_char": -0.5037482380867004, "num_chars": 2}, {"sum_logits": -1.8151841163635254, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.8151841163635254, "logits_per_char": -0.9075920581817627, "num_chars": 2}, {"sum_logits": -2.5012049674987793, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -2.5012049674987793, "logits_per_char": -1.2506024837493896, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8298301696777344, "incorrect_loss_raw": 1.4491092761357625, "correct_loss_per_char": 0.9149150848388672, "incorrect_loss_per_char": 0.7245546380678812, "correct_loss_per_token": 1.8298301696777344, "incorrect_loss_per_token": 1.4491092761357625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.986219048500061, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -0.986219048500061, "logits_per_char": -0.4931095242500305, "num_chars": 2}, {"sum_logits": -1.00559663772583, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.00559663772583, "logits_per_char": -0.502798318862915, "num_chars": 2}, {"sum_logits": -1.8298301696777344, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.8298301696777344, "logits_per_char": -0.9149150848388672, "num_chars": 2}, {"sum_logits": -2.3555121421813965, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -2.3555121421813965, "logits_per_char": -1.1777560710906982, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9854483604431152, "incorrect_loss_raw": 1.3089015483856201, "correct_loss_per_char": 0.9927241802215576, "incorrect_loss_per_char": 0.6544507741928101, "correct_loss_per_token": 1.9854483604431152, "incorrect_loss_per_token": 1.3089015483856201, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1572697162628174, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.1572697162628174, "logits_per_char": -0.5786348581314087, "num_chars": 2}, {"sum_logits": -0.99288010597229, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -0.99288010597229, "logits_per_char": -0.496440052986145, "num_chars": 2}, {"sum_logits": -1.776554822921753, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.776554822921753, "logits_per_char": -0.8882774114608765, "num_chars": 2}, {"sum_logits": -1.9854483604431152, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.9854483604431152, "logits_per_char": -0.9927241802215576, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7747790813446045, "incorrect_loss_raw": 1.4742942651112874, "correct_loss_per_char": 0.8873895406723022, "incorrect_loss_per_char": 0.7371471325556437, "correct_loss_per_token": 1.7747790813446045, "incorrect_loss_per_token": 1.4742942651112874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9807584285736084, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -0.9807584285736084, "logits_per_char": -0.4903792142868042, "num_chars": 2}, {"sum_logits": -1.0206902027130127, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.0206902027130127, "logits_per_char": -0.5103451013565063, "num_chars": 2}, {"sum_logits": -1.7747790813446045, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.7747790813446045, "logits_per_char": -0.8873895406723022, "num_chars": 2}, {"sum_logits": -2.421434164047241, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -2.421434164047241, "logits_per_char": -1.2107170820236206, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0253244638442993, "incorrect_loss_raw": 1.7127492427825928, "correct_loss_per_char": 0.5126622319221497, "incorrect_loss_per_char": 0.8563746213912964, "correct_loss_per_token": 1.0253244638442993, "incorrect_loss_per_token": 1.7127492427825928, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0253244638442993, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.0253244638442993, "logits_per_char": -0.5126622319221497, "num_chars": 2}, {"sum_logits": -1.0087299346923828, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.0087299346923828, "logits_per_char": -0.5043649673461914, "num_chars": 2}, {"sum_logits": -1.7271173000335693, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.7271173000335693, "logits_per_char": -0.8635586500167847, "num_chars": 2}, {"sum_logits": -2.402400493621826, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -2.402400493621826, "logits_per_char": -1.201200246810913, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2935335636138916, "incorrect_loss_raw": 1.465480883916219, "correct_loss_per_char": 0.6467667818069458, "incorrect_loss_per_char": 0.7327404419581095, "correct_loss_per_token": 1.2935335636138916, "incorrect_loss_per_token": 1.465480883916219, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2935335636138916, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.2935335636138916, "logits_per_char": -0.6467667818069458, "num_chars": 2}, {"sum_logits": -1.1081899404525757, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.1081899404525757, "logits_per_char": -0.5540949702262878, "num_chars": 2}, {"sum_logits": -1.5535764694213867, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5535764694213867, "logits_per_char": -0.7767882347106934, "num_chars": 2}, {"sum_logits": -1.7346762418746948, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.7346762418746948, "logits_per_char": -0.8673381209373474, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0705933570861816, "incorrect_loss_raw": 1.6195133527119954, "correct_loss_per_char": 0.5352966785430908, "incorrect_loss_per_char": 0.8097566763559977, "correct_loss_per_token": 1.0705933570861816, "incorrect_loss_per_token": 1.6195133527119954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0705933570861816, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.0705933570861816, "logits_per_char": -0.5352966785430908, "num_chars": 2}, {"sum_logits": -1.1302239894866943, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.1302239894866943, "logits_per_char": -0.5651119947433472, "num_chars": 2}, {"sum_logits": -1.5836799144744873, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.5836799144744873, "logits_per_char": -0.7918399572372437, "num_chars": 2}, {"sum_logits": -2.1446361541748047, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -2.1446361541748047, "logits_per_char": -1.0723180770874023, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8583638668060303, "incorrect_loss_raw": 1.511202375094096, "correct_loss_per_char": 0.9291819334030151, "incorrect_loss_per_char": 0.755601187547048, "correct_loss_per_token": 1.8583638668060303, "incorrect_loss_per_token": 1.511202375094096, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9387264847755432, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -0.9387264847755432, "logits_per_char": -0.4693632423877716, "num_chars": 2}, {"sum_logits": -0.984056293964386, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -0.984056293964386, "logits_per_char": -0.492028146982193, "num_chars": 2}, {"sum_logits": -1.8583638668060303, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.8583638668060303, "logits_per_char": -0.9291819334030151, "num_chars": 2}, {"sum_logits": -2.6108243465423584, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -2.6108243465423584, "logits_per_char": -1.3054121732711792, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.448502540588379, "incorrect_loss_raw": 1.2571728030840557, "correct_loss_per_char": 1.2242512702941895, "incorrect_loss_per_char": 0.6285864015420278, "correct_loss_per_token": 2.448502540588379, "incorrect_loss_per_token": 1.2571728030840557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0914306640625, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.0914306640625, "logits_per_char": -0.54571533203125, "num_chars": 2}, {"sum_logits": -0.91652911901474, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -0.91652911901474, "logits_per_char": -0.45826455950737, "num_chars": 2}, {"sum_logits": -1.7635586261749268, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.7635586261749268, "logits_per_char": -0.8817793130874634, "num_chars": 2}, {"sum_logits": -2.448502540588379, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -2.448502540588379, "logits_per_char": -1.2242512702941895, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1090340614318848, "incorrect_loss_raw": 1.5830867290496826, "correct_loss_per_char": 0.5545170307159424, "incorrect_loss_per_char": 0.7915433645248413, "correct_loss_per_token": 1.1090340614318848, "incorrect_loss_per_token": 1.5830867290496826, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0799089670181274, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.0799089670181274, "logits_per_char": -0.5399544835090637, "num_chars": 2}, {"sum_logits": -1.1090340614318848, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.1090340614318848, "logits_per_char": -0.5545170307159424, "num_chars": 2}, {"sum_logits": -1.6826215982437134, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.6826215982437134, "logits_per_char": -0.8413107991218567, "num_chars": 2}, {"sum_logits": -1.986729621887207, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.986729621887207, "logits_per_char": -0.9933648109436035, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2636159658432007, "incorrect_loss_raw": 1.5192156235376995, "correct_loss_per_char": 0.6318079829216003, "incorrect_loss_per_char": 0.7596078117688497, "correct_loss_per_token": 1.2636159658432007, "incorrect_loss_per_token": 1.5192156235376995, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0510790348052979, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.0510790348052979, "logits_per_char": -0.5255395174026489, "num_chars": 2}, {"sum_logits": -1.2636159658432007, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.2636159658432007, "logits_per_char": -0.6318079829216003, "num_chars": 2}, {"sum_logits": -1.8088881969451904, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.8088881969451904, "logits_per_char": -0.9044440984725952, "num_chars": 2}, {"sum_logits": -1.6976796388626099, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6976796388626099, "logits_per_char": -0.8488398194313049, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0855040550231934, "incorrect_loss_raw": 1.582263429959615, "correct_loss_per_char": 0.5427520275115967, "incorrect_loss_per_char": 0.7911317149798075, "correct_loss_per_token": 1.0855040550231934, "incorrect_loss_per_token": 1.582263429959615, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0855040550231934, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.0855040550231934, "logits_per_char": -0.5427520275115967, "num_chars": 2}, {"sum_logits": -1.129238486289978, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.129238486289978, "logits_per_char": -0.564619243144989, "num_chars": 2}, {"sum_logits": -1.6773488521575928, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.6773488521575928, "logits_per_char": -0.8386744260787964, "num_chars": 2}, {"sum_logits": -1.9402029514312744, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.9402029514312744, "logits_per_char": -0.9701014757156372, "num_chars": 2}], "label": 0, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.4870166778564453, "incorrect_loss_raw": 1.2550140221913655, "correct_loss_per_char": 1.2435083389282227, "incorrect_loss_per_char": 0.6275070110956827, "correct_loss_per_token": 2.4870166778564453, "incorrect_loss_per_token": 1.2550140221913655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0233511924743652, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.0233511924743652, "logits_per_char": -0.5116755962371826, "num_chars": 2}, {"sum_logits": -0.9634615182876587, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -0.9634615182876587, "logits_per_char": -0.48173075914382935, "num_chars": 2}, {"sum_logits": -1.7782293558120728, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.7782293558120728, "logits_per_char": -0.8891146779060364, "num_chars": 2}, {"sum_logits": -2.4870166778564453, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -2.4870166778564453, "logits_per_char": -1.2435083389282227, "num_chars": 2}], "label": 3, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7753188610076904, "incorrect_loss_raw": 1.4717767039934795, "correct_loss_per_char": 0.8876594305038452, "incorrect_loss_per_char": 0.7358883519967397, "correct_loss_per_token": 1.7753188610076904, "incorrect_loss_per_token": 1.4717767039934795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9949821829795837, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -0.9949821829795837, "logits_per_char": -0.49749109148979187, "num_chars": 2}, {"sum_logits": -1.0094077587127686, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.0094077587127686, "logits_per_char": -0.5047038793563843, "num_chars": 2}, {"sum_logits": -1.7753188610076904, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.7753188610076904, "logits_per_char": -0.8876594305038452, "num_chars": 2}, {"sum_logits": -2.410940170288086, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -2.410940170288086, "logits_per_char": -1.205470085144043, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.066894292831421, "incorrect_loss_raw": 1.598144292831421, "correct_loss_per_char": 0.5334471464157104, "incorrect_loss_per_char": 0.7990721464157104, "correct_loss_per_token": 1.066894292831421, "incorrect_loss_per_token": 1.598144292831421, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.167852759361267, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.167852759361267, "logits_per_char": -0.5839263796806335, "num_chars": 2}, {"sum_logits": -1.066894292831421, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.066894292831421, "logits_per_char": -0.5334471464157104, "num_chars": 2}, {"sum_logits": -1.6038130521774292, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.6038130521774292, "logits_per_char": -0.8019065260887146, "num_chars": 2}, {"sum_logits": -2.0227670669555664, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -2.0227670669555664, "logits_per_char": -1.0113835334777832, "num_chars": 2}], "label": 1, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5343291759490967, "incorrect_loss_raw": 1.4316203594207764, "correct_loss_per_char": 0.7671645879745483, "incorrect_loss_per_char": 0.7158101797103882, "correct_loss_per_token": 1.5343291759490967, "incorrect_loss_per_token": 1.4316203594207764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1407790184020996, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.1407790184020996, "logits_per_char": -0.5703895092010498, "num_chars": 2}, {"sum_logits": -1.128190517425537, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.128190517425537, "logits_per_char": -0.5640952587127686, "num_chars": 2}, {"sum_logits": -1.5343291759490967, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5343291759490967, "logits_per_char": -0.7671645879745483, "num_chars": 2}, {"sum_logits": -2.0258915424346924, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -2.0258915424346924, "logits_per_char": -1.0129457712173462, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6022560596466064, "incorrect_loss_raw": 1.3902700742085774, "correct_loss_per_char": 0.8011280298233032, "incorrect_loss_per_char": 0.6951350371042887, "correct_loss_per_token": 1.6022560596466064, "incorrect_loss_per_token": 1.3902700742085774, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2459704875946045, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.2459704875946045, "logits_per_char": -0.6229852437973022, "num_chars": 2}, {"sum_logits": -1.0621399879455566, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.0621399879455566, "logits_per_char": -0.5310699939727783, "num_chars": 2}, {"sum_logits": -1.6022560596466064, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.6022560596466064, "logits_per_char": -0.8011280298233032, "num_chars": 2}, {"sum_logits": -1.8626997470855713, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.8626997470855713, "logits_per_char": -0.9313498735427856, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
{"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8901283740997314, "incorrect_loss_raw": 1.525459885597229, "correct_loss_per_char": 0.9450641870498657, "incorrect_loss_per_char": 0.7627299427986145, "correct_loss_per_token": 1.8901283740997314, "incorrect_loss_per_token": 1.525459885597229, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9664173126220703, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -0.9664173126220703, "logits_per_char": -0.48320865631103516, "num_chars": 2}, {"sum_logits": -0.9332226514816284, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -0.9332226514816284, "logits_per_char": -0.4666113257408142, "num_chars": 2}, {"sum_logits": -1.8901283740997314, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.8901283740997314, "logits_per_char": -0.9450641870498657, "num_chars": 2}, {"sum_logits": -2.6767396926879883, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -2.6767396926879883, "logits_per_char": -1.3383698463439941, "num_chars": 2}], "label": 2, "task_hash": "bdde3fee40ebc8ddc5786c67975c5b31", "model_hash": "03418cf8091a9882619950ffb07429a5"} |
|
|