{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.034348828741352, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 4.045307443365696e-06, "loss": 3.818, "step": 1000 }, { "epoch": 0.12, "eval_HasAns_exact": 44.97300944669366, "eval_HasAns_f1": 52.01439218879365, "eval_HasAns_total": 5928, "eval_NoAns_exact": 50.647603027754414, "eval_NoAns_f1": 50.647603027754414, "eval_NoAns_total": 5945, "eval_best_exact": 50.09685841825992, "eval_best_exact_thresh": 0.0, "eval_best_f1": 51.885203441071, "eval_best_f1_thresh": 0.0, "eval_exact": 47.81436873578708, "eval_f1": 51.33001911018023, "eval_total": 11873, "step": 1000 }, { "epoch": 0.24, "learning_rate": 8.090614886731393e-06, "loss": 1.5859, "step": 2000 }, { "epoch": 0.24, "eval_HasAns_exact": 69.04520917678812, "eval_HasAns_f1": 77.70845080310447, "eval_HasAns_total": 5928, "eval_NoAns_exact": 45.853658536585364, "eval_NoAns_f1": 45.853658536585364, "eval_NoAns_total": 5945, "eval_best_exact": 57.48336561947276, "eval_best_exact_thresh": 0.0, "eval_best_f1": 61.802768280079185, "eval_best_f1_thresh": 0.0, "eval_exact": 57.43283079255453, "eval_f1": 61.758249503984274, "eval_total": 11873, "step": 2000 }, { "epoch": 0.36, "learning_rate": 9.863646928182218e-06, "loss": 1.3077, "step": 3000 }, { "epoch": 0.36, "eval_HasAns_exact": 70.58029689608637, "eval_HasAns_f1": 78.40109850512528, "eval_HasAns_total": 5928, "eval_NoAns_exact": 64.8780487804878, "eval_NoAns_f1": 64.8780487804878, "eval_NoAns_total": 5945, "eval_best_exact": 67.73351301271794, "eval_best_exact_thresh": 0.0, "eval_best_f1": 71.63831482678222, "eval_best_f1_thresh": 0.0, "eval_exact": 67.7250905415649, "eval_f1": 71.62989235562912, "eval_total": 11873, "step": 3000 }, { "epoch": 0.49, "learning_rate": 9.605402473981872e-06, "loss": 1.1543, "step": 4000 }, { "epoch": 0.49, "eval_HasAns_exact": 65.24966261808368, "eval_HasAns_f1": 70.75967293392448, "eval_HasAns_total": 5928, "eval_NoAns_exact": 85.26492851135409, "eval_NoAns_f1": 85.26492851135409, "eval_NoAns_total": 5945, "eval_best_exact": 75.27162469468541, "eval_best_exact_thresh": 0.0, "eval_best_f1": 78.02268518085604, "eval_best_f1_thresh": 0.0, "eval_exact": 75.27162469468541, "eval_f1": 78.02268518085617, "eval_total": 11873, "step": 4000 }, { "epoch": 0.61, "learning_rate": 9.347158019781525e-06, "loss": 1.083, "step": 5000 }, { "epoch": 0.61, "eval_HasAns_exact": 72.62145748987854, "eval_HasAns_f1": 78.57932116933335, "eval_HasAns_total": 5928, "eval_NoAns_exact": 75.47518923465097, "eval_NoAns_f1": 75.47518923465097, "eval_NoAns_total": 5945, "eval_best_exact": 74.05036637749515, "eval_best_exact_thresh": 0.0, "eval_best_f1": 77.02503292274977, "eval_best_f1_thresh": 0.0, "eval_exact": 74.05036637749515, "eval_f1": 77.02503292274983, "eval_total": 11873, "step": 5000 }, { "epoch": 0.73, "learning_rate": 9.08891356558118e-06, "loss": 1.0232, "step": 6000 }, { "epoch": 0.73, "eval_HasAns_exact": 75.87719298245614, "eval_HasAns_f1": 82.4856147328558, "eval_HasAns_total": 5928, "eval_NoAns_exact": 75.1892346509672, "eval_NoAns_f1": 75.1892346509672, "eval_NoAns_total": 5945, "eval_best_exact": 75.53272130042954, "eval_best_exact_thresh": 0.0, "eval_best_f1": 78.83220114009663, "eval_best_f1_thresh": 0.0, "eval_exact": 75.53272130042954, "eval_f1": 78.83220114009681, "eval_total": 11873, "step": 6000 }, { "epoch": 0.85, "learning_rate": 8.830669111380835e-06, "loss": 0.9925, "step": 7000 }, { "epoch": 0.85, "eval_HasAns_exact": 76.16396761133603, "eval_HasAns_f1": 82.92856292587923, "eval_HasAns_total": 5928, "eval_NoAns_exact": 76.53490328006728, "eval_NoAns_f1": 76.53490328006728, "eval_NoAns_total": 5945, "eval_best_exact": 76.34970100227407, "eval_best_exact_thresh": 0.0, "eval_best_f1": 79.72715581778914, "eval_best_f1_thresh": 0.0, "eval_exact": 76.34970100227407, "eval_f1": 79.72715581778931, "eval_total": 11873, "step": 7000 }, { "epoch": 0.97, "learning_rate": 8.572424657180489e-06, "loss": 0.9409, "step": 8000 }, { "epoch": 0.97, "eval_HasAns_exact": 76.87246963562752, "eval_HasAns_f1": 83.28420126895146, "eval_HasAns_total": 5928, "eval_NoAns_exact": 77.32548359966358, "eval_NoAns_f1": 77.32548359966358, "eval_NoAns_total": 5945, "eval_best_exact": 77.0993009348943, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.3005765284547, "eval_best_f1_thresh": 0.0, "eval_exact": 77.0993009348943, "eval_f1": 80.30057652845481, "eval_total": 11873, "step": 8000 }, { "epoch": 1.09, "learning_rate": 8.314180202980142e-06, "loss": 0.8459, "step": 9000 }, { "epoch": 1.09, "eval_HasAns_exact": 79.47031039136303, "eval_HasAns_f1": 86.27241463084697, "eval_HasAns_total": 5928, "eval_NoAns_exact": 70.64760302775441, "eval_NoAns_f1": 70.64760302775441, "eval_NoAns_total": 5945, "eval_best_exact": 75.05264044470648, "eval_best_exact_thresh": 0.0, "eval_best_f1": 78.44882286967555, "eval_best_f1_thresh": 0.0, "eval_exact": 75.05264044470648, "eval_f1": 78.44882286967575, "eval_total": 11873, "step": 9000 }, { "epoch": 1.21, "learning_rate": 8.055935748779795e-06, "loss": 0.7936, "step": 10000 }, { "epoch": 1.21, "eval_HasAns_exact": 73.66734143049932, "eval_HasAns_f1": 79.21121792689594, "eval_HasAns_total": 5928, "eval_NoAns_exact": 83.28006728343145, "eval_NoAns_f1": 83.28006728343145, "eval_NoAns_total": 5945, "eval_best_exact": 78.48058620399225, "eval_best_exact_thresh": 0.0, "eval_best_f1": 81.24855553530176, "eval_best_f1_thresh": 0.0, "eval_exact": 78.48058620399225, "eval_f1": 81.24855553530186, "eval_total": 11873, "step": 10000 }, { "epoch": 1.34, "learning_rate": 7.797691294579448e-06, "loss": 0.801, "step": 11000 }, { "epoch": 1.34, "eval_HasAns_exact": 74.07219973009447, "eval_HasAns_f1": 79.92037997936747, "eval_HasAns_total": 5928, "eval_NoAns_exact": 84.52481076534903, "eval_NoAns_f1": 84.52481076534903, "eval_NoAns_total": 5945, "eval_best_exact": 79.3059883769898, "eval_best_exact_thresh": 0.0, "eval_best_f1": 82.22589173062302, "eval_best_f1_thresh": 0.0, "eval_exact": 79.3059883769898, "eval_f1": 82.2258917306232, "eval_total": 11873, "step": 11000 }, { "epoch": 1.46, "learning_rate": 7.539446840379103e-06, "loss": 0.8088, "step": 12000 }, { "epoch": 1.46, "eval_HasAns_exact": 76.06275303643724, "eval_HasAns_f1": 82.31177335930619, "eval_HasAns_total": 5928, "eval_NoAns_exact": 80.40370058873002, "eval_NoAns_f1": 80.40370058873002, "eval_NoAns_total": 5945, "eval_best_exact": 78.2363345405542, "eval_best_exact_thresh": 0.0, "eval_best_f1": 81.35637096554916, "eval_best_f1_thresh": 0.0, "eval_exact": 78.2363345405542, "eval_f1": 81.3563709655493, "eval_total": 11873, "step": 12000 }, { "epoch": 1.58, "learning_rate": 7.281202386178758e-06, "loss": 0.8092, "step": 13000 }, { "epoch": 1.58, "eval_HasAns_exact": 78.7280701754386, "eval_HasAns_f1": 85.24409087886156, "eval_HasAns_total": 5928, "eval_NoAns_exact": 76.31623212783852, "eval_NoAns_f1": 76.31623212783852, "eval_NoAns_total": 5945, "eval_best_exact": 77.52042449254611, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.77376995956291, "eval_best_f1_thresh": 0.0, "eval_exact": 77.52042449254611, "eval_f1": 80.77376995956293, "eval_total": 11873, "step": 13000 }, { "epoch": 1.7, "learning_rate": 7.0229579319784115e-06, "loss": 0.7684, "step": 14000 }, { "epoch": 1.7, "eval_HasAns_exact": 78.0195681511471, "eval_HasAns_f1": 84.35251514019012, "eval_HasAns_total": 5928, "eval_NoAns_exact": 79.76450798990749, "eval_NoAns_f1": 79.76450798990749, "eval_NoAns_total": 5945, "eval_best_exact": 78.89328729049103, "eval_best_exact_thresh": 0.0, "eval_best_f1": 82.05522696462938, "eval_best_f1_thresh": 0.0, "eval_exact": 78.89328729049103, "eval_f1": 82.05522696462947, "eval_total": 11873, "step": 14000 }, { "epoch": 1.82, "learning_rate": 6.764713477778065e-06, "loss": 0.7751, "step": 15000 }, { "epoch": 1.82, "eval_HasAns_exact": 78.69433198380567, "eval_HasAns_f1": 84.83678017806832, "eval_HasAns_total": 5928, "eval_NoAns_exact": 78.16652649285113, "eval_NoAns_f1": 78.16652649285113, "eval_NoAns_total": 5945, "eval_best_exact": 78.43005137707404, "eval_best_exact_thresh": 0.0, "eval_best_f1": 81.49687803382365, "eval_best_f1_thresh": 0.0, "eval_exact": 78.43005137707404, "eval_f1": 81.49687803382375, "eval_total": 11873, "step": 15000 }, { "epoch": 1.94, "learning_rate": 6.506469023577719e-06, "loss": 0.7746, "step": 16000 }, { "epoch": 1.94, "eval_HasAns_exact": 78.40755735492577, "eval_HasAns_f1": 84.57986037447542, "eval_HasAns_total": 5928, "eval_NoAns_exact": 78.67115222876367, "eval_NoAns_f1": 78.67115222876367, "eval_NoAns_total": 5945, "eval_best_exact": 78.53954350206351, "eval_best_exact_thresh": 0.0, "eval_best_f1": 81.62127619808709, "eval_best_f1_thresh": 0.0, "eval_exact": 78.53954350206351, "eval_f1": 81.62127619808716, "eval_total": 11873, "step": 16000 }, { "epoch": 2.06, "learning_rate": 6.248224569377374e-06, "loss": 0.6995, "step": 17000 }, { "epoch": 2.06, "eval_HasAns_exact": 79.25101214574899, "eval_HasAns_f1": 85.85300684093066, "eval_HasAns_total": 5928, "eval_NoAns_exact": 76.93860386879732, "eval_NoAns_f1": 76.93860386879732, "eval_NoAns_total": 5945, "eval_best_exact": 78.09315253095258, "eval_best_exact_thresh": 0.0, "eval_best_f1": 81.38942344420408, "eval_best_f1_thresh": 0.0, "eval_exact": 78.09315253095258, "eval_f1": 81.38942344420423, "eval_total": 11873, "step": 17000 }, { "epoch": 2.18, "learning_rate": 5.9899801151770276e-06, "loss": 0.6657, "step": 18000 }, { "epoch": 2.18, "eval_HasAns_exact": 79.1497975708502, "eval_HasAns_f1": 85.40764464218985, "eval_HasAns_total": 5928, "eval_NoAns_exact": 78.01513877207738, "eval_NoAns_f1": 78.01513877207738, "eval_NoAns_total": 5945, "eval_best_exact": 78.5816558578287, "eval_best_exact_thresh": 0.0, "eval_best_f1": 81.70609933790104, "eval_best_f1_thresh": 0.0, "eval_exact": 78.5816558578287, "eval_f1": 81.70609933790118, "eval_total": 11873, "step": 18000 }, { "epoch": 2.31, "learning_rate": 5.7317356609766805e-06, "loss": 0.643, "step": 19000 }, { "epoch": 2.31, "eval_HasAns_exact": 73.6842105263158, "eval_HasAns_f1": 79.63974257623968, "eval_HasAns_total": 5928, "eval_NoAns_exact": 84.60891505466779, "eval_NoAns_f1": 84.60891505466779, "eval_NoAns_total": 5945, "eval_best_exact": 79.15438389623516, "eval_best_exact_thresh": 0.0, "eval_best_f1": 82.12788629596118, "eval_best_f1_thresh": 0.0, "eval_exact": 79.15438389623516, "eval_f1": 82.12788629596123, "eval_total": 11873, "step": 19000 }, { "epoch": 2.43, "learning_rate": 5.473491206776334e-06, "loss": 0.6494, "step": 20000 }, { "epoch": 2.43, "eval_HasAns_exact": 77.10863697705803, "eval_HasAns_f1": 83.39536168287856, "eval_HasAns_total": 5928, "eval_NoAns_exact": 80.89150546677881, "eval_NoAns_f1": 80.89150546677881, "eval_NoAns_total": 5945, "eval_best_exact": 79.0027794154805, "eval_best_exact_thresh": 0.0, "eval_best_f1": 82.14164103900457, "eval_best_f1_thresh": 0.0, "eval_exact": 79.0027794154805, "eval_f1": 82.14164103900465, "eval_total": 11873, "step": 20000 }, { "epoch": 2.55, "learning_rate": 5.215246752575989e-06, "loss": 0.6326, "step": 21000 }, { "epoch": 2.55, "eval_HasAns_exact": 79.28475033738192, "eval_HasAns_f1": 85.68401857002343, "eval_HasAns_total": 5928, "eval_NoAns_exact": 78.33473507148865, "eval_NoAns_f1": 78.33473507148865, "eval_NoAns_total": 5945, "eval_best_exact": 78.80906257896066, "eval_best_exact_thresh": 0.0, "eval_best_f1": 82.00411539485366, "eval_best_f1_thresh": 0.0, "eval_exact": 78.80906257896066, "eval_f1": 82.00411539485363, "eval_total": 11873, "step": 21000 }, { "epoch": 2.67, "learning_rate": 4.957002298375643e-06, "loss": 0.6236, "step": 22000 }, { "epoch": 2.67, "eval_HasAns_exact": 79.77395411605939, "eval_HasAns_f1": 85.76130612988482, "eval_HasAns_total": 5928, "eval_NoAns_exact": 78.45248107653491, "eval_NoAns_f1": 78.45248107653491, "eval_NoAns_total": 5945, "eval_best_exact": 79.11227154046998, "eval_best_exact_thresh": 0.0, "eval_best_f1": 82.10166114191487, "eval_best_f1_thresh": 0.0, "eval_exact": 79.11227154046998, "eval_f1": 82.10166114191493, "eval_total": 11873, "step": 22000 }, { "epoch": 2.79, "learning_rate": 4.698757844175297e-06, "loss": 0.6177, "step": 23000 }, { "epoch": 2.79, "eval_HasAns_exact": 80.17881241565452, "eval_HasAns_f1": 86.16732437952784, "eval_HasAns_total": 5928, "eval_NoAns_exact": 78.33473507148865, "eval_NoAns_f1": 78.33473507148865, "eval_NoAns_total": 5945, "eval_best_exact": 79.25545355007159, "eval_best_exact_thresh": 0.0, "eval_best_f1": 82.24542229612054, "eval_best_f1_thresh": 0.0, "eval_exact": 79.25545355007159, "eval_f1": 82.24542229612064, "eval_total": 11873, "step": 23000 }, { "epoch": 2.91, "learning_rate": 4.4405133899749504e-06, "loss": 0.6171, "step": 24000 }, { "epoch": 2.91, "eval_HasAns_exact": 79.25101214574899, "eval_HasAns_f1": 85.3060014254297, "eval_HasAns_total": 5928, "eval_NoAns_exact": 79.3103448275862, "eval_NoAns_f1": 79.3103448275862, "eval_NoAns_total": 5945, "eval_best_exact": 79.2807209635307, "eval_best_exact_thresh": 0.0, "eval_best_f1": 82.30388077570497, "eval_best_f1_thresh": 0.0, "eval_exact": 79.2807209635307, "eval_f1": 82.303880775705, "eval_total": 11873, "step": 24000 }, { "epoch": 3.03, "learning_rate": 4.182268935774604e-06, "loss": 0.5992, "step": 25000 }, { "epoch": 3.03, "eval_HasAns_exact": 76.51821862348179, "eval_HasAns_f1": 82.59682578958576, "eval_HasAns_total": 5928, "eval_NoAns_exact": 84.00336417157276, "eval_NoAns_f1": 84.00336417157276, "eval_NoAns_total": 5945, "eval_best_exact": 80.26615008843595, "eval_best_exact_thresh": 0.0, "eval_best_f1": 83.30110193553955, "eval_best_f1_thresh": 0.0, "eval_exact": 80.26615008843595, "eval_f1": 83.30110193553969, "eval_total": 11873, "step": 25000 } ], "max_steps": 41195, "num_train_epochs": 5, "total_flos": 7.838843910309734e+16, "trial_name": null, "trial_params": null }