{ "best_metric": 0.9489402697495183, "best_model_checkpoint": "vit-msn-small-corect_cleaned_dataset_lateral_flow_ivalidation/checkpoint-32", "epoch": 92.3076923076923, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9230769230769231, "eval_accuracy": 0.14932562620423892, "eval_loss": 0.8616608381271362, "eval_runtime": 4.4185, "eval_samples_per_second": 234.919, "eval_steps_per_second": 3.847, "step": 3 }, { "epoch": 1.8461538461538463, "eval_accuracy": 0.8795761078998073, "eval_loss": 0.4977250099182129, "eval_runtime": 4.3984, "eval_samples_per_second": 235.995, "eval_steps_per_second": 3.865, "step": 6 }, { "epoch": 2.769230769230769, "eval_accuracy": 0.7273603082851637, "eval_loss": 0.6142979264259338, "eval_runtime": 4.4015, "eval_samples_per_second": 235.828, "eval_steps_per_second": 3.862, "step": 9 }, { "epoch": 3.076923076923077, "grad_norm": 12.760916709899902, "learning_rate": 1.6666666666666667e-05, "loss": 0.6181, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.8323699421965318, "eval_loss": 0.46852532029151917, "eval_runtime": 4.468, "eval_samples_per_second": 232.321, "eval_steps_per_second": 3.805, "step": 13 }, { "epoch": 4.923076923076923, "eval_accuracy": 0.8622350674373795, "eval_loss": 0.3828171491622925, "eval_runtime": 4.2719, "eval_samples_per_second": 242.983, "eval_steps_per_second": 3.979, "step": 16 }, { "epoch": 5.846153846153846, "eval_accuracy": 0.8497109826589595, "eval_loss": 0.40278810262680054, "eval_runtime": 4.3081, "eval_samples_per_second": 240.944, "eval_steps_per_second": 3.946, "step": 19 }, { "epoch": 6.153846153846154, "grad_norm": 19.64206886291504, "learning_rate": 3.3333333333333335e-05, "loss": 0.3645, "step": 20 }, { "epoch": 6.769230769230769, "eval_accuracy": 0.9210019267822736, "eval_loss": 0.24846747517585754, "eval_runtime": 4.5312, "eval_samples_per_second": 229.078, "eval_steps_per_second": 3.752, "step": 22 }, { "epoch": 8.0, "eval_accuracy": 0.9123314065510597, "eval_loss": 0.24262332916259766, "eval_runtime": 4.334, "eval_samples_per_second": 239.504, "eval_steps_per_second": 3.923, "step": 26 }, { "epoch": 8.923076923076923, "eval_accuracy": 0.7774566473988439, "eval_loss": 0.5673837065696716, "eval_runtime": 4.5821, "eval_samples_per_second": 226.533, "eval_steps_per_second": 3.71, "step": 29 }, { "epoch": 9.23076923076923, "grad_norm": 55.66674041748047, "learning_rate": 5e-05, "loss": 0.3492, "step": 30 }, { "epoch": 9.846153846153847, "eval_accuracy": 0.9489402697495183, "eval_loss": 0.16099636256694794, "eval_runtime": 4.3519, "eval_samples_per_second": 238.518, "eval_steps_per_second": 3.906, "step": 32 }, { "epoch": 10.76923076923077, "eval_accuracy": 0.9171483622350675, "eval_loss": 0.32253944873809814, "eval_runtime": 4.441, "eval_samples_per_second": 233.73, "eval_steps_per_second": 3.828, "step": 35 }, { "epoch": 12.0, "eval_accuracy": 0.9123314065510597, "eval_loss": 0.29041457176208496, "eval_runtime": 4.3317, "eval_samples_per_second": 239.628, "eval_steps_per_second": 3.925, "step": 39 }, { "epoch": 12.307692307692308, "grad_norm": 8.140445709228516, "learning_rate": 4.814814814814815e-05, "loss": 0.3472, "step": 40 }, { "epoch": 12.923076923076923, "eval_accuracy": 0.9132947976878613, "eval_loss": 0.28509509563446045, "eval_runtime": 4.4172, "eval_samples_per_second": 234.99, "eval_steps_per_second": 3.849, "step": 42 }, { "epoch": 13.846153846153847, "eval_accuracy": 0.8930635838150289, "eval_loss": 0.33301812410354614, "eval_runtime": 4.6075, "eval_samples_per_second": 225.284, "eval_steps_per_second": 3.69, "step": 45 }, { "epoch": 14.76923076923077, "eval_accuracy": 0.7389210019267822, "eval_loss": 0.6116296052932739, "eval_runtime": 4.3951, "eval_samples_per_second": 236.172, "eval_steps_per_second": 3.868, "step": 48 }, { "epoch": 15.384615384615385, "grad_norm": 12.589347839355469, "learning_rate": 4.62962962962963e-05, "loss": 0.2838, "step": 50 }, { "epoch": 16.0, "eval_accuracy": 0.9046242774566474, "eval_loss": 0.26773256063461304, "eval_runtime": 4.4069, "eval_samples_per_second": 235.539, "eval_steps_per_second": 3.858, "step": 52 }, { "epoch": 16.923076923076923, "eval_accuracy": 0.882466281310212, "eval_loss": 0.32162582874298096, "eval_runtime": 4.4812, "eval_samples_per_second": 231.635, "eval_steps_per_second": 3.794, "step": 55 }, { "epoch": 17.846153846153847, "eval_accuracy": 0.9258188824662813, "eval_loss": 0.21645468473434448, "eval_runtime": 4.4148, "eval_samples_per_second": 235.118, "eval_steps_per_second": 3.851, "step": 58 }, { "epoch": 18.46153846153846, "grad_norm": 10.592439651489258, "learning_rate": 4.4444444444444447e-05, "loss": 0.292, "step": 60 }, { "epoch": 18.76923076923077, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.4377180337905884, "eval_runtime": 4.4753, "eval_samples_per_second": 231.941, "eval_steps_per_second": 3.799, "step": 61 }, { "epoch": 20.0, "eval_accuracy": 0.869942196531792, "eval_loss": 0.35922446846961975, "eval_runtime": 4.5252, "eval_samples_per_second": 229.381, "eval_steps_per_second": 3.757, "step": 65 }, { "epoch": 20.923076923076923, "eval_accuracy": 0.7986512524084779, "eval_loss": 0.5010850429534912, "eval_runtime": 4.4935, "eval_samples_per_second": 231.001, "eval_steps_per_second": 3.783, "step": 68 }, { "epoch": 21.53846153846154, "grad_norm": 20.344301223754883, "learning_rate": 4.259259259259259e-05, "loss": 0.2809, "step": 70 }, { "epoch": 21.846153846153847, "eval_accuracy": 0.9161849710982659, "eval_loss": 0.2318951040506363, "eval_runtime": 4.3953, "eval_samples_per_second": 236.161, "eval_steps_per_second": 3.868, "step": 71 }, { "epoch": 22.76923076923077, "eval_accuracy": 0.8448940269749519, "eval_loss": 0.40184733271598816, "eval_runtime": 4.3509, "eval_samples_per_second": 238.569, "eval_steps_per_second": 3.907, "step": 74 }, { "epoch": 24.0, "eval_accuracy": 0.7996146435452793, "eval_loss": 0.48505887389183044, "eval_runtime": 4.4156, "eval_samples_per_second": 235.078, "eval_steps_per_second": 3.85, "step": 78 }, { "epoch": 24.615384615384617, "grad_norm": 3.687450647354126, "learning_rate": 4.074074074074074e-05, "loss": 0.251, "step": 80 }, { "epoch": 24.923076923076923, "eval_accuracy": 0.8275529865125241, "eval_loss": 0.46682825684547424, "eval_runtime": 4.4615, "eval_samples_per_second": 232.658, "eval_steps_per_second": 3.81, "step": 81 }, { "epoch": 25.846153846153847, "eval_accuracy": 0.8179190751445087, "eval_loss": 0.49736079573631287, "eval_runtime": 4.4122, "eval_samples_per_second": 235.256, "eval_steps_per_second": 3.853, "step": 84 }, { "epoch": 26.76923076923077, "eval_accuracy": 0.7890173410404624, "eval_loss": 0.5481745600700378, "eval_runtime": 4.3893, "eval_samples_per_second": 236.483, "eval_steps_per_second": 3.873, "step": 87 }, { "epoch": 27.692307692307693, "grad_norm": 8.984414100646973, "learning_rate": 3.888888888888889e-05, "loss": 0.2371, "step": 90 }, { "epoch": 28.0, "eval_accuracy": 0.7369942196531792, "eval_loss": 0.6840028762817383, "eval_runtime": 4.3759, "eval_samples_per_second": 237.209, "eval_steps_per_second": 3.885, "step": 91 }, { "epoch": 28.923076923076923, "eval_accuracy": 0.861271676300578, "eval_loss": 0.362918883562088, "eval_runtime": 4.3847, "eval_samples_per_second": 236.732, "eval_steps_per_second": 3.877, "step": 94 }, { "epoch": 29.846153846153847, "eval_accuracy": 0.733140655105973, "eval_loss": 0.6211853623390198, "eval_runtime": 4.3625, "eval_samples_per_second": 237.939, "eval_steps_per_second": 3.897, "step": 97 }, { "epoch": 30.76923076923077, "grad_norm": 10.264822006225586, "learning_rate": 3.7037037037037037e-05, "loss": 0.2416, "step": 100 }, { "epoch": 30.76923076923077, "eval_accuracy": 0.8641618497109826, "eval_loss": 0.36566850543022156, "eval_runtime": 4.2876, "eval_samples_per_second": 242.092, "eval_steps_per_second": 3.965, "step": 100 }, { "epoch": 32.0, "eval_accuracy": 0.7649325626204239, "eval_loss": 0.5856759548187256, "eval_runtime": 4.3523, "eval_samples_per_second": 238.496, "eval_steps_per_second": 3.906, "step": 104 }, { "epoch": 32.92307692307692, "eval_accuracy": 0.8564547206165704, "eval_loss": 0.36104100942611694, "eval_runtime": 4.4258, "eval_samples_per_second": 234.532, "eval_steps_per_second": 3.841, "step": 107 }, { "epoch": 33.84615384615385, "grad_norm": 9.243880271911621, "learning_rate": 3.518518518518519e-05, "loss": 0.2312, "step": 110 }, { "epoch": 33.84615384615385, "eval_accuracy": 0.6358381502890174, "eval_loss": 0.8753241300582886, "eval_runtime": 4.2456, "eval_samples_per_second": 244.487, "eval_steps_per_second": 4.004, "step": 110 }, { "epoch": 34.76923076923077, "eval_accuracy": 0.7976878612716763, "eval_loss": 0.4992680549621582, "eval_runtime": 4.2709, "eval_samples_per_second": 243.042, "eval_steps_per_second": 3.98, "step": 113 }, { "epoch": 36.0, "eval_accuracy": 0.8131021194605009, "eval_loss": 0.47023212909698486, "eval_runtime": 4.2489, "eval_samples_per_second": 244.298, "eval_steps_per_second": 4.001, "step": 117 }, { "epoch": 36.92307692307692, "grad_norm": 20.28152084350586, "learning_rate": 3.3333333333333335e-05, "loss": 0.2131, "step": 120 }, { "epoch": 36.92307692307692, "eval_accuracy": 0.8583815028901735, "eval_loss": 0.3648052513599396, "eval_runtime": 4.4007, "eval_samples_per_second": 235.873, "eval_steps_per_second": 3.863, "step": 120 }, { "epoch": 37.84615384615385, "eval_accuracy": 0.7061657032755299, "eval_loss": 0.766022264957428, "eval_runtime": 4.5176, "eval_samples_per_second": 229.77, "eval_steps_per_second": 3.763, "step": 123 }, { "epoch": 38.76923076923077, "eval_accuracy": 0.8304431599229287, "eval_loss": 0.44438010454177856, "eval_runtime": 4.5626, "eval_samples_per_second": 227.503, "eval_steps_per_second": 3.726, "step": 126 }, { "epoch": 40.0, "grad_norm": 3.9224133491516113, "learning_rate": 3.148148148148148e-05, "loss": 0.2248, "step": 130 }, { "epoch": 40.0, "eval_accuracy": 0.720616570327553, "eval_loss": 0.7567552924156189, "eval_runtime": 4.3321, "eval_samples_per_second": 239.604, "eval_steps_per_second": 3.924, "step": 130 }, { "epoch": 40.92307692307692, "eval_accuracy": 0.7745664739884393, "eval_loss": 0.6133757829666138, "eval_runtime": 4.378, "eval_samples_per_second": 237.092, "eval_steps_per_second": 3.883, "step": 133 }, { "epoch": 41.84615384615385, "eval_accuracy": 0.8371868978805395, "eval_loss": 0.3969154357910156, "eval_runtime": 4.5555, "eval_samples_per_second": 227.854, "eval_steps_per_second": 3.732, "step": 136 }, { "epoch": 42.76923076923077, "eval_accuracy": 0.7427745664739884, "eval_loss": 0.6099827885627747, "eval_runtime": 4.5075, "eval_samples_per_second": 230.285, "eval_steps_per_second": 3.772, "step": 139 }, { "epoch": 43.07692307692308, "grad_norm": 8.106155395507812, "learning_rate": 2.962962962962963e-05, "loss": 0.2341, "step": 140 }, { "epoch": 44.0, "eval_accuracy": 0.7485549132947977, "eval_loss": 0.637566328048706, "eval_runtime": 4.283, "eval_samples_per_second": 242.352, "eval_steps_per_second": 3.969, "step": 143 }, { "epoch": 44.92307692307692, "eval_accuracy": 0.6965317919075145, "eval_loss": 0.8081742525100708, "eval_runtime": 4.3271, "eval_samples_per_second": 239.886, "eval_steps_per_second": 3.929, "step": 146 }, { "epoch": 45.84615384615385, "eval_accuracy": 0.7986512524084779, "eval_loss": 0.5552083849906921, "eval_runtime": 4.291, "eval_samples_per_second": 241.904, "eval_steps_per_second": 3.962, "step": 149 }, { "epoch": 46.15384615384615, "grad_norm": 6.829940319061279, "learning_rate": 2.777777777777778e-05, "loss": 0.1998, "step": 150 }, { "epoch": 46.76923076923077, "eval_accuracy": 0.7784200385356455, "eval_loss": 0.5735900402069092, "eval_runtime": 4.2973, "eval_samples_per_second": 241.545, "eval_steps_per_second": 3.956, "step": 152 }, { "epoch": 48.0, "eval_accuracy": 0.8179190751445087, "eval_loss": 0.4477255642414093, "eval_runtime": 4.4186, "eval_samples_per_second": 234.915, "eval_steps_per_second": 3.847, "step": 156 }, { "epoch": 48.92307692307692, "eval_accuracy": 0.8063583815028902, "eval_loss": 0.4925374686717987, "eval_runtime": 4.4855, "eval_samples_per_second": 231.411, "eval_steps_per_second": 3.79, "step": 159 }, { "epoch": 49.23076923076923, "grad_norm": 4.723771572113037, "learning_rate": 2.5925925925925925e-05, "loss": 0.2075, "step": 160 }, { "epoch": 49.84615384615385, "eval_accuracy": 0.7408477842003853, "eval_loss": 0.66413414478302, "eval_runtime": 4.4252, "eval_samples_per_second": 234.565, "eval_steps_per_second": 3.842, "step": 162 }, { "epoch": 50.76923076923077, "eval_accuracy": 0.7418111753371869, "eval_loss": 0.671845018863678, "eval_runtime": 4.4087, "eval_samples_per_second": 235.445, "eval_steps_per_second": 3.856, "step": 165 }, { "epoch": 52.0, "eval_accuracy": 0.8169556840077071, "eval_loss": 0.49127721786499023, "eval_runtime": 4.6639, "eval_samples_per_second": 222.563, "eval_steps_per_second": 3.645, "step": 169 }, { "epoch": 52.30769230769231, "grad_norm": 4.890706539154053, "learning_rate": 2.4074074074074074e-05, "loss": 0.197, "step": 170 }, { "epoch": 52.92307692307692, "eval_accuracy": 0.7967244701348748, "eval_loss": 0.5315573811531067, "eval_runtime": 4.4532, "eval_samples_per_second": 233.089, "eval_steps_per_second": 3.817, "step": 172 }, { "epoch": 53.84615384615385, "eval_accuracy": 0.7032755298651252, "eval_loss": 0.7917293310165405, "eval_runtime": 4.3702, "eval_samples_per_second": 237.52, "eval_steps_per_second": 3.89, "step": 175 }, { "epoch": 54.76923076923077, "eval_accuracy": 0.684971098265896, "eval_loss": 0.823156476020813, "eval_runtime": 4.4312, "eval_samples_per_second": 234.246, "eval_steps_per_second": 3.836, "step": 178 }, { "epoch": 55.38461538461539, "grad_norm": 5.017714023590088, "learning_rate": 2.2222222222222223e-05, "loss": 0.1769, "step": 180 }, { "epoch": 56.0, "eval_accuracy": 0.6753371868978806, "eval_loss": 0.8840863704681396, "eval_runtime": 4.5959, "eval_samples_per_second": 225.855, "eval_steps_per_second": 3.699, "step": 182 }, { "epoch": 56.92307692307692, "eval_accuracy": 0.720616570327553, "eval_loss": 0.7669637799263, "eval_runtime": 4.5002, "eval_samples_per_second": 230.655, "eval_steps_per_second": 3.778, "step": 185 }, { "epoch": 57.84615384615385, "eval_accuracy": 0.7167630057803468, "eval_loss": 0.7893001437187195, "eval_runtime": 4.4637, "eval_samples_per_second": 232.544, "eval_steps_per_second": 3.809, "step": 188 }, { "epoch": 58.46153846153846, "grad_norm": 4.112958908081055, "learning_rate": 2.037037037037037e-05, "loss": 0.1735, "step": 190 }, { "epoch": 58.76923076923077, "eval_accuracy": 0.6001926782273603, "eval_loss": 1.1964884996414185, "eval_runtime": 4.4745, "eval_samples_per_second": 231.983, "eval_steps_per_second": 3.799, "step": 191 }, { "epoch": 60.0, "eval_accuracy": 0.6570327552986512, "eval_loss": 1.0561035871505737, "eval_runtime": 4.48, "eval_samples_per_second": 231.699, "eval_steps_per_second": 3.795, "step": 195 }, { "epoch": 60.92307692307692, "eval_accuracy": 0.7408477842003853, "eval_loss": 0.7164301872253418, "eval_runtime": 4.4848, "eval_samples_per_second": 231.449, "eval_steps_per_second": 3.791, "step": 198 }, { "epoch": 61.53846153846154, "grad_norm": 6.599235534667969, "learning_rate": 1.8518518518518518e-05, "loss": 0.1905, "step": 200 }, { "epoch": 61.84615384615385, "eval_accuracy": 0.7610789980732178, "eval_loss": 0.616022527217865, "eval_runtime": 4.4715, "eval_samples_per_second": 232.137, "eval_steps_per_second": 3.802, "step": 201 }, { "epoch": 62.76923076923077, "eval_accuracy": 0.8005780346820809, "eval_loss": 0.4963868260383606, "eval_runtime": 4.495, "eval_samples_per_second": 230.921, "eval_steps_per_second": 3.782, "step": 204 }, { "epoch": 64.0, "eval_accuracy": 0.7369942196531792, "eval_loss": 0.6948912739753723, "eval_runtime": 4.4865, "eval_samples_per_second": 231.359, "eval_steps_per_second": 3.789, "step": 208 }, { "epoch": 64.61538461538461, "grad_norm": 6.637955188751221, "learning_rate": 1.6666666666666667e-05, "loss": 0.1748, "step": 210 }, { "epoch": 64.92307692307692, "eval_accuracy": 0.8044315992292871, "eval_loss": 0.5144685506820679, "eval_runtime": 4.4627, "eval_samples_per_second": 232.593, "eval_steps_per_second": 3.809, "step": 211 }, { "epoch": 65.84615384615384, "eval_accuracy": 0.7707129094412332, "eval_loss": 0.6396705508232117, "eval_runtime": 4.384, "eval_samples_per_second": 236.772, "eval_steps_per_second": 3.878, "step": 214 }, { "epoch": 66.76923076923077, "eval_accuracy": 0.789980732177264, "eval_loss": 0.5983762145042419, "eval_runtime": 4.3335, "eval_samples_per_second": 239.529, "eval_steps_per_second": 3.923, "step": 217 }, { "epoch": 67.6923076923077, "grad_norm": 4.977928638458252, "learning_rate": 1.4814814814814815e-05, "loss": 0.1535, "step": 220 }, { "epoch": 68.0, "eval_accuracy": 0.8458574181117534, "eval_loss": 0.4233017861843109, "eval_runtime": 4.3666, "eval_samples_per_second": 237.715, "eval_steps_per_second": 3.893, "step": 221 }, { "epoch": 68.92307692307692, "eval_accuracy": 0.8342967244701349, "eval_loss": 0.44637879729270935, "eval_runtime": 4.5328, "eval_samples_per_second": 228.998, "eval_steps_per_second": 3.75, "step": 224 }, { "epoch": 69.84615384615384, "eval_accuracy": 0.8497109826589595, "eval_loss": 0.39527395367622375, "eval_runtime": 4.3003, "eval_samples_per_second": 241.377, "eval_steps_per_second": 3.953, "step": 227 }, { "epoch": 70.76923076923077, "grad_norm": 5.1860833168029785, "learning_rate": 1.2962962962962962e-05, "loss": 0.1633, "step": 230 }, { "epoch": 70.76923076923077, "eval_accuracy": 0.8314065510597303, "eval_loss": 0.4313718378543854, "eval_runtime": 4.5453, "eval_samples_per_second": 228.367, "eval_steps_per_second": 3.74, "step": 230 }, { "epoch": 72.0, "eval_accuracy": 0.802504816955684, "eval_loss": 0.5035440325737, "eval_runtime": 4.4372, "eval_samples_per_second": 233.93, "eval_steps_per_second": 3.831, "step": 234 }, { "epoch": 72.92307692307692, "eval_accuracy": 0.7803468208092486, "eval_loss": 0.538741946220398, "eval_runtime": 4.2892, "eval_samples_per_second": 242.001, "eval_steps_per_second": 3.963, "step": 237 }, { "epoch": 73.84615384615384, "grad_norm": 9.802773475646973, "learning_rate": 1.1111111111111112e-05, "loss": 0.145, "step": 240 }, { "epoch": 73.84615384615384, "eval_accuracy": 0.802504816955684, "eval_loss": 0.5016281008720398, "eval_runtime": 4.368, "eval_samples_per_second": 237.639, "eval_steps_per_second": 3.892, "step": 240 }, { "epoch": 74.76923076923077, "eval_accuracy": 0.8159922928709056, "eval_loss": 0.46059200167655945, "eval_runtime": 4.4494, "eval_samples_per_second": 233.289, "eval_steps_per_second": 3.821, "step": 243 }, { "epoch": 76.0, "eval_accuracy": 0.7524084778420038, "eval_loss": 0.6731807589530945, "eval_runtime": 4.4918, "eval_samples_per_second": 231.087, "eval_steps_per_second": 3.785, "step": 247 }, { "epoch": 76.92307692307692, "grad_norm": 4.373079299926758, "learning_rate": 9.259259259259259e-06, "loss": 0.1584, "step": 250 }, { "epoch": 76.92307692307692, "eval_accuracy": 0.7524084778420038, "eval_loss": 0.6853501200675964, "eval_runtime": 4.3919, "eval_samples_per_second": 236.345, "eval_steps_per_second": 3.871, "step": 250 }, { "epoch": 77.84615384615384, "eval_accuracy": 0.7572254335260116, "eval_loss": 0.6868495941162109, "eval_runtime": 4.4562, "eval_samples_per_second": 232.935, "eval_steps_per_second": 3.815, "step": 253 }, { "epoch": 78.76923076923077, "eval_accuracy": 0.7581888246628131, "eval_loss": 0.6764776706695557, "eval_runtime": 4.2898, "eval_samples_per_second": 241.971, "eval_steps_per_second": 3.963, "step": 256 }, { "epoch": 80.0, "grad_norm": 3.5113680362701416, "learning_rate": 7.4074074074074075e-06, "loss": 0.1423, "step": 260 }, { "epoch": 80.0, "eval_accuracy": 0.7832369942196532, "eval_loss": 0.6294586062431335, "eval_runtime": 4.456, "eval_samples_per_second": 232.945, "eval_steps_per_second": 3.815, "step": 260 }, { "epoch": 80.92307692307692, "eval_accuracy": 0.7909441233140655, "eval_loss": 0.6123859882354736, "eval_runtime": 4.4012, "eval_samples_per_second": 235.843, "eval_steps_per_second": 3.863, "step": 263 }, { "epoch": 81.84615384615384, "eval_accuracy": 0.7880539499036608, "eval_loss": 0.6026654839515686, "eval_runtime": 4.3377, "eval_samples_per_second": 239.297, "eval_steps_per_second": 3.919, "step": 266 }, { "epoch": 82.76923076923077, "eval_accuracy": 0.7861271676300579, "eval_loss": 0.6008134484291077, "eval_runtime": 4.3155, "eval_samples_per_second": 240.526, "eval_steps_per_second": 3.939, "step": 269 }, { "epoch": 83.07692307692308, "grad_norm": 7.668453693389893, "learning_rate": 5.555555555555556e-06, "loss": 0.1449, "step": 270 }, { "epoch": 84.0, "eval_accuracy": 0.7687861271676301, "eval_loss": 0.6533465385437012, "eval_runtime": 4.5198, "eval_samples_per_second": 229.655, "eval_steps_per_second": 3.761, "step": 273 }, { "epoch": 84.92307692307692, "eval_accuracy": 0.7697495183044316, "eval_loss": 0.6304323673248291, "eval_runtime": 4.4363, "eval_samples_per_second": 233.98, "eval_steps_per_second": 3.832, "step": 276 }, { "epoch": 85.84615384615384, "eval_accuracy": 0.7996146435452793, "eval_loss": 0.5607452392578125, "eval_runtime": 4.4792, "eval_samples_per_second": 231.739, "eval_steps_per_second": 3.795, "step": 279 }, { "epoch": 86.15384615384616, "grad_norm": 12.00244140625, "learning_rate": 3.7037037037037037e-06, "loss": 0.1452, "step": 280 }, { "epoch": 86.76923076923077, "eval_accuracy": 0.7928709055876686, "eval_loss": 0.5738906264305115, "eval_runtime": 4.3398, "eval_samples_per_second": 239.182, "eval_steps_per_second": 3.917, "step": 282 }, { "epoch": 88.0, "eval_accuracy": 0.7764932562620424, "eval_loss": 0.6114925146102905, "eval_runtime": 4.2818, "eval_samples_per_second": 242.421, "eval_steps_per_second": 3.97, "step": 286 }, { "epoch": 88.92307692307692, "eval_accuracy": 0.7726396917148363, "eval_loss": 0.6276693344116211, "eval_runtime": 4.502, "eval_samples_per_second": 230.565, "eval_steps_per_second": 3.776, "step": 289 }, { "epoch": 89.23076923076923, "grad_norm": 5.618044853210449, "learning_rate": 1.8518518518518519e-06, "loss": 0.1232, "step": 290 }, { "epoch": 89.84615384615384, "eval_accuracy": 0.7784200385356455, "eval_loss": 0.627270519733429, "eval_runtime": 4.4959, "eval_samples_per_second": 230.875, "eval_steps_per_second": 3.781, "step": 292 }, { "epoch": 90.76923076923077, "eval_accuracy": 0.7774566473988439, "eval_loss": 0.6300393342971802, "eval_runtime": 4.4712, "eval_samples_per_second": 232.154, "eval_steps_per_second": 3.802, "step": 295 }, { "epoch": 92.0, "eval_accuracy": 0.7764932562620424, "eval_loss": 0.6361268758773804, "eval_runtime": 4.4247, "eval_samples_per_second": 234.591, "eval_steps_per_second": 3.842, "step": 299 }, { "epoch": 92.3076923076923, "grad_norm": 14.779194831848145, "learning_rate": 0.0, "loss": 0.1494, "step": 300 }, { "epoch": 92.3076923076923, "eval_accuracy": 0.7764932562620424, "eval_loss": 0.6359105706214905, "eval_runtime": 4.3892, "eval_samples_per_second": 236.491, "eval_steps_per_second": 3.873, "step": 300 }, { "epoch": 92.3076923076923, "step": 300, "total_flos": 1.4740506847134351e+18, "train_loss": 0.22712578296661376, "train_runtime": 1060.3985, "train_samples_per_second": 76.952, "train_steps_per_second": 0.283 } ], "logging_steps": 10, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4740506847134351e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }