|
{ |
|
"best_metric": 0.9489402697495183, |
|
"best_model_checkpoint": "vit-msn-small-corect_cleaned_dataset_lateral_flow_ivalidation/checkpoint-32", |
|
"epoch": 92.3076923076923, |
|
"eval_steps": 500, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"eval_accuracy": 0.14932562620423892, |
|
"eval_loss": 0.8616608381271362, |
|
"eval_runtime": 4.4185, |
|
"eval_samples_per_second": 234.919, |
|
"eval_steps_per_second": 3.847, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"eval_accuracy": 0.8795761078998073, |
|
"eval_loss": 0.4977250099182129, |
|
"eval_runtime": 4.3984, |
|
"eval_samples_per_second": 235.995, |
|
"eval_steps_per_second": 3.865, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.769230769230769, |
|
"eval_accuracy": 0.7273603082851637, |
|
"eval_loss": 0.6142979264259338, |
|
"eval_runtime": 4.4015, |
|
"eval_samples_per_second": 235.828, |
|
"eval_steps_per_second": 3.862, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 12.760916709899902, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.6181, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8323699421965318, |
|
"eval_loss": 0.46852532029151917, |
|
"eval_runtime": 4.468, |
|
"eval_samples_per_second": 232.321, |
|
"eval_steps_per_second": 3.805, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.923076923076923, |
|
"eval_accuracy": 0.8622350674373795, |
|
"eval_loss": 0.3828171491622925, |
|
"eval_runtime": 4.2719, |
|
"eval_samples_per_second": 242.983, |
|
"eval_steps_per_second": 3.979, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.846153846153846, |
|
"eval_accuracy": 0.8497109826589595, |
|
"eval_loss": 0.40278810262680054, |
|
"eval_runtime": 4.3081, |
|
"eval_samples_per_second": 240.944, |
|
"eval_steps_per_second": 3.946, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 19.64206886291504, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.3645, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.769230769230769, |
|
"eval_accuracy": 0.9210019267822736, |
|
"eval_loss": 0.24846747517585754, |
|
"eval_runtime": 4.5312, |
|
"eval_samples_per_second": 229.078, |
|
"eval_steps_per_second": 3.752, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9123314065510597, |
|
"eval_loss": 0.24262332916259766, |
|
"eval_runtime": 4.334, |
|
"eval_samples_per_second": 239.504, |
|
"eval_steps_per_second": 3.923, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 8.923076923076923, |
|
"eval_accuracy": 0.7774566473988439, |
|
"eval_loss": 0.5673837065696716, |
|
"eval_runtime": 4.5821, |
|
"eval_samples_per_second": 226.533, |
|
"eval_steps_per_second": 3.71, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"grad_norm": 55.66674041748047, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3492, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 9.846153846153847, |
|
"eval_accuracy": 0.9489402697495183, |
|
"eval_loss": 0.16099636256694794, |
|
"eval_runtime": 4.3519, |
|
"eval_samples_per_second": 238.518, |
|
"eval_steps_per_second": 3.906, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 10.76923076923077, |
|
"eval_accuracy": 0.9171483622350675, |
|
"eval_loss": 0.32253944873809814, |
|
"eval_runtime": 4.441, |
|
"eval_samples_per_second": 233.73, |
|
"eval_steps_per_second": 3.828, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9123314065510597, |
|
"eval_loss": 0.29041457176208496, |
|
"eval_runtime": 4.3317, |
|
"eval_samples_per_second": 239.628, |
|
"eval_steps_per_second": 3.925, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 12.307692307692308, |
|
"grad_norm": 8.140445709228516, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.3472, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.923076923076923, |
|
"eval_accuracy": 0.9132947976878613, |
|
"eval_loss": 0.28509509563446045, |
|
"eval_runtime": 4.4172, |
|
"eval_samples_per_second": 234.99, |
|
"eval_steps_per_second": 3.849, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 13.846153846153847, |
|
"eval_accuracy": 0.8930635838150289, |
|
"eval_loss": 0.33301812410354614, |
|
"eval_runtime": 4.6075, |
|
"eval_samples_per_second": 225.284, |
|
"eval_steps_per_second": 3.69, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.76923076923077, |
|
"eval_accuracy": 0.7389210019267822, |
|
"eval_loss": 0.6116296052932739, |
|
"eval_runtime": 4.3951, |
|
"eval_samples_per_second": 236.172, |
|
"eval_steps_per_second": 3.868, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 15.384615384615385, |
|
"grad_norm": 12.589347839355469, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.2838, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9046242774566474, |
|
"eval_loss": 0.26773256063461304, |
|
"eval_runtime": 4.4069, |
|
"eval_samples_per_second": 235.539, |
|
"eval_steps_per_second": 3.858, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"eval_accuracy": 0.882466281310212, |
|
"eval_loss": 0.32162582874298096, |
|
"eval_runtime": 4.4812, |
|
"eval_samples_per_second": 231.635, |
|
"eval_steps_per_second": 3.794, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 17.846153846153847, |
|
"eval_accuracy": 0.9258188824662813, |
|
"eval_loss": 0.21645468473434448, |
|
"eval_runtime": 4.4148, |
|
"eval_samples_per_second": 235.118, |
|
"eval_steps_per_second": 3.851, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 18.46153846153846, |
|
"grad_norm": 10.592439651489258, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.292, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 18.76923076923077, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.4377180337905884, |
|
"eval_runtime": 4.4753, |
|
"eval_samples_per_second": 231.941, |
|
"eval_steps_per_second": 3.799, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.869942196531792, |
|
"eval_loss": 0.35922446846961975, |
|
"eval_runtime": 4.5252, |
|
"eval_samples_per_second": 229.381, |
|
"eval_steps_per_second": 3.757, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 20.923076923076923, |
|
"eval_accuracy": 0.7986512524084779, |
|
"eval_loss": 0.5010850429534912, |
|
"eval_runtime": 4.4935, |
|
"eval_samples_per_second": 231.001, |
|
"eval_steps_per_second": 3.783, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 21.53846153846154, |
|
"grad_norm": 20.344301223754883, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.2809, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 21.846153846153847, |
|
"eval_accuracy": 0.9161849710982659, |
|
"eval_loss": 0.2318951040506363, |
|
"eval_runtime": 4.3953, |
|
"eval_samples_per_second": 236.161, |
|
"eval_steps_per_second": 3.868, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 22.76923076923077, |
|
"eval_accuracy": 0.8448940269749519, |
|
"eval_loss": 0.40184733271598816, |
|
"eval_runtime": 4.3509, |
|
"eval_samples_per_second": 238.569, |
|
"eval_steps_per_second": 3.907, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7996146435452793, |
|
"eval_loss": 0.48505887389183044, |
|
"eval_runtime": 4.4156, |
|
"eval_samples_per_second": 235.078, |
|
"eval_steps_per_second": 3.85, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 24.615384615384617, |
|
"grad_norm": 3.687450647354126, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.251, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 24.923076923076923, |
|
"eval_accuracy": 0.8275529865125241, |
|
"eval_loss": 0.46682825684547424, |
|
"eval_runtime": 4.4615, |
|
"eval_samples_per_second": 232.658, |
|
"eval_steps_per_second": 3.81, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 25.846153846153847, |
|
"eval_accuracy": 0.8179190751445087, |
|
"eval_loss": 0.49736079573631287, |
|
"eval_runtime": 4.4122, |
|
"eval_samples_per_second": 235.256, |
|
"eval_steps_per_second": 3.853, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 26.76923076923077, |
|
"eval_accuracy": 0.7890173410404624, |
|
"eval_loss": 0.5481745600700378, |
|
"eval_runtime": 4.3893, |
|
"eval_samples_per_second": 236.483, |
|
"eval_steps_per_second": 3.873, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 27.692307692307693, |
|
"grad_norm": 8.984414100646973, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.2371, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7369942196531792, |
|
"eval_loss": 0.6840028762817383, |
|
"eval_runtime": 4.3759, |
|
"eval_samples_per_second": 237.209, |
|
"eval_steps_per_second": 3.885, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 28.923076923076923, |
|
"eval_accuracy": 0.861271676300578, |
|
"eval_loss": 0.362918883562088, |
|
"eval_runtime": 4.3847, |
|
"eval_samples_per_second": 236.732, |
|
"eval_steps_per_second": 3.877, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 29.846153846153847, |
|
"eval_accuracy": 0.733140655105973, |
|
"eval_loss": 0.6211853623390198, |
|
"eval_runtime": 4.3625, |
|
"eval_samples_per_second": 237.939, |
|
"eval_steps_per_second": 3.897, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"grad_norm": 10.264822006225586, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.2416, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"eval_accuracy": 0.8641618497109826, |
|
"eval_loss": 0.36566850543022156, |
|
"eval_runtime": 4.2876, |
|
"eval_samples_per_second": 242.092, |
|
"eval_steps_per_second": 3.965, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7649325626204239, |
|
"eval_loss": 0.5856759548187256, |
|
"eval_runtime": 4.3523, |
|
"eval_samples_per_second": 238.496, |
|
"eval_steps_per_second": 3.906, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 32.92307692307692, |
|
"eval_accuracy": 0.8564547206165704, |
|
"eval_loss": 0.36104100942611694, |
|
"eval_runtime": 4.4258, |
|
"eval_samples_per_second": 234.532, |
|
"eval_steps_per_second": 3.841, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"grad_norm": 9.243880271911621, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.2312, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"eval_accuracy": 0.6358381502890174, |
|
"eval_loss": 0.8753241300582886, |
|
"eval_runtime": 4.2456, |
|
"eval_samples_per_second": 244.487, |
|
"eval_steps_per_second": 4.004, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 34.76923076923077, |
|
"eval_accuracy": 0.7976878612716763, |
|
"eval_loss": 0.4992680549621582, |
|
"eval_runtime": 4.2709, |
|
"eval_samples_per_second": 243.042, |
|
"eval_steps_per_second": 3.98, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8131021194605009, |
|
"eval_loss": 0.47023212909698486, |
|
"eval_runtime": 4.2489, |
|
"eval_samples_per_second": 244.298, |
|
"eval_steps_per_second": 4.001, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"grad_norm": 20.28152084350586, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.2131, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"eval_accuracy": 0.8583815028901735, |
|
"eval_loss": 0.3648052513599396, |
|
"eval_runtime": 4.4007, |
|
"eval_samples_per_second": 235.873, |
|
"eval_steps_per_second": 3.863, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 37.84615384615385, |
|
"eval_accuracy": 0.7061657032755299, |
|
"eval_loss": 0.766022264957428, |
|
"eval_runtime": 4.5176, |
|
"eval_samples_per_second": 229.77, |
|
"eval_steps_per_second": 3.763, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 38.76923076923077, |
|
"eval_accuracy": 0.8304431599229287, |
|
"eval_loss": 0.44438010454177856, |
|
"eval_runtime": 4.5626, |
|
"eval_samples_per_second": 227.503, |
|
"eval_steps_per_second": 3.726, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 3.9224133491516113, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.2248, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.720616570327553, |
|
"eval_loss": 0.7567552924156189, |
|
"eval_runtime": 4.3321, |
|
"eval_samples_per_second": 239.604, |
|
"eval_steps_per_second": 3.924, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 40.92307692307692, |
|
"eval_accuracy": 0.7745664739884393, |
|
"eval_loss": 0.6133757829666138, |
|
"eval_runtime": 4.378, |
|
"eval_samples_per_second": 237.092, |
|
"eval_steps_per_second": 3.883, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 41.84615384615385, |
|
"eval_accuracy": 0.8371868978805395, |
|
"eval_loss": 0.3969154357910156, |
|
"eval_runtime": 4.5555, |
|
"eval_samples_per_second": 227.854, |
|
"eval_steps_per_second": 3.732, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 42.76923076923077, |
|
"eval_accuracy": 0.7427745664739884, |
|
"eval_loss": 0.6099827885627747, |
|
"eval_runtime": 4.5075, |
|
"eval_samples_per_second": 230.285, |
|
"eval_steps_per_second": 3.772, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 43.07692307692308, |
|
"grad_norm": 8.106155395507812, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.2341, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7485549132947977, |
|
"eval_loss": 0.637566328048706, |
|
"eval_runtime": 4.283, |
|
"eval_samples_per_second": 242.352, |
|
"eval_steps_per_second": 3.969, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 44.92307692307692, |
|
"eval_accuracy": 0.6965317919075145, |
|
"eval_loss": 0.8081742525100708, |
|
"eval_runtime": 4.3271, |
|
"eval_samples_per_second": 239.886, |
|
"eval_steps_per_second": 3.929, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 45.84615384615385, |
|
"eval_accuracy": 0.7986512524084779, |
|
"eval_loss": 0.5552083849906921, |
|
"eval_runtime": 4.291, |
|
"eval_samples_per_second": 241.904, |
|
"eval_steps_per_second": 3.962, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 46.15384615384615, |
|
"grad_norm": 6.829940319061279, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.1998, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 46.76923076923077, |
|
"eval_accuracy": 0.7784200385356455, |
|
"eval_loss": 0.5735900402069092, |
|
"eval_runtime": 4.2973, |
|
"eval_samples_per_second": 241.545, |
|
"eval_steps_per_second": 3.956, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8179190751445087, |
|
"eval_loss": 0.4477255642414093, |
|
"eval_runtime": 4.4186, |
|
"eval_samples_per_second": 234.915, |
|
"eval_steps_per_second": 3.847, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 48.92307692307692, |
|
"eval_accuracy": 0.8063583815028902, |
|
"eval_loss": 0.4925374686717987, |
|
"eval_runtime": 4.4855, |
|
"eval_samples_per_second": 231.411, |
|
"eval_steps_per_second": 3.79, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 49.23076923076923, |
|
"grad_norm": 4.723771572113037, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.2075, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 49.84615384615385, |
|
"eval_accuracy": 0.7408477842003853, |
|
"eval_loss": 0.66413414478302, |
|
"eval_runtime": 4.4252, |
|
"eval_samples_per_second": 234.565, |
|
"eval_steps_per_second": 3.842, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 50.76923076923077, |
|
"eval_accuracy": 0.7418111753371869, |
|
"eval_loss": 0.671845018863678, |
|
"eval_runtime": 4.4087, |
|
"eval_samples_per_second": 235.445, |
|
"eval_steps_per_second": 3.856, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8169556840077071, |
|
"eval_loss": 0.49127721786499023, |
|
"eval_runtime": 4.6639, |
|
"eval_samples_per_second": 222.563, |
|
"eval_steps_per_second": 3.645, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 52.30769230769231, |
|
"grad_norm": 4.890706539154053, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.197, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 52.92307692307692, |
|
"eval_accuracy": 0.7967244701348748, |
|
"eval_loss": 0.5315573811531067, |
|
"eval_runtime": 4.4532, |
|
"eval_samples_per_second": 233.089, |
|
"eval_steps_per_second": 3.817, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 53.84615384615385, |
|
"eval_accuracy": 0.7032755298651252, |
|
"eval_loss": 0.7917293310165405, |
|
"eval_runtime": 4.3702, |
|
"eval_samples_per_second": 237.52, |
|
"eval_steps_per_second": 3.89, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 54.76923076923077, |
|
"eval_accuracy": 0.684971098265896, |
|
"eval_loss": 0.823156476020813, |
|
"eval_runtime": 4.4312, |
|
"eval_samples_per_second": 234.246, |
|
"eval_steps_per_second": 3.836, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 55.38461538461539, |
|
"grad_norm": 5.017714023590088, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.1769, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.6753371868978806, |
|
"eval_loss": 0.8840863704681396, |
|
"eval_runtime": 4.5959, |
|
"eval_samples_per_second": 225.855, |
|
"eval_steps_per_second": 3.699, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 56.92307692307692, |
|
"eval_accuracy": 0.720616570327553, |
|
"eval_loss": 0.7669637799263, |
|
"eval_runtime": 4.5002, |
|
"eval_samples_per_second": 230.655, |
|
"eval_steps_per_second": 3.778, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 57.84615384615385, |
|
"eval_accuracy": 0.7167630057803468, |
|
"eval_loss": 0.7893001437187195, |
|
"eval_runtime": 4.4637, |
|
"eval_samples_per_second": 232.544, |
|
"eval_steps_per_second": 3.809, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 58.46153846153846, |
|
"grad_norm": 4.112958908081055, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.1735, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 58.76923076923077, |
|
"eval_accuracy": 0.6001926782273603, |
|
"eval_loss": 1.1964884996414185, |
|
"eval_runtime": 4.4745, |
|
"eval_samples_per_second": 231.983, |
|
"eval_steps_per_second": 3.799, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.6570327552986512, |
|
"eval_loss": 1.0561035871505737, |
|
"eval_runtime": 4.48, |
|
"eval_samples_per_second": 231.699, |
|
"eval_steps_per_second": 3.795, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 60.92307692307692, |
|
"eval_accuracy": 0.7408477842003853, |
|
"eval_loss": 0.7164301872253418, |
|
"eval_runtime": 4.4848, |
|
"eval_samples_per_second": 231.449, |
|
"eval_steps_per_second": 3.791, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 61.53846153846154, |
|
"grad_norm": 6.599235534667969, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.1905, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 61.84615384615385, |
|
"eval_accuracy": 0.7610789980732178, |
|
"eval_loss": 0.616022527217865, |
|
"eval_runtime": 4.4715, |
|
"eval_samples_per_second": 232.137, |
|
"eval_steps_per_second": 3.802, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 62.76923076923077, |
|
"eval_accuracy": 0.8005780346820809, |
|
"eval_loss": 0.4963868260383606, |
|
"eval_runtime": 4.495, |
|
"eval_samples_per_second": 230.921, |
|
"eval_steps_per_second": 3.782, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7369942196531792, |
|
"eval_loss": 0.6948912739753723, |
|
"eval_runtime": 4.4865, |
|
"eval_samples_per_second": 231.359, |
|
"eval_steps_per_second": 3.789, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 64.61538461538461, |
|
"grad_norm": 6.637955188751221, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.1748, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 64.92307692307692, |
|
"eval_accuracy": 0.8044315992292871, |
|
"eval_loss": 0.5144685506820679, |
|
"eval_runtime": 4.4627, |
|
"eval_samples_per_second": 232.593, |
|
"eval_steps_per_second": 3.809, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 65.84615384615384, |
|
"eval_accuracy": 0.7707129094412332, |
|
"eval_loss": 0.6396705508232117, |
|
"eval_runtime": 4.384, |
|
"eval_samples_per_second": 236.772, |
|
"eval_steps_per_second": 3.878, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 66.76923076923077, |
|
"eval_accuracy": 0.789980732177264, |
|
"eval_loss": 0.5983762145042419, |
|
"eval_runtime": 4.3335, |
|
"eval_samples_per_second": 239.529, |
|
"eval_steps_per_second": 3.923, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 67.6923076923077, |
|
"grad_norm": 4.977928638458252, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.1535, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8458574181117534, |
|
"eval_loss": 0.4233017861843109, |
|
"eval_runtime": 4.3666, |
|
"eval_samples_per_second": 237.715, |
|
"eval_steps_per_second": 3.893, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 68.92307692307692, |
|
"eval_accuracy": 0.8342967244701349, |
|
"eval_loss": 0.44637879729270935, |
|
"eval_runtime": 4.5328, |
|
"eval_samples_per_second": 228.998, |
|
"eval_steps_per_second": 3.75, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 69.84615384615384, |
|
"eval_accuracy": 0.8497109826589595, |
|
"eval_loss": 0.39527395367622375, |
|
"eval_runtime": 4.3003, |
|
"eval_samples_per_second": 241.377, |
|
"eval_steps_per_second": 3.953, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"grad_norm": 5.1860833168029785, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.1633, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"eval_accuracy": 0.8314065510597303, |
|
"eval_loss": 0.4313718378543854, |
|
"eval_runtime": 4.5453, |
|
"eval_samples_per_second": 228.367, |
|
"eval_steps_per_second": 3.74, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.802504816955684, |
|
"eval_loss": 0.5035440325737, |
|
"eval_runtime": 4.4372, |
|
"eval_samples_per_second": 233.93, |
|
"eval_steps_per_second": 3.831, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 72.92307692307692, |
|
"eval_accuracy": 0.7803468208092486, |
|
"eval_loss": 0.538741946220398, |
|
"eval_runtime": 4.2892, |
|
"eval_samples_per_second": 242.001, |
|
"eval_steps_per_second": 3.963, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"grad_norm": 9.802773475646973, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.145, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"eval_accuracy": 0.802504816955684, |
|
"eval_loss": 0.5016281008720398, |
|
"eval_runtime": 4.368, |
|
"eval_samples_per_second": 237.639, |
|
"eval_steps_per_second": 3.892, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 74.76923076923077, |
|
"eval_accuracy": 0.8159922928709056, |
|
"eval_loss": 0.46059200167655945, |
|
"eval_runtime": 4.4494, |
|
"eval_samples_per_second": 233.289, |
|
"eval_steps_per_second": 3.821, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7524084778420038, |
|
"eval_loss": 0.6731807589530945, |
|
"eval_runtime": 4.4918, |
|
"eval_samples_per_second": 231.087, |
|
"eval_steps_per_second": 3.785, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 4.373079299926758, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.1584, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"eval_accuracy": 0.7524084778420038, |
|
"eval_loss": 0.6853501200675964, |
|
"eval_runtime": 4.3919, |
|
"eval_samples_per_second": 236.345, |
|
"eval_steps_per_second": 3.871, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 77.84615384615384, |
|
"eval_accuracy": 0.7572254335260116, |
|
"eval_loss": 0.6868495941162109, |
|
"eval_runtime": 4.4562, |
|
"eval_samples_per_second": 232.935, |
|
"eval_steps_per_second": 3.815, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 78.76923076923077, |
|
"eval_accuracy": 0.7581888246628131, |
|
"eval_loss": 0.6764776706695557, |
|
"eval_runtime": 4.2898, |
|
"eval_samples_per_second": 241.971, |
|
"eval_steps_per_second": 3.963, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 3.5113680362701416, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.1423, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7832369942196532, |
|
"eval_loss": 0.6294586062431335, |
|
"eval_runtime": 4.456, |
|
"eval_samples_per_second": 232.945, |
|
"eval_steps_per_second": 3.815, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 80.92307692307692, |
|
"eval_accuracy": 0.7909441233140655, |
|
"eval_loss": 0.6123859882354736, |
|
"eval_runtime": 4.4012, |
|
"eval_samples_per_second": 235.843, |
|
"eval_steps_per_second": 3.863, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 81.84615384615384, |
|
"eval_accuracy": 0.7880539499036608, |
|
"eval_loss": 0.6026654839515686, |
|
"eval_runtime": 4.3377, |
|
"eval_samples_per_second": 239.297, |
|
"eval_steps_per_second": 3.919, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 82.76923076923077, |
|
"eval_accuracy": 0.7861271676300579, |
|
"eval_loss": 0.6008134484291077, |
|
"eval_runtime": 4.3155, |
|
"eval_samples_per_second": 240.526, |
|
"eval_steps_per_second": 3.939, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 83.07692307692308, |
|
"grad_norm": 7.668453693389893, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.1449, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7687861271676301, |
|
"eval_loss": 0.6533465385437012, |
|
"eval_runtime": 4.5198, |
|
"eval_samples_per_second": 229.655, |
|
"eval_steps_per_second": 3.761, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 84.92307692307692, |
|
"eval_accuracy": 0.7697495183044316, |
|
"eval_loss": 0.6304323673248291, |
|
"eval_runtime": 4.4363, |
|
"eval_samples_per_second": 233.98, |
|
"eval_steps_per_second": 3.832, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 85.84615384615384, |
|
"eval_accuracy": 0.7996146435452793, |
|
"eval_loss": 0.5607452392578125, |
|
"eval_runtime": 4.4792, |
|
"eval_samples_per_second": 231.739, |
|
"eval_steps_per_second": 3.795, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 86.15384615384616, |
|
"grad_norm": 12.00244140625, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.1452, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 86.76923076923077, |
|
"eval_accuracy": 0.7928709055876686, |
|
"eval_loss": 0.5738906264305115, |
|
"eval_runtime": 4.3398, |
|
"eval_samples_per_second": 239.182, |
|
"eval_steps_per_second": 3.917, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.7764932562620424, |
|
"eval_loss": 0.6114925146102905, |
|
"eval_runtime": 4.2818, |
|
"eval_samples_per_second": 242.421, |
|
"eval_steps_per_second": 3.97, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 88.92307692307692, |
|
"eval_accuracy": 0.7726396917148363, |
|
"eval_loss": 0.6276693344116211, |
|
"eval_runtime": 4.502, |
|
"eval_samples_per_second": 230.565, |
|
"eval_steps_per_second": 3.776, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 89.23076923076923, |
|
"grad_norm": 5.618044853210449, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.1232, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 89.84615384615384, |
|
"eval_accuracy": 0.7784200385356455, |
|
"eval_loss": 0.627270519733429, |
|
"eval_runtime": 4.4959, |
|
"eval_samples_per_second": 230.875, |
|
"eval_steps_per_second": 3.781, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 90.76923076923077, |
|
"eval_accuracy": 0.7774566473988439, |
|
"eval_loss": 0.6300393342971802, |
|
"eval_runtime": 4.4712, |
|
"eval_samples_per_second": 232.154, |
|
"eval_steps_per_second": 3.802, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.7764932562620424, |
|
"eval_loss": 0.6361268758773804, |
|
"eval_runtime": 4.4247, |
|
"eval_samples_per_second": 234.591, |
|
"eval_steps_per_second": 3.842, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"grad_norm": 14.779194831848145, |
|
"learning_rate": 0.0, |
|
"loss": 0.1494, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"eval_accuracy": 0.7764932562620424, |
|
"eval_loss": 0.6359105706214905, |
|
"eval_runtime": 4.3892, |
|
"eval_samples_per_second": 236.491, |
|
"eval_steps_per_second": 3.873, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"step": 300, |
|
"total_flos": 1.4740506847134351e+18, |
|
"train_loss": 0.22712578296661376, |
|
"train_runtime": 1060.3985, |
|
"train_samples_per_second": 76.952, |
|
"train_steps_per_second": 0.283 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4740506847134351e+18, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|