|
{ |
|
"best_metric": NaN, |
|
"best_model_checkpoint": "mobilebert_sa_pre-training-complete/checkpoint-1787", |
|
"epoch": 167.87912702853944, |
|
"global_step": 300000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9746415471823945e-05, |
|
"loss": 0.0, |
|
"step": 1787 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6389621318373071, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.565, |
|
"eval_samples_per_second": 186.748, |
|
"eval_steps_per_second": 1.559, |
|
"step": 1787 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.9448482827609205e-05, |
|
"loss": 0.0, |
|
"step": 3574 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6426345489766697, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.5231, |
|
"eval_samples_per_second": 189.849, |
|
"eval_steps_per_second": 1.585, |
|
"step": 3574 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.9150550183394464e-05, |
|
"loss": 0.0, |
|
"step": 5361 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6414778901613307, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3803, |
|
"eval_samples_per_second": 201.236, |
|
"eval_steps_per_second": 1.68, |
|
"step": 5361 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.885261753917973e-05, |
|
"loss": 0.0, |
|
"step": 7148 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6339985546722997, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2859, |
|
"eval_samples_per_second": 209.542, |
|
"eval_steps_per_second": 1.75, |
|
"step": 7148 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.855468489496499e-05, |
|
"loss": 0.0, |
|
"step": 8935 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6359570661896243, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.5196, |
|
"eval_samples_per_second": 190.108, |
|
"eval_steps_per_second": 1.588, |
|
"step": 8935 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.825675225075025e-05, |
|
"loss": 0.0, |
|
"step": 10722 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6341442999832598, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.687, |
|
"eval_samples_per_second": 178.265, |
|
"eval_steps_per_second": 1.489, |
|
"step": 10722 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.795881960653552e-05, |
|
"loss": 0.0, |
|
"step": 12509 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6378068416146436, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4684, |
|
"eval_samples_per_second": 194.049, |
|
"eval_steps_per_second": 1.62, |
|
"step": 12509 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.766088696232078e-05, |
|
"loss": 0.0, |
|
"step": 14296 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6334561982096832, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4312, |
|
"eval_samples_per_second": 197.02, |
|
"eval_steps_per_second": 1.645, |
|
"step": 14296 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.736295431810604e-05, |
|
"loss": 0.0, |
|
"step": 16083 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6362770101734817, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.8926, |
|
"eval_samples_per_second": 253.092, |
|
"eval_steps_per_second": 2.114, |
|
"step": 16083 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.70650216738913e-05, |
|
"loss": 0.0, |
|
"step": 17870 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6382731776362349, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2948, |
|
"eval_samples_per_second": 208.735, |
|
"eval_steps_per_second": 1.743, |
|
"step": 17870 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.6767089029676556e-05, |
|
"loss": 0.0, |
|
"step": 19657 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6379281255272482, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.498, |
|
"eval_samples_per_second": 191.754, |
|
"eval_steps_per_second": 1.601, |
|
"step": 19657 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.646915638546182e-05, |
|
"loss": 0.0, |
|
"step": 21444 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6346471430144226, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9097, |
|
"eval_samples_per_second": 250.823, |
|
"eval_steps_per_second": 2.095, |
|
"step": 21444 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 4.617122374124708e-05, |
|
"loss": 0.0006, |
|
"step": 23231 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6408630160258225, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4081, |
|
"eval_samples_per_second": 198.912, |
|
"eval_steps_per_second": 1.661, |
|
"step": 23231 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 4.587329109703234e-05, |
|
"loss": 0.0, |
|
"step": 25018 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6406372714013239, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3337, |
|
"eval_samples_per_second": 205.253, |
|
"eval_steps_per_second": 1.714, |
|
"step": 25018 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.557535845281761e-05, |
|
"loss": 0.0, |
|
"step": 26805 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6322750589213919, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.8775, |
|
"eval_samples_per_second": 255.123, |
|
"eval_steps_per_second": 2.13, |
|
"step": 26805 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.527742580860287e-05, |
|
"loss": 0.0, |
|
"step": 28592 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6402202163038314, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3437, |
|
"eval_samples_per_second": 204.38, |
|
"eval_steps_per_second": 1.707, |
|
"step": 28592 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 4.497949316438813e-05, |
|
"loss": 0.0, |
|
"step": 30379 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6400190007823852, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3387, |
|
"eval_samples_per_second": 204.815, |
|
"eval_steps_per_second": 1.71, |
|
"step": 30379 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 4.4681560520173395e-05, |
|
"loss": 0.0, |
|
"step": 32166 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.632776553916511, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.9244, |
|
"eval_samples_per_second": 163.796, |
|
"eval_steps_per_second": 1.368, |
|
"step": 32166 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 4.4383627875958655e-05, |
|
"loss": 0.0, |
|
"step": 33953 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6352045133991537, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3016, |
|
"eval_samples_per_second": 208.113, |
|
"eval_steps_per_second": 1.738, |
|
"step": 33953 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 4.4085695231743915e-05, |
|
"loss": 0.0, |
|
"step": 35740 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6380321611988887, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3407, |
|
"eval_samples_per_second": 204.636, |
|
"eval_steps_per_second": 1.709, |
|
"step": 35740 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 4.378776258752918e-05, |
|
"loss": 0.0, |
|
"step": 37527 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6462707571066704, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3673, |
|
"eval_samples_per_second": 202.344, |
|
"eval_steps_per_second": 1.69, |
|
"step": 37527 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 4.348982994331444e-05, |
|
"loss": 0.0, |
|
"step": 39314 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6313444886822396, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3572, |
|
"eval_samples_per_second": 203.209, |
|
"eval_steps_per_second": 1.697, |
|
"step": 39314 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 4.31918972990997e-05, |
|
"loss": 0.0, |
|
"step": 41101 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6385646226283677, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3089, |
|
"eval_samples_per_second": 207.457, |
|
"eval_steps_per_second": 1.732, |
|
"step": 41101 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 4.289396465488497e-05, |
|
"loss": 0.0, |
|
"step": 42888 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6412596473438116, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2196, |
|
"eval_samples_per_second": 215.801, |
|
"eval_steps_per_second": 1.802, |
|
"step": 42888 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 4.259603201067023e-05, |
|
"loss": 0.0, |
|
"step": 44675 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6323012458168115, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4055, |
|
"eval_samples_per_second": 199.128, |
|
"eval_steps_per_second": 1.663, |
|
"step": 44675 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 4.229809936645549e-05, |
|
"loss": 0.0008, |
|
"step": 46462 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6358967122901087, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3859, |
|
"eval_samples_per_second": 200.764, |
|
"eval_steps_per_second": 1.677, |
|
"step": 46462 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 4.200016672224075e-05, |
|
"loss": 0.0, |
|
"step": 48249 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6397165131112686, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.5031, |
|
"eval_samples_per_second": 191.366, |
|
"eval_steps_per_second": 1.598, |
|
"step": 48249 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 4.170223407802601e-05, |
|
"loss": 0.0, |
|
"step": 50036 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6377492196507409, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4488, |
|
"eval_samples_per_second": 195.602, |
|
"eval_steps_per_second": 1.633, |
|
"step": 50036 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 4.1404301433811273e-05, |
|
"loss": 0.0, |
|
"step": 51823 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6382942861958537, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4323, |
|
"eval_samples_per_second": 196.929, |
|
"eval_steps_per_second": 1.645, |
|
"step": 51823 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 4.110636878959653e-05, |
|
"loss": 0.0, |
|
"step": 53610 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6374012291483757, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.584, |
|
"eval_samples_per_second": 185.372, |
|
"eval_steps_per_second": 1.548, |
|
"step": 53610 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 4.080843614538179e-05, |
|
"loss": 0.0, |
|
"step": 55397 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6475859910666022, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4306, |
|
"eval_samples_per_second": 197.073, |
|
"eval_steps_per_second": 1.646, |
|
"step": 55397 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 4.051050350116706e-05, |
|
"loss": 0.0, |
|
"step": 57184 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6304695249641794, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4506, |
|
"eval_samples_per_second": 195.466, |
|
"eval_steps_per_second": 1.632, |
|
"step": 57184 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 4.021257085695232e-05, |
|
"loss": 0.0011, |
|
"step": 58971 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6451093860268172, |
|
"eval_loss": NaN, |
|
"eval_runtime": 3.4567, |
|
"eval_samples_per_second": 138.57, |
|
"eval_steps_per_second": 1.157, |
|
"step": 58971 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 3.991463821273758e-05, |
|
"loss": 0.0, |
|
"step": 60758 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.6371792721784336, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3878, |
|
"eval_samples_per_second": 200.599, |
|
"eval_steps_per_second": 1.675, |
|
"step": 60758 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 3.9616705568522846e-05, |
|
"loss": 0.0, |
|
"step": 62545 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6368389980602176, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.369, |
|
"eval_samples_per_second": 202.197, |
|
"eval_steps_per_second": 1.688, |
|
"step": 62545 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 3.9318772924308106e-05, |
|
"loss": 0.0006, |
|
"step": 64332 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6385094217390079, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.8468, |
|
"eval_samples_per_second": 259.365, |
|
"eval_steps_per_second": 2.166, |
|
"step": 64332 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 3.9020840280093365e-05, |
|
"loss": 0.0, |
|
"step": 66119 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.63491709005639, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4159, |
|
"eval_samples_per_second": 198.273, |
|
"eval_steps_per_second": 1.656, |
|
"step": 66119 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 3.872290763587863e-05, |
|
"loss": 0.0, |
|
"step": 67906 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6334269272469489, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.5332, |
|
"eval_samples_per_second": 189.086, |
|
"eval_steps_per_second": 1.579, |
|
"step": 67906 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 3.842497499166389e-05, |
|
"loss": 0.0, |
|
"step": 69693 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.639051440673243, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.9795, |
|
"eval_samples_per_second": 160.763, |
|
"eval_steps_per_second": 1.342, |
|
"step": 69693 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 3.812704234744915e-05, |
|
"loss": 0.0, |
|
"step": 71480 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6345379279480868, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2745, |
|
"eval_samples_per_second": 210.596, |
|
"eval_steps_per_second": 1.759, |
|
"step": 71480 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 3.782910970323441e-05, |
|
"loss": 0.0, |
|
"step": 73267 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.6423277130247822, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3025, |
|
"eval_samples_per_second": 208.034, |
|
"eval_steps_per_second": 1.737, |
|
"step": 73267 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 3.753117705901967e-05, |
|
"loss": 0.0, |
|
"step": 75054 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6374992997591171, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2992, |
|
"eval_samples_per_second": 208.332, |
|
"eval_steps_per_second": 1.74, |
|
"step": 75054 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 3.723324441480494e-05, |
|
"loss": 0.0, |
|
"step": 76841 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.6292381720579646, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3504, |
|
"eval_samples_per_second": 203.794, |
|
"eval_steps_per_second": 1.702, |
|
"step": 76841 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 3.69353117705902e-05, |
|
"loss": 0.0, |
|
"step": 78628 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.6336636427076064, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.1818, |
|
"eval_samples_per_second": 219.543, |
|
"eval_steps_per_second": 1.833, |
|
"step": 78628 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 3.663737912637546e-05, |
|
"loss": 0.0, |
|
"step": 80415 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.6451018731049335, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2283, |
|
"eval_samples_per_second": 214.958, |
|
"eval_steps_per_second": 1.795, |
|
"step": 80415 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 3.6339446482160724e-05, |
|
"loss": 0.0, |
|
"step": 82202 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.6376266844860229, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.9896, |
|
"eval_samples_per_second": 160.224, |
|
"eval_steps_per_second": 1.338, |
|
"step": 82202 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 3.6041513837945984e-05, |
|
"loss": 0.0, |
|
"step": 83989 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.6354616290150793, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3984, |
|
"eval_samples_per_second": 199.716, |
|
"eval_steps_per_second": 1.668, |
|
"step": 83989 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 3.5743581193731244e-05, |
|
"loss": 0.0, |
|
"step": 85776 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.64114792031153, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2, |
|
"eval_samples_per_second": 217.728, |
|
"eval_steps_per_second": 1.818, |
|
"step": 85776 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 3.544564854951651e-05, |
|
"loss": 0.0, |
|
"step": 87563 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.6358090934764191, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0902, |
|
"eval_samples_per_second": 229.163, |
|
"eval_steps_per_second": 1.914, |
|
"step": 87563 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 3.514771590530177e-05, |
|
"loss": 0.0, |
|
"step": 89350 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.64278059785674, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0804, |
|
"eval_samples_per_second": 230.249, |
|
"eval_steps_per_second": 1.923, |
|
"step": 89350 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 3.484978326108703e-05, |
|
"loss": 0.0, |
|
"step": 91137 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.6421150697306759, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0459, |
|
"eval_samples_per_second": 234.122, |
|
"eval_steps_per_second": 1.955, |
|
"step": 91137 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 3.4551850616872296e-05, |
|
"loss": 0.004, |
|
"step": 92924 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.6352199471785989, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9574, |
|
"eval_samples_per_second": 244.709, |
|
"eval_steps_per_second": 2.044, |
|
"step": 92924 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 3.4253917972657556e-05, |
|
"loss": 0.0, |
|
"step": 94711 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.641116936275197, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.8623, |
|
"eval_samples_per_second": 257.205, |
|
"eval_steps_per_second": 2.148, |
|
"step": 94711 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 3.3955985328442816e-05, |
|
"loss": 0.0, |
|
"step": 96498 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.6376506868516961, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0734, |
|
"eval_samples_per_second": 231.022, |
|
"eval_steps_per_second": 1.929, |
|
"step": 96498 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 3.3658052684228076e-05, |
|
"loss": 0.0, |
|
"step": 98285 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.6374884528175125, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9769, |
|
"eval_samples_per_second": 242.303, |
|
"eval_steps_per_second": 2.023, |
|
"step": 98285 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 3.3360120040013336e-05, |
|
"loss": 0.0, |
|
"step": 100072 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.6368413657439933, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4635, |
|
"eval_samples_per_second": 194.44, |
|
"eval_steps_per_second": 1.624, |
|
"step": 100072 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 3.30621873957986e-05, |
|
"loss": 0.0, |
|
"step": 101859 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.6364829030006979, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4227, |
|
"eval_samples_per_second": 197.715, |
|
"eval_steps_per_second": 1.651, |
|
"step": 101859 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 3.276425475158386e-05, |
|
"loss": 0.0, |
|
"step": 103646 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.6412562335406511, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.535, |
|
"eval_samples_per_second": 188.957, |
|
"eval_steps_per_second": 1.578, |
|
"step": 103646 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 3.246632210736912e-05, |
|
"loss": 0.0, |
|
"step": 105433 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.6347360495273153, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3607, |
|
"eval_samples_per_second": 202.908, |
|
"eval_steps_per_second": 1.694, |
|
"step": 105433 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 3.216838946315439e-05, |
|
"loss": 0.0, |
|
"step": 107220 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.6407410498205791, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.469, |
|
"eval_samples_per_second": 194.009, |
|
"eval_steps_per_second": 1.62, |
|
"step": 107220 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"learning_rate": 3.187045681893965e-05, |
|
"loss": 0.0, |
|
"step": 109007 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.6394521622227202, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4014, |
|
"eval_samples_per_second": 199.465, |
|
"eval_steps_per_second": 1.666, |
|
"step": 109007 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 3.157252417472491e-05, |
|
"loss": 0.0, |
|
"step": 110794 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.6373080145210835, |
|
"eval_loss": NaN, |
|
"eval_runtime": 3.0171, |
|
"eval_samples_per_second": 158.763, |
|
"eval_steps_per_second": 1.326, |
|
"step": 110794 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"learning_rate": 3.1274591530510175e-05, |
|
"loss": 0.0, |
|
"step": 112581 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.6356097148176356, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.5297, |
|
"eval_samples_per_second": 189.349, |
|
"eval_steps_per_second": 1.581, |
|
"step": 112581 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 3.0976658886295434e-05, |
|
"loss": 0.0, |
|
"step": 114368 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.6366671342404264, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4813, |
|
"eval_samples_per_second": 193.043, |
|
"eval_steps_per_second": 1.612, |
|
"step": 114368 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 3.0678726242080694e-05, |
|
"loss": 0.0, |
|
"step": 116155 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.6440701993138839, |
|
"eval_loss": NaN, |
|
"eval_runtime": 3.2092, |
|
"eval_samples_per_second": 149.257, |
|
"eval_steps_per_second": 1.246, |
|
"step": 116155 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 3.0380793597865957e-05, |
|
"loss": 0.0017, |
|
"step": 117942 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.6379713046379714, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.5164, |
|
"eval_samples_per_second": 190.349, |
|
"eval_steps_per_second": 1.59, |
|
"step": 117942 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"learning_rate": 3.008286095365122e-05, |
|
"loss": 0.0, |
|
"step": 119729 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.6348476780359295, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4358, |
|
"eval_samples_per_second": 196.654, |
|
"eval_steps_per_second": 1.642, |
|
"step": 119729 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 2.9784928309436484e-05, |
|
"loss": 0.0, |
|
"step": 121516 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.6356471316782075, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.8438, |
|
"eval_samples_per_second": 259.796, |
|
"eval_steps_per_second": 2.169, |
|
"step": 121516 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"learning_rate": 2.948699566522174e-05, |
|
"loss": 0.0, |
|
"step": 123303 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.6391043176626526, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4179, |
|
"eval_samples_per_second": 198.106, |
|
"eval_steps_per_second": 1.654, |
|
"step": 123303 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 2.9189063021007e-05, |
|
"loss": 0.0006, |
|
"step": 125090 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.636170153182671, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.5589, |
|
"eval_samples_per_second": 187.19, |
|
"eval_steps_per_second": 1.563, |
|
"step": 125090 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"learning_rate": 2.8891130376792263e-05, |
|
"loss": 0.0, |
|
"step": 126877 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.6387962677575724, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2538, |
|
"eval_samples_per_second": 212.526, |
|
"eval_steps_per_second": 1.775, |
|
"step": 126877 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 2.8593197732577526e-05, |
|
"loss": 0.0, |
|
"step": 128664 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.6353993285750558, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4543, |
|
"eval_samples_per_second": 195.171, |
|
"eval_steps_per_second": 1.63, |
|
"step": 128664 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"learning_rate": 2.8295265088362786e-05, |
|
"loss": 0.0, |
|
"step": 130451 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.6361857774753318, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4316, |
|
"eval_samples_per_second": 196.992, |
|
"eval_steps_per_second": 1.645, |
|
"step": 130451 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 2.799733244414805e-05, |
|
"loss": 0.0013, |
|
"step": 132238 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.6347066167290887, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3647, |
|
"eval_samples_per_second": 202.564, |
|
"eval_steps_per_second": 1.692, |
|
"step": 132238 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 2.7699399799933313e-05, |
|
"loss": 0.0, |
|
"step": 134025 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.632699704554323, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4813, |
|
"eval_samples_per_second": 193.045, |
|
"eval_steps_per_second": 1.612, |
|
"step": 134025 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 2.7401467155718576e-05, |
|
"loss": 0.0, |
|
"step": 135812 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.6382416594058293, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3498, |
|
"eval_samples_per_second": 203.844, |
|
"eval_steps_per_second": 1.702, |
|
"step": 135812 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"learning_rate": 2.7103534511503836e-05, |
|
"loss": 0.0, |
|
"step": 137599 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.6411355703960205, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.8611, |
|
"eval_samples_per_second": 257.378, |
|
"eval_steps_per_second": 2.149, |
|
"step": 137599 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"learning_rate": 2.68056018672891e-05, |
|
"loss": 0.0, |
|
"step": 139386 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.640435869351532, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2547, |
|
"eval_samples_per_second": 212.448, |
|
"eval_steps_per_second": 1.774, |
|
"step": 139386 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"learning_rate": 2.6507669223074362e-05, |
|
"loss": 0.0, |
|
"step": 141173 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.6391697411777959, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.1542, |
|
"eval_samples_per_second": 222.352, |
|
"eval_steps_per_second": 1.857, |
|
"step": 141173 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 2.6209736578859622e-05, |
|
"loss": 0.0, |
|
"step": 142960 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.6403956068586798, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2858, |
|
"eval_samples_per_second": 209.555, |
|
"eval_steps_per_second": 1.75, |
|
"step": 142960 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"learning_rate": 2.5911803934644885e-05, |
|
"loss": 0.0, |
|
"step": 144747 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.6420577823455521, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0194, |
|
"eval_samples_per_second": 237.198, |
|
"eval_steps_per_second": 1.981, |
|
"step": 144747 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"learning_rate": 2.5613871290430148e-05, |
|
"loss": 0.0, |
|
"step": 146534 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.6363990538472242, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2166, |
|
"eval_samples_per_second": 216.094, |
|
"eval_steps_per_second": 1.805, |
|
"step": 146534 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"learning_rate": 2.5315938646215408e-05, |
|
"loss": 0.0, |
|
"step": 148321 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.6363839910439406, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0538, |
|
"eval_samples_per_second": 233.223, |
|
"eval_steps_per_second": 1.948, |
|
"step": 148321 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 2.5018006002000664e-05, |
|
"loss": 0.0, |
|
"step": 150108 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.6370422337589449, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9129, |
|
"eval_samples_per_second": 250.403, |
|
"eval_steps_per_second": 2.091, |
|
"step": 150108 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 2.472007335778593e-05, |
|
"loss": 0.0, |
|
"step": 151895 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.6357136919112145, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9511, |
|
"eval_samples_per_second": 245.5, |
|
"eval_steps_per_second": 2.05, |
|
"step": 151895 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"learning_rate": 2.4422140713571194e-05, |
|
"loss": 0.0, |
|
"step": 153682 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.6353468815697267, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3614, |
|
"eval_samples_per_second": 202.845, |
|
"eval_steps_per_second": 1.694, |
|
"step": 153682 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"learning_rate": 2.412420806935645e-05, |
|
"loss": 0.0, |
|
"step": 155469 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.6392810219998323, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9531, |
|
"eval_samples_per_second": 245.249, |
|
"eval_steps_per_second": 2.048, |
|
"step": 155469 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 2.3826275425141714e-05, |
|
"loss": 0.0, |
|
"step": 157256 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.639685437438562, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3006, |
|
"eval_samples_per_second": 208.209, |
|
"eval_steps_per_second": 1.739, |
|
"step": 157256 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"learning_rate": 2.3528342780926977e-05, |
|
"loss": 0.0006, |
|
"step": 159043 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.639618138424821, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0432, |
|
"eval_samples_per_second": 234.432, |
|
"eval_steps_per_second": 1.958, |
|
"step": 159043 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 2.3230410136712237e-05, |
|
"loss": 0.0013, |
|
"step": 160830 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.6378478767047344, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.1388, |
|
"eval_samples_per_second": 223.962, |
|
"eval_steps_per_second": 1.87, |
|
"step": 160830 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"learning_rate": 2.29324774924975e-05, |
|
"loss": 0.0, |
|
"step": 162617 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.6385569271433793, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9971, |
|
"eval_samples_per_second": 239.843, |
|
"eval_steps_per_second": 2.003, |
|
"step": 162617 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 2.2634544848282763e-05, |
|
"loss": 0.0, |
|
"step": 164404 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.6414956218149055, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0014, |
|
"eval_samples_per_second": 239.331, |
|
"eval_steps_per_second": 1.999, |
|
"step": 164404 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"learning_rate": 2.2336612204068026e-05, |
|
"loss": 0.0, |
|
"step": 166191 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.6342390696634239, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.987, |
|
"eval_samples_per_second": 241.063, |
|
"eval_steps_per_second": 2.013, |
|
"step": 166191 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"learning_rate": 2.2038679559853283e-05, |
|
"loss": 0.0, |
|
"step": 167978 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.6355638270873869, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9543, |
|
"eval_samples_per_second": 245.1, |
|
"eval_steps_per_second": 2.047, |
|
"step": 167978 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 2.1740746915638546e-05, |
|
"loss": 0.0, |
|
"step": 169765 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.6410132895072398, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.1422, |
|
"eval_samples_per_second": 223.599, |
|
"eval_steps_per_second": 1.867, |
|
"step": 169765 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 2.144281427142381e-05, |
|
"loss": 0.0, |
|
"step": 171552 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.6365928831605492, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.1461, |
|
"eval_samples_per_second": 223.199, |
|
"eval_steps_per_second": 1.864, |
|
"step": 171552 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"learning_rate": 2.114488162720907e-05, |
|
"loss": 0.0, |
|
"step": 173339 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.6328623582197698, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0065, |
|
"eval_samples_per_second": 238.72, |
|
"eval_steps_per_second": 1.993, |
|
"step": 173339 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"learning_rate": 2.0846948982994332e-05, |
|
"loss": 0.0013, |
|
"step": 175126 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.635225093083606, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2096, |
|
"eval_samples_per_second": 216.784, |
|
"eval_steps_per_second": 1.81, |
|
"step": 175126 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"learning_rate": 2.0549016338779595e-05, |
|
"loss": 0.0, |
|
"step": 176913 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.633986562150056, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.7617, |
|
"eval_samples_per_second": 173.442, |
|
"eval_steps_per_second": 1.448, |
|
"step": 176913 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 2.0251083694564855e-05, |
|
"loss": 0.0, |
|
"step": 178700 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.6358424725822532, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.5626, |
|
"eval_samples_per_second": 186.922, |
|
"eval_steps_per_second": 1.561, |
|
"step": 178700 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"learning_rate": 1.995315105035012e-05, |
|
"loss": 0.0, |
|
"step": 180487 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.6366913511247729, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4375, |
|
"eval_samples_per_second": 196.515, |
|
"eval_steps_per_second": 1.641, |
|
"step": 180487 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"learning_rate": 1.9655218406135378e-05, |
|
"loss": 0.0006, |
|
"step": 182274 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.6367565747003845, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3509, |
|
"eval_samples_per_second": 203.749, |
|
"eval_steps_per_second": 1.701, |
|
"step": 182274 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"learning_rate": 1.935728576192064e-05, |
|
"loss": 0.0, |
|
"step": 184061 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.6353013702468686, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0088, |
|
"eval_samples_per_second": 238.45, |
|
"eval_steps_per_second": 1.991, |
|
"step": 184061 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"learning_rate": 1.90593531177059e-05, |
|
"loss": 0.0, |
|
"step": 185848 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.6369532258970184, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.1758, |
|
"eval_samples_per_second": 220.149, |
|
"eval_steps_per_second": 1.838, |
|
"step": 185848 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"learning_rate": 1.8761420473491164e-05, |
|
"loss": 0.0, |
|
"step": 187635 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.6333213286422694, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3075, |
|
"eval_samples_per_second": 207.588, |
|
"eval_steps_per_second": 1.734, |
|
"step": 187635 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"learning_rate": 1.8463487829276428e-05, |
|
"loss": 0.0, |
|
"step": 189422 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.6316263365222284, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.8744, |
|
"eval_samples_per_second": 255.55, |
|
"eval_steps_per_second": 2.134, |
|
"step": 189422 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"learning_rate": 1.8165555185061687e-05, |
|
"loss": 0.0006, |
|
"step": 191209 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.6393596184961253, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.5091, |
|
"eval_samples_per_second": 190.907, |
|
"eval_steps_per_second": 1.594, |
|
"step": 191209 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"learning_rate": 1.786762254084695e-05, |
|
"loss": 0.0, |
|
"step": 192996 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.6323443376514835, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4565, |
|
"eval_samples_per_second": 194.993, |
|
"eval_steps_per_second": 1.628, |
|
"step": 192996 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"learning_rate": 1.756968989663221e-05, |
|
"loss": 0.0, |
|
"step": 194783 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.6405684971827432, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4473, |
|
"eval_samples_per_second": 195.727, |
|
"eval_steps_per_second": 1.634, |
|
"step": 194783 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"learning_rate": 1.7271757252417474e-05, |
|
"loss": 0.0012, |
|
"step": 196570 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.6330752990851513, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4244, |
|
"eval_samples_per_second": 197.575, |
|
"eval_steps_per_second": 1.65, |
|
"step": 196570 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"learning_rate": 1.6973824608202733e-05, |
|
"loss": 0.0, |
|
"step": 198357 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.6397748592870544, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4021, |
|
"eval_samples_per_second": 199.407, |
|
"eval_steps_per_second": 1.665, |
|
"step": 198357 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 1.6675891963987997e-05, |
|
"loss": 0.0, |
|
"step": 200144 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.640183902890303, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4547, |
|
"eval_samples_per_second": 195.14, |
|
"eval_steps_per_second": 1.63, |
|
"step": 200144 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"learning_rate": 1.637795931977326e-05, |
|
"loss": 0.0, |
|
"step": 201931 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_accuracy": 0.6345416867743492, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4355, |
|
"eval_samples_per_second": 196.673, |
|
"eval_steps_per_second": 1.642, |
|
"step": 201931 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"learning_rate": 1.608002667555852e-05, |
|
"loss": 0.0, |
|
"step": 203718 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.6416126303918804, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.881, |
|
"eval_samples_per_second": 254.65, |
|
"eval_steps_per_second": 2.127, |
|
"step": 203718 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"learning_rate": 1.5782094031343783e-05, |
|
"loss": 0.0, |
|
"step": 205505 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_accuracy": 0.635246810870771, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3521, |
|
"eval_samples_per_second": 203.648, |
|
"eval_steps_per_second": 1.701, |
|
"step": 205505 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"learning_rate": 1.5484161387129043e-05, |
|
"loss": 0.0, |
|
"step": 207292 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.635686274509804, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3117, |
|
"eval_samples_per_second": 207.209, |
|
"eval_steps_per_second": 1.73, |
|
"step": 207292 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"learning_rate": 1.5186228742914304e-05, |
|
"loss": 0.0032, |
|
"step": 209079 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_accuracy": 0.6358383124351314, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4352, |
|
"eval_samples_per_second": 196.698, |
|
"eval_steps_per_second": 1.643, |
|
"step": 209079 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"learning_rate": 1.4888296098699567e-05, |
|
"loss": 0.0013, |
|
"step": 210866 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.6405930899426493, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2861, |
|
"eval_samples_per_second": 209.529, |
|
"eval_steps_per_second": 1.75, |
|
"step": 210866 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"learning_rate": 1.4590363454484829e-05, |
|
"loss": 0.0, |
|
"step": 212653 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_accuracy": 0.6353956511992827, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3549, |
|
"eval_samples_per_second": 203.406, |
|
"eval_steps_per_second": 1.699, |
|
"step": 212653 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 1.429243081027009e-05, |
|
"loss": 0.0, |
|
"step": 214440 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.6345421474450066, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4547, |
|
"eval_samples_per_second": 195.139, |
|
"eval_steps_per_second": 1.63, |
|
"step": 214440 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"learning_rate": 1.3994498166055354e-05, |
|
"loss": 0.0, |
|
"step": 216227 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_accuracy": 0.6432686391856368, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.5342, |
|
"eval_samples_per_second": 189.015, |
|
"eval_steps_per_second": 1.578, |
|
"step": 216227 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"learning_rate": 1.3696565521840615e-05, |
|
"loss": 0.0, |
|
"step": 218014 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_accuracy": 0.6326222917132008, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3651, |
|
"eval_samples_per_second": 202.525, |
|
"eval_steps_per_second": 1.691, |
|
"step": 218014 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"learning_rate": 1.3398632877625875e-05, |
|
"loss": 0.0, |
|
"step": 219801 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_accuracy": 0.6357796872798985, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4594, |
|
"eval_samples_per_second": 194.763, |
|
"eval_steps_per_second": 1.626, |
|
"step": 219801 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"learning_rate": 1.3100700233411136e-05, |
|
"loss": 0.0, |
|
"step": 221588 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.6409228526575466, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4398, |
|
"eval_samples_per_second": 196.326, |
|
"eval_steps_per_second": 1.639, |
|
"step": 221588 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 1.28027675891964e-05, |
|
"loss": 0.0, |
|
"step": 223375 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_accuracy": 0.6404630806155583, |
|
"eval_loss": NaN, |
|
"eval_runtime": 3.34, |
|
"eval_samples_per_second": 143.413, |
|
"eval_steps_per_second": 1.198, |
|
"step": 223375 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"learning_rate": 1.2504834944981661e-05, |
|
"loss": 0.0, |
|
"step": 225162 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_accuracy": 0.637574502616336, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.7328, |
|
"eval_samples_per_second": 175.276, |
|
"eval_steps_per_second": 1.464, |
|
"step": 225162 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"learning_rate": 1.2206902300766923e-05, |
|
"loss": 0.0, |
|
"step": 226949 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_accuracy": 0.63958607925068, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3572, |
|
"eval_samples_per_second": 203.207, |
|
"eval_steps_per_second": 1.697, |
|
"step": 226949 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 1.1908969656552186e-05, |
|
"loss": 0.0, |
|
"step": 228736 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.6355922615680507, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0355, |
|
"eval_samples_per_second": 235.321, |
|
"eval_steps_per_second": 1.965, |
|
"step": 228736 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"learning_rate": 1.1611037012337446e-05, |
|
"loss": 0.0, |
|
"step": 230523 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_accuracy": 0.6431967398686892, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3821, |
|
"eval_samples_per_second": 201.084, |
|
"eval_steps_per_second": 1.679, |
|
"step": 230523 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"learning_rate": 1.1313104368122709e-05, |
|
"loss": 0.0, |
|
"step": 232310 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_accuracy": 0.6384714590108781, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3962, |
|
"eval_samples_per_second": 199.899, |
|
"eval_steps_per_second": 1.669, |
|
"step": 232310 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"learning_rate": 1.101517172390797e-05, |
|
"loss": 0.0, |
|
"step": 234097 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_accuracy": 0.6337281095644365, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.8635, |
|
"eval_samples_per_second": 257.042, |
|
"eval_steps_per_second": 2.146, |
|
"step": 234097 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"learning_rate": 1.0717239079693232e-05, |
|
"loss": 0.0, |
|
"step": 235884 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.6389531480810502, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3837, |
|
"eval_samples_per_second": 200.947, |
|
"eval_steps_per_second": 1.678, |
|
"step": 235884 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"learning_rate": 1.0419306435478493e-05, |
|
"loss": 0.0, |
|
"step": 237671 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_accuracy": 0.636236112669734, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4808, |
|
"eval_samples_per_second": 193.082, |
|
"eval_steps_per_second": 1.612, |
|
"step": 237671 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"learning_rate": 1.0121373791263755e-05, |
|
"loss": 0.0, |
|
"step": 239458 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_accuracy": 0.6331533837934105, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.8767, |
|
"eval_samples_per_second": 255.23, |
|
"eval_steps_per_second": 2.131, |
|
"step": 239458 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"learning_rate": 9.823441147049018e-06, |
|
"loss": 0.0, |
|
"step": 241245 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_accuracy": 0.636656406748746, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2751, |
|
"eval_samples_per_second": 210.541, |
|
"eval_steps_per_second": 1.758, |
|
"step": 241245 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"learning_rate": 9.525508502834278e-06, |
|
"loss": 0.0016, |
|
"step": 243032 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.6333677196308585, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.3276, |
|
"eval_samples_per_second": 205.787, |
|
"eval_steps_per_second": 1.718, |
|
"step": 243032 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"learning_rate": 9.227575858619541e-06, |
|
"loss": 0.0, |
|
"step": 244819 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_accuracy": 0.6411590941060628, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.7102, |
|
"eval_samples_per_second": 176.741, |
|
"eval_steps_per_second": 1.476, |
|
"step": 244819 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"learning_rate": 8.929643214404802e-06, |
|
"loss": 0.0, |
|
"step": 246606 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_accuracy": 0.63665891972111, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.397, |
|
"eval_samples_per_second": 199.829, |
|
"eval_steps_per_second": 1.669, |
|
"step": 246606 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"learning_rate": 8.631710570190064e-06, |
|
"loss": 0.0, |
|
"step": 248393 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_accuracy": 0.637815760763141, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2977, |
|
"eval_samples_per_second": 208.473, |
|
"eval_steps_per_second": 1.741, |
|
"step": 248393 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"learning_rate": 8.333777925975325e-06, |
|
"loss": 0.0, |
|
"step": 250180 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.6389793548752514, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4084, |
|
"eval_samples_per_second": 198.888, |
|
"eval_steps_per_second": 1.661, |
|
"step": 250180 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"learning_rate": 8.035845281760587e-06, |
|
"loss": 0.0, |
|
"step": 251967 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_accuracy": 0.6375586326994916, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.1944, |
|
"eval_samples_per_second": 218.284, |
|
"eval_steps_per_second": 1.823, |
|
"step": 251967 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"learning_rate": 7.73791263754585e-06, |
|
"loss": 0.0, |
|
"step": 253754 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_accuracy": 0.6363045444268596, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9755, |
|
"eval_samples_per_second": 242.476, |
|
"eval_steps_per_second": 2.025, |
|
"step": 253754 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"learning_rate": 7.43997999333111e-06, |
|
"loss": 0.0033, |
|
"step": 255541 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_accuracy": 0.642540373190528, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9855, |
|
"eval_samples_per_second": 241.25, |
|
"eval_steps_per_second": 2.015, |
|
"step": 255541 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 7.142047349116372e-06, |
|
"loss": 0.0, |
|
"step": 257328 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.6360186889423758, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9778, |
|
"eval_samples_per_second": 242.187, |
|
"eval_steps_per_second": 2.022, |
|
"step": 257328 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"learning_rate": 6.844114704901635e-06, |
|
"loss": 0.0, |
|
"step": 259115 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_accuracy": 0.6377054679637706, |
|
"eval_loss": NaN, |
|
"eval_runtime": 1.9561, |
|
"eval_samples_per_second": 244.876, |
|
"eval_steps_per_second": 2.045, |
|
"step": 259115 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"learning_rate": 6.546182060686896e-06, |
|
"loss": 0.0, |
|
"step": 260902 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_accuracy": 0.630178854426081, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.1068, |
|
"eval_samples_per_second": 227.357, |
|
"eval_steps_per_second": 1.899, |
|
"step": 260902 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"learning_rate": 6.248249416472158e-06, |
|
"loss": 0.0, |
|
"step": 262689 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_accuracy": 0.6320312280603219, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.051, |
|
"eval_samples_per_second": 233.54, |
|
"eval_steps_per_second": 1.95, |
|
"step": 262689 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"learning_rate": 5.950316772257419e-06, |
|
"loss": 0.0, |
|
"step": 264476 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.6358165946266633, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0868, |
|
"eval_samples_per_second": 229.54, |
|
"eval_steps_per_second": 1.917, |
|
"step": 264476 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"learning_rate": 5.6523841280426815e-06, |
|
"loss": 0.0, |
|
"step": 266263 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_accuracy": 0.6381050924242848, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0799, |
|
"eval_samples_per_second": 230.305, |
|
"eval_steps_per_second": 1.923, |
|
"step": 266263 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 5.354451483827943e-06, |
|
"loss": 0.0, |
|
"step": 268050 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_accuracy": 0.6414367457934395, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0658, |
|
"eval_samples_per_second": 231.876, |
|
"eval_steps_per_second": 1.936, |
|
"step": 268050 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"learning_rate": 5.0565188396132045e-06, |
|
"loss": 0.0, |
|
"step": 269837 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_accuracy": 0.640085841757497, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0508, |
|
"eval_samples_per_second": 233.571, |
|
"eval_steps_per_second": 1.95, |
|
"step": 269837 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"learning_rate": 4.758586195398467e-06, |
|
"loss": 0.0012, |
|
"step": 271624 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.6415057319841915, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0208, |
|
"eval_samples_per_second": 237.04, |
|
"eval_steps_per_second": 1.979, |
|
"step": 271624 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"learning_rate": 4.4606535511837275e-06, |
|
"loss": 0.0, |
|
"step": 273411 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_accuracy": 0.6424933908445805, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.1125, |
|
"eval_samples_per_second": 226.742, |
|
"eval_steps_per_second": 1.893, |
|
"step": 273411 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"learning_rate": 4.16272090696899e-06, |
|
"loss": 0.0, |
|
"step": 275198 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_accuracy": 0.6366794508365293, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.1748, |
|
"eval_samples_per_second": 220.247, |
|
"eval_steps_per_second": 1.839, |
|
"step": 275198 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"learning_rate": 3.864788262754251e-06, |
|
"loss": 0.0, |
|
"step": 276985 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_accuracy": 0.6356290122761572, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0784, |
|
"eval_samples_per_second": 230.462, |
|
"eval_steps_per_second": 1.925, |
|
"step": 276985 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"learning_rate": 3.5668556185395137e-06, |
|
"loss": 0.0, |
|
"step": 278772 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.6411258795934324, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0969, |
|
"eval_samples_per_second": 228.428, |
|
"eval_steps_per_second": 1.908, |
|
"step": 278772 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"learning_rate": 3.268922974324775e-06, |
|
"loss": 0.0, |
|
"step": 280559 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_accuracy": 0.6343390602592582, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0784, |
|
"eval_samples_per_second": 230.468, |
|
"eval_steps_per_second": 1.925, |
|
"step": 280559 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"learning_rate": 2.9709903301100367e-06, |
|
"loss": 0.0007, |
|
"step": 282346 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_accuracy": 0.6368964554842311, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0391, |
|
"eval_samples_per_second": 234.912, |
|
"eval_steps_per_second": 1.962, |
|
"step": 282346 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"learning_rate": 2.6730576858952986e-06, |
|
"loss": 0.0, |
|
"step": 284133 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_accuracy": 0.636098163643511, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.1494, |
|
"eval_samples_per_second": 222.857, |
|
"eval_steps_per_second": 1.861, |
|
"step": 284133 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 2.3751250416805605e-06, |
|
"loss": 0.0013, |
|
"step": 285920 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.6396206236631768, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0359, |
|
"eval_samples_per_second": 235.272, |
|
"eval_steps_per_second": 1.965, |
|
"step": 285920 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"learning_rate": 2.077192397465822e-06, |
|
"loss": 0.0008, |
|
"step": 287707 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_accuracy": 0.6381123797738906, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.4553, |
|
"eval_samples_per_second": 195.089, |
|
"eval_steps_per_second": 1.629, |
|
"step": 287707 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"learning_rate": 1.779259753251084e-06, |
|
"loss": 0.0, |
|
"step": 289494 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_accuracy": 0.6351684010294282, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0507, |
|
"eval_samples_per_second": 233.583, |
|
"eval_steps_per_second": 1.951, |
|
"step": 289494 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"learning_rate": 1.4813271090363454e-06, |
|
"loss": 0.0, |
|
"step": 291281 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_accuracy": 0.6370490792387252, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.0888, |
|
"eval_samples_per_second": 229.314, |
|
"eval_steps_per_second": 1.915, |
|
"step": 291281 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"learning_rate": 1.1833944648216071e-06, |
|
"loss": 0.0, |
|
"step": 293068 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.6399250601196801, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.061, |
|
"eval_samples_per_second": 232.411, |
|
"eval_steps_per_second": 1.941, |
|
"step": 293068 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"learning_rate": 8.854618206068691e-07, |
|
"loss": 0.0031, |
|
"step": 294855 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_accuracy": 0.6401486145381321, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2403, |
|
"eval_samples_per_second": 213.812, |
|
"eval_steps_per_second": 1.785, |
|
"step": 294855 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"learning_rate": 5.875291763921307e-07, |
|
"loss": 0.0, |
|
"step": 296642 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_accuracy": 0.6357598978288633, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2206, |
|
"eval_samples_per_second": 215.711, |
|
"eval_steps_per_second": 1.801, |
|
"step": 296642 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"learning_rate": 2.895965321773925e-07, |
|
"loss": 0.0, |
|
"step": 298429 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_accuracy": 0.6389859154929578, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.2354, |
|
"eval_samples_per_second": 214.281, |
|
"eval_steps_per_second": 1.789, |
|
"step": 298429 |
|
}, |
|
{ |
|
"epoch": 167.88, |
|
"learning_rate": 2.7675891963987998e-08, |
|
"loss": 0.0, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 167.88, |
|
"eval_accuracy": 0.6354230747721045, |
|
"eval_loss": NaN, |
|
"eval_runtime": 2.133, |
|
"eval_samples_per_second": 224.569, |
|
"eval_steps_per_second": 1.875, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 167.88, |
|
"step": 300000, |
|
"total_flos": 3.823595109857886e+18, |
|
"train_loss": 0.00019481298685073851, |
|
"train_runtime": 193101.3534, |
|
"train_samples_per_second": 198.859, |
|
"train_steps_per_second": 1.554 |
|
} |
|
], |
|
"max_steps": 300000, |
|
"num_train_epochs": 168, |
|
"total_flos": 3.823595109857886e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|