{
  "best_metric": 1.6074018478393555,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.02050020500205002,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0001025010250102501,
      "grad_norm": 0.705419659614563,
      "learning_rate": 1.001e-05,
      "loss": 1.8146,
      "step": 1
    },
    {
      "epoch": 0.0001025010250102501,
      "eval_loss": 2.0756940841674805,
      "eval_runtime": 214.3165,
      "eval_samples_per_second": 19.168,
      "eval_steps_per_second": 4.792,
      "step": 1
    },
    {
      "epoch": 0.0002050020500205002,
      "grad_norm": 0.9089563488960266,
      "learning_rate": 2.002e-05,
      "loss": 1.5119,
      "step": 2
    },
    {
      "epoch": 0.0003075030750307503,
      "grad_norm": 0.9483783841133118,
      "learning_rate": 3.0029999999999995e-05,
      "loss": 1.6709,
      "step": 3
    },
    {
      "epoch": 0.0004100041000410004,
      "grad_norm": 1.0198091268539429,
      "learning_rate": 4.004e-05,
      "loss": 1.9187,
      "step": 4
    },
    {
      "epoch": 0.0005125051250512505,
      "grad_norm": 1.091034173965454,
      "learning_rate": 5.005e-05,
      "loss": 1.8457,
      "step": 5
    },
    {
      "epoch": 0.0006150061500615006,
      "grad_norm": 1.2312097549438477,
      "learning_rate": 6.005999999999999e-05,
      "loss": 2.1334,
      "step": 6
    },
    {
      "epoch": 0.0007175071750717508,
      "grad_norm": 1.246527910232544,
      "learning_rate": 7.006999999999998e-05,
      "loss": 1.562,
      "step": 7
    },
    {
      "epoch": 0.0008200082000820008,
      "grad_norm": 1.4241694211959839,
      "learning_rate": 8.008e-05,
      "loss": 1.7474,
      "step": 8
    },
    {
      "epoch": 0.0009225092250922509,
      "grad_norm": 1.28750479221344,
      "learning_rate": 9.009e-05,
      "loss": 2.0525,
      "step": 9
    },
    {
      "epoch": 0.001025010250102501,
      "grad_norm": 1.2573869228363037,
      "learning_rate": 0.0001001,
      "loss": 2.0545,
      "step": 10
    },
    {
      "epoch": 0.001127511275112751,
      "grad_norm": 1.154963731765747,
      "learning_rate": 9.957315789473684e-05,
      "loss": 1.7098,
      "step": 11
    },
    {
      "epoch": 0.0012300123001230013,
      "grad_norm": 1.1779276132583618,
      "learning_rate": 9.904631578947367e-05,
      "loss": 1.9339,
      "step": 12
    },
    {
      "epoch": 0.0013325133251332513,
      "grad_norm": 1.2296435832977295,
      "learning_rate": 9.851947368421052e-05,
      "loss": 1.8391,
      "step": 13
    },
    {
      "epoch": 0.0014350143501435015,
      "grad_norm": 1.2227979898452759,
      "learning_rate": 9.799263157894736e-05,
      "loss": 1.8549,
      "step": 14
    },
    {
      "epoch": 0.0015375153751537515,
      "grad_norm": 1.230628252029419,
      "learning_rate": 9.746578947368421e-05,
      "loss": 1.6791,
      "step": 15
    },
    {
      "epoch": 0.0016400164001640015,
      "grad_norm": 1.2459157705307007,
      "learning_rate": 9.693894736842104e-05,
      "loss": 1.8406,
      "step": 16
    },
    {
      "epoch": 0.0017425174251742518,
      "grad_norm": 1.397838830947876,
      "learning_rate": 9.641210526315789e-05,
      "loss": 1.796,
      "step": 17
    },
    {
      "epoch": 0.0018450184501845018,
      "grad_norm": 1.3623087406158447,
      "learning_rate": 9.588526315789473e-05,
      "loss": 1.6451,
      "step": 18
    },
    {
      "epoch": 0.001947519475194752,
      "grad_norm": 1.386612057685852,
      "learning_rate": 9.535842105263157e-05,
      "loss": 1.8315,
      "step": 19
    },
    {
      "epoch": 0.002050020500205002,
      "grad_norm": 1.4097331762313843,
      "learning_rate": 9.483157894736841e-05,
      "loss": 1.8867,
      "step": 20
    },
    {
      "epoch": 0.002152521525215252,
      "grad_norm": 1.2772889137268066,
      "learning_rate": 9.430473684210526e-05,
      "loss": 1.8137,
      "step": 21
    },
    {
      "epoch": 0.002255022550225502,
      "grad_norm": 1.3513169288635254,
      "learning_rate": 9.37778947368421e-05,
      "loss": 1.6449,
      "step": 22
    },
    {
      "epoch": 0.0023575235752357525,
      "grad_norm": 1.5203684568405151,
      "learning_rate": 9.325105263157894e-05,
      "loss": 1.7515,
      "step": 23
    },
    {
      "epoch": 0.0024600246002460025,
      "grad_norm": 1.2085375785827637,
      "learning_rate": 9.272421052631578e-05,
      "loss": 1.4646,
      "step": 24
    },
    {
      "epoch": 0.0025625256252562525,
      "grad_norm": 1.4566919803619385,
      "learning_rate": 9.219736842105263e-05,
      "loss": 1.8844,
      "step": 25
    },
    {
      "epoch": 0.0026650266502665026,
      "grad_norm": 1.5748794078826904,
      "learning_rate": 9.167052631578946e-05,
      "loss": 1.7089,
      "step": 26
    },
    {
      "epoch": 0.0027675276752767526,
      "grad_norm": 1.3820806741714478,
      "learning_rate": 9.114368421052632e-05,
      "loss": 1.7397,
      "step": 27
    },
    {
      "epoch": 0.002870028700287003,
      "grad_norm": 1.47676420211792,
      "learning_rate": 9.061684210526315e-05,
      "loss": 1.6333,
      "step": 28
    },
    {
      "epoch": 0.002972529725297253,
      "grad_norm": 1.3956615924835205,
      "learning_rate": 9.009e-05,
      "loss": 1.5381,
      "step": 29
    },
    {
      "epoch": 0.003075030750307503,
      "grad_norm": 1.4320456981658936,
      "learning_rate": 8.956315789473683e-05,
      "loss": 1.5204,
      "step": 30
    },
    {
      "epoch": 0.003177531775317753,
      "grad_norm": 1.564431071281433,
      "learning_rate": 8.903631578947368e-05,
      "loss": 1.6053,
      "step": 31
    },
    {
      "epoch": 0.003280032800328003,
      "grad_norm": 1.604783296585083,
      "learning_rate": 8.850947368421052e-05,
      "loss": 1.4296,
      "step": 32
    },
    {
      "epoch": 0.0033825338253382535,
      "grad_norm": 2.1952967643737793,
      "learning_rate": 8.798263157894736e-05,
      "loss": 2.3028,
      "step": 33
    },
    {
      "epoch": 0.0034850348503485036,
      "grad_norm": 1.7859286069869995,
      "learning_rate": 8.745578947368422e-05,
      "loss": 1.0892,
      "step": 34
    },
    {
      "epoch": 0.0035875358753587536,
      "grad_norm": 2.099475145339966,
      "learning_rate": 8.692894736842105e-05,
      "loss": 1.9027,
      "step": 35
    },
    {
      "epoch": 0.0036900369003690036,
      "grad_norm": 2.05716609954834,
      "learning_rate": 8.64021052631579e-05,
      "loss": 1.8596,
      "step": 36
    },
    {
      "epoch": 0.0037925379253792536,
      "grad_norm": 3.045736074447632,
      "learning_rate": 8.587526315789473e-05,
      "loss": 1.6003,
      "step": 37
    },
    {
      "epoch": 0.003895038950389504,
      "grad_norm": 3.077202320098877,
      "learning_rate": 8.534842105263157e-05,
      "loss": 1.7161,
      "step": 38
    },
    {
      "epoch": 0.003997539975399754,
      "grad_norm": 2.8727805614471436,
      "learning_rate": 8.482157894736842e-05,
      "loss": 1.2155,
      "step": 39
    },
    {
      "epoch": 0.004100041000410004,
      "grad_norm": 3.2088754177093506,
      "learning_rate": 8.429473684210525e-05,
      "loss": 2.3325,
      "step": 40
    },
    {
      "epoch": 0.0042025420254202545,
      "grad_norm": 3.467123031616211,
      "learning_rate": 8.376789473684211e-05,
      "loss": 1.7372,
      "step": 41
    },
    {
      "epoch": 0.004305043050430504,
      "grad_norm": 3.2959988117218018,
      "learning_rate": 8.324105263157894e-05,
      "loss": 2.0588,
      "step": 42
    },
    {
      "epoch": 0.0044075440754407546,
      "grad_norm": 3.0905165672302246,
      "learning_rate": 8.271421052631579e-05,
      "loss": 1.793,
      "step": 43
    },
    {
      "epoch": 0.004510045100451004,
      "grad_norm": 4.93464469909668,
      "learning_rate": 8.218736842105262e-05,
      "loss": 1.8903,
      "step": 44
    },
    {
      "epoch": 0.004612546125461255,
      "grad_norm": 2.7704639434814453,
      "learning_rate": 8.166052631578947e-05,
      "loss": 1.1567,
      "step": 45
    },
    {
      "epoch": 0.004715047150471505,
      "grad_norm": 3.1440534591674805,
      "learning_rate": 8.113368421052631e-05,
      "loss": 1.568,
      "step": 46
    },
    {
      "epoch": 0.004817548175481755,
      "grad_norm": 4.074300289154053,
      "learning_rate": 8.060684210526315e-05,
      "loss": 1.9814,
      "step": 47
    },
    {
      "epoch": 0.004920049200492005,
      "grad_norm": 5.697752475738525,
      "learning_rate": 8.008e-05,
      "loss": 1.862,
      "step": 48
    },
    {
      "epoch": 0.005022550225502255,
      "grad_norm": 6.887198448181152,
      "learning_rate": 7.955315789473684e-05,
      "loss": 1.7794,
      "step": 49
    },
    {
      "epoch": 0.005125051250512505,
      "grad_norm": 6.863124847412109,
      "learning_rate": 7.902631578947368e-05,
      "loss": 1.5001,
      "step": 50
    },
    {
      "epoch": 0.005125051250512505,
      "eval_loss": 1.7083282470703125,
      "eval_runtime": 213.8938,
      "eval_samples_per_second": 19.206,
      "eval_steps_per_second": 4.801,
      "step": 50
    },
    {
      "epoch": 0.0052275522755227555,
      "grad_norm": 0.7324660420417786,
      "learning_rate": 7.849947368421052e-05,
      "loss": 1.7833,
      "step": 51
    },
    {
      "epoch": 0.005330053300533005,
      "grad_norm": 1.0079008340835571,
      "learning_rate": 7.797263157894736e-05,
      "loss": 2.0611,
      "step": 52
    },
    {
      "epoch": 0.005432554325543256,
      "grad_norm": 1.0478756427764893,
      "learning_rate": 7.744578947368421e-05,
      "loss": 1.5655,
      "step": 53
    },
    {
      "epoch": 0.005535055350553505,
      "grad_norm": 1.1489282846450806,
      "learning_rate": 7.691894736842104e-05,
      "loss": 2.0112,
      "step": 54
    },
    {
      "epoch": 0.005637556375563756,
      "grad_norm": 1.0685741901397705,
      "learning_rate": 7.63921052631579e-05,
      "loss": 1.837,
      "step": 55
    },
    {
      "epoch": 0.005740057400574006,
      "grad_norm": 0.9752334952354431,
      "learning_rate": 7.586526315789473e-05,
      "loss": 1.8408,
      "step": 56
    },
    {
      "epoch": 0.005842558425584256,
      "grad_norm": 1.0988349914550781,
      "learning_rate": 7.533842105263158e-05,
      "loss": 1.6708,
      "step": 57
    },
    {
      "epoch": 0.005945059450594506,
      "grad_norm": 1.059479832649231,
      "learning_rate": 7.481157894736841e-05,
      "loss": 1.4579,
      "step": 58
    },
    {
      "epoch": 0.006047560475604756,
      "grad_norm": 1.0964906215667725,
      "learning_rate": 7.428473684210526e-05,
      "loss": 1.944,
      "step": 59
    },
    {
      "epoch": 0.006150061500615006,
      "grad_norm": 1.0522820949554443,
      "learning_rate": 7.375789473684209e-05,
      "loss": 1.762,
      "step": 60
    },
    {
      "epoch": 0.0062525625256252566,
      "grad_norm": 1.0768394470214844,
      "learning_rate": 7.323105263157895e-05,
      "loss": 1.7185,
      "step": 61
    },
    {
      "epoch": 0.006355063550635506,
      "grad_norm": 1.1058900356292725,
      "learning_rate": 7.270421052631578e-05,
      "loss": 1.6855,
      "step": 62
    },
    {
      "epoch": 0.006457564575645757,
      "grad_norm": 1.0793980360031128,
      "learning_rate": 7.217736842105263e-05,
      "loss": 1.6553,
      "step": 63
    },
    {
      "epoch": 0.006560065600656006,
      "grad_norm": 1.257716417312622,
      "learning_rate": 7.165052631578947e-05,
      "loss": 1.5379,
      "step": 64
    },
    {
      "epoch": 0.006662566625666257,
      "grad_norm": 1.116843581199646,
      "learning_rate": 7.11236842105263e-05,
      "loss": 1.6653,
      "step": 65
    },
    {
      "epoch": 0.006765067650676507,
      "grad_norm": 1.016861081123352,
      "learning_rate": 7.059684210526315e-05,
      "loss": 1.3905,
      "step": 66
    },
    {
      "epoch": 0.006867568675686757,
      "grad_norm": 1.1021226644515991,
      "learning_rate": 7.006999999999998e-05,
      "loss": 1.6295,
      "step": 67
    },
    {
      "epoch": 0.006970069700697007,
      "grad_norm": 1.1197590827941895,
      "learning_rate": 6.954315789473684e-05,
      "loss": 1.6643,
      "step": 68
    },
    {
      "epoch": 0.007072570725707257,
      "grad_norm": 1.242423415184021,
      "learning_rate": 6.901631578947368e-05,
      "loss": 1.7367,
      "step": 69
    },
    {
      "epoch": 0.007175071750717507,
      "grad_norm": 1.1498782634735107,
      "learning_rate": 6.848947368421052e-05,
      "loss": 1.6476,
      "step": 70
    },
    {
      "epoch": 0.007277572775727758,
      "grad_norm": 1.174621820449829,
      "learning_rate": 6.796263157894737e-05,
      "loss": 2.0002,
      "step": 71
    },
    {
      "epoch": 0.007380073800738007,
      "grad_norm": 1.1680294275283813,
      "learning_rate": 6.74357894736842e-05,
      "loss": 1.4689,
      "step": 72
    },
    {
      "epoch": 0.007482574825748258,
      "grad_norm": 1.3079551458358765,
      "learning_rate": 6.690894736842105e-05,
      "loss": 1.5757,
      "step": 73
    },
    {
      "epoch": 0.007585075850758507,
      "grad_norm": 1.2481780052185059,
      "learning_rate": 6.638210526315788e-05,
      "loss": 1.6238,
      "step": 74
    },
    {
      "epoch": 0.007687576875768758,
      "grad_norm": 1.3879331350326538,
      "learning_rate": 6.585526315789474e-05,
      "loss": 1.9082,
      "step": 75
    },
    {
      "epoch": 0.007790077900779008,
      "grad_norm": 1.6329386234283447,
      "learning_rate": 6.532842105263157e-05,
      "loss": 1.739,
      "step": 76
    },
    {
      "epoch": 0.007892578925789259,
      "grad_norm": 1.4674111604690552,
      "learning_rate": 6.480157894736842e-05,
      "loss": 1.6045,
      "step": 77
    },
    {
      "epoch": 0.007995079950799507,
      "grad_norm": 1.4382115602493286,
      "learning_rate": 6.427473684210526e-05,
      "loss": 1.4885,
      "step": 78
    },
    {
      "epoch": 0.008097580975809758,
      "grad_norm": 1.4571962356567383,
      "learning_rate": 6.37478947368421e-05,
      "loss": 1.3451,
      "step": 79
    },
    {
      "epoch": 0.008200082000820008,
      "grad_norm": 1.6193132400512695,
      "learning_rate": 6.322105263157894e-05,
      "loss": 1.8344,
      "step": 80
    },
    {
      "epoch": 0.008302583025830259,
      "grad_norm": 1.673788070678711,
      "learning_rate": 6.269421052631577e-05,
      "loss": 1.4983,
      "step": 81
    },
    {
      "epoch": 0.008405084050840509,
      "grad_norm": 1.7791383266448975,
      "learning_rate": 6.216736842105263e-05,
      "loss": 1.8181,
      "step": 82
    },
    {
      "epoch": 0.008507585075850758,
      "grad_norm": 1.6379783153533936,
      "learning_rate": 6.164052631578947e-05,
      "loss": 1.5085,
      "step": 83
    },
    {
      "epoch": 0.008610086100861008,
      "grad_norm": 2.0746712684631348,
      "learning_rate": 6.111368421052631e-05,
      "loss": 1.8331,
      "step": 84
    },
    {
      "epoch": 0.008712587125871259,
      "grad_norm": 1.9016090631484985,
      "learning_rate": 6.058684210526315e-05,
      "loss": 1.6955,
      "step": 85
    },
    {
      "epoch": 0.008815088150881509,
      "grad_norm": 2.0374410152435303,
      "learning_rate": 6.005999999999999e-05,
      "loss": 1.6262,
      "step": 86
    },
    {
      "epoch": 0.00891758917589176,
      "grad_norm": 2.458918809890747,
      "learning_rate": 5.953315789473684e-05,
      "loss": 1.782,
      "step": 87
    },
    {
      "epoch": 0.009020090200902008,
      "grad_norm": 2.420497179031372,
      "learning_rate": 5.9006315789473676e-05,
      "loss": 1.4256,
      "step": 88
    },
    {
      "epoch": 0.009122591225912259,
      "grad_norm": 3.1010406017303467,
      "learning_rate": 5.847947368421053e-05,
      "loss": 1.5861,
      "step": 89
    },
    {
      "epoch": 0.00922509225092251,
      "grad_norm": 2.9462192058563232,
      "learning_rate": 5.795263157894737e-05,
      "loss": 1.7643,
      "step": 90
    },
    {
      "epoch": 0.00932759327593276,
      "grad_norm": 3.0921950340270996,
      "learning_rate": 5.742578947368421e-05,
      "loss": 1.7482,
      "step": 91
    },
    {
      "epoch": 0.00943009430094301,
      "grad_norm": 3.2740588188171387,
      "learning_rate": 5.6898947368421046e-05,
      "loss": 1.7103,
      "step": 92
    },
    {
      "epoch": 0.009532595325953259,
      "grad_norm": 3.282179594039917,
      "learning_rate": 5.6372105263157886e-05,
      "loss": 1.3958,
      "step": 93
    },
    {
      "epoch": 0.00963509635096351,
      "grad_norm": 2.8050053119659424,
      "learning_rate": 5.584526315789473e-05,
      "loss": 1.0987,
      "step": 94
    },
    {
      "epoch": 0.00973759737597376,
      "grad_norm": 3.167931079864502,
      "learning_rate": 5.531842105263158e-05,
      "loss": 1.0239,
      "step": 95
    },
    {
      "epoch": 0.00984009840098401,
      "grad_norm": 4.01984167098999,
      "learning_rate": 5.4791578947368424e-05,
      "loss": 2.1924,
      "step": 96
    },
    {
      "epoch": 0.00994259942599426,
      "grad_norm": 7.1500983238220215,
      "learning_rate": 5.426473684210526e-05,
      "loss": 1.6619,
      "step": 97
    },
    {
      "epoch": 0.01004510045100451,
      "grad_norm": 3.5555317401885986,
      "learning_rate": 5.37378947368421e-05,
      "loss": 1.0794,
      "step": 98
    },
    {
      "epoch": 0.01014760147601476,
      "grad_norm": 3.8048603534698486,
      "learning_rate": 5.321105263157894e-05,
      "loss": 1.4438,
      "step": 99
    },
    {
      "epoch": 0.01025010250102501,
      "grad_norm": 6.511986255645752,
      "learning_rate": 5.268421052631578e-05,
      "loss": 2.3968,
      "step": 100
    },
    {
      "epoch": 0.01025010250102501,
      "eval_loss": 1.6530330181121826,
      "eval_runtime": 213.675,
      "eval_samples_per_second": 19.225,
      "eval_steps_per_second": 4.806,
      "step": 100
    },
    {
      "epoch": 0.01035260352603526,
      "grad_norm": 0.74406498670578,
      "learning_rate": 5.2157368421052626e-05,
      "loss": 2.0527,
      "step": 101
    },
    {
      "epoch": 0.010455104551045511,
      "grad_norm": 0.8070314526557922,
      "learning_rate": 5.163052631578947e-05,
      "loss": 1.6314,
      "step": 102
    },
    {
      "epoch": 0.01055760557605576,
      "grad_norm": 0.8939439058303833,
      "learning_rate": 5.110368421052632e-05,
      "loss": 1.8166,
      "step": 103
    },
    {
      "epoch": 0.01066010660106601,
      "grad_norm": 0.9275736808776855,
      "learning_rate": 5.057684210526316e-05,
      "loss": 1.7837,
      "step": 104
    },
    {
      "epoch": 0.01076260762607626,
      "grad_norm": 1.0159188508987427,
      "learning_rate": 5.005e-05,
      "loss": 1.8655,
      "step": 105
    },
    {
      "epoch": 0.010865108651086511,
      "grad_norm": 0.9382534027099609,
      "learning_rate": 4.9523157894736836e-05,
      "loss": 1.634,
      "step": 106
    },
    {
      "epoch": 0.010967609676096762,
      "grad_norm": 0.8507430553436279,
      "learning_rate": 4.899631578947368e-05,
      "loss": 1.6201,
      "step": 107
    },
    {
      "epoch": 0.01107011070110701,
      "grad_norm": 0.9986007809638977,
      "learning_rate": 4.846947368421052e-05,
      "loss": 1.7325,
      "step": 108
    },
    {
      "epoch": 0.01117261172611726,
      "grad_norm": 0.955406904220581,
      "learning_rate": 4.794263157894737e-05,
      "loss": 1.6368,
      "step": 109
    },
    {
      "epoch": 0.011275112751127511,
      "grad_norm": 0.9369739294052124,
      "learning_rate": 4.7415789473684206e-05,
      "loss": 1.5455,
      "step": 110
    },
    {
      "epoch": 0.011377613776137762,
      "grad_norm": 1.0813074111938477,
      "learning_rate": 4.688894736842105e-05,
      "loss": 1.6737,
      "step": 111
    },
    {
      "epoch": 0.011480114801148012,
      "grad_norm": 1.034050703048706,
      "learning_rate": 4.636210526315789e-05,
      "loss": 1.5688,
      "step": 112
    },
    {
      "epoch": 0.01158261582615826,
      "grad_norm": 1.0678492784500122,
      "learning_rate": 4.583526315789473e-05,
      "loss": 1.6165,
      "step": 113
    },
    {
      "epoch": 0.011685116851168511,
      "grad_norm": 1.0256990194320679,
      "learning_rate": 4.530842105263158e-05,
      "loss": 1.3301,
      "step": 114
    },
    {
      "epoch": 0.011787617876178762,
      "grad_norm": 1.0190774202346802,
      "learning_rate": 4.4781578947368416e-05,
      "loss": 1.6662,
      "step": 115
    },
    {
      "epoch": 0.011890118901189012,
      "grad_norm": 1.0290199518203735,
      "learning_rate": 4.425473684210526e-05,
      "loss": 1.5645,
      "step": 116
    },
    {
      "epoch": 0.011992619926199263,
      "grad_norm": 1.0103486776351929,
      "learning_rate": 4.372789473684211e-05,
      "loss": 1.2945,
      "step": 117
    },
    {
      "epoch": 0.012095120951209511,
      "grad_norm": 1.2646342515945435,
      "learning_rate": 4.320105263157895e-05,
      "loss": 1.735,
      "step": 118
    },
    {
      "epoch": 0.012197621976219762,
      "grad_norm": 1.2448196411132812,
      "learning_rate": 4.2674210526315786e-05,
      "loss": 1.5587,
      "step": 119
    },
    {
      "epoch": 0.012300123001230012,
      "grad_norm": 1.3261544704437256,
      "learning_rate": 4.2147368421052626e-05,
      "loss": 1.7949,
      "step": 120
    },
    {
      "epoch": 0.012402624026240263,
      "grad_norm": 1.2770674228668213,
      "learning_rate": 4.162052631578947e-05,
      "loss": 1.4959,
      "step": 121
    },
    {
      "epoch": 0.012505125051250513,
      "grad_norm": 1.4941719770431519,
      "learning_rate": 4.109368421052631e-05,
      "loss": 1.5308,
      "step": 122
    },
    {
      "epoch": 0.012607626076260762,
      "grad_norm": 1.2907934188842773,
      "learning_rate": 4.056684210526316e-05,
      "loss": 1.7877,
      "step": 123
    },
    {
      "epoch": 0.012710127101271012,
      "grad_norm": 1.3961200714111328,
      "learning_rate": 4.004e-05,
      "loss": 1.7712,
      "step": 124
    },
    {
      "epoch": 0.012812628126281263,
      "grad_norm": 1.4105072021484375,
      "learning_rate": 3.951315789473684e-05,
      "loss": 1.6403,
      "step": 125
    },
    {
      "epoch": 0.012915129151291513,
      "grad_norm": 1.323488473892212,
      "learning_rate": 3.898631578947368e-05,
      "loss": 1.5076,
      "step": 126
    },
    {
      "epoch": 0.013017630176301764,
      "grad_norm": 1.4488778114318848,
      "learning_rate": 3.845947368421052e-05,
      "loss": 1.655,
      "step": 127
    },
    {
      "epoch": 0.013120131201312012,
      "grad_norm": 1.50008225440979,
      "learning_rate": 3.7932631578947367e-05,
      "loss": 1.7404,
      "step": 128
    },
    {
      "epoch": 0.013222632226322263,
      "grad_norm": 1.4806251525878906,
      "learning_rate": 3.7405789473684206e-05,
      "loss": 1.4379,
      "step": 129
    },
    {
      "epoch": 0.013325133251332513,
      "grad_norm": 1.5019195079803467,
      "learning_rate": 3.6878947368421045e-05,
      "loss": 1.5715,
      "step": 130
    },
    {
      "epoch": 0.013427634276342764,
      "grad_norm": 1.620043158531189,
      "learning_rate": 3.635210526315789e-05,
      "loss": 1.0703,
      "step": 131
    },
    {
      "epoch": 0.013530135301353014,
      "grad_norm": 1.8016197681427002,
      "learning_rate": 3.582526315789474e-05,
      "loss": 1.3446,
      "step": 132
    },
    {
      "epoch": 0.013632636326363263,
      "grad_norm": 2.6074109077453613,
      "learning_rate": 3.5298421052631576e-05,
      "loss": 1.3707,
      "step": 133
    },
    {
      "epoch": 0.013735137351373513,
      "grad_norm": 2.0289924144744873,
      "learning_rate": 3.477157894736842e-05,
      "loss": 1.836,
      "step": 134
    },
    {
      "epoch": 0.013837638376383764,
      "grad_norm": 2.613877058029175,
      "learning_rate": 3.424473684210526e-05,
      "loss": 1.9227,
      "step": 135
    },
    {
      "epoch": 0.013940139401394014,
      "grad_norm": 2.5930979251861572,
      "learning_rate": 3.37178947368421e-05,
      "loss": 1.5137,
      "step": 136
    },
    {
      "epoch": 0.014042640426404265,
      "grad_norm": 2.3788273334503174,
      "learning_rate": 3.319105263157894e-05,
      "loss": 1.5968,
      "step": 137
    },
    {
      "epoch": 0.014145141451414513,
      "grad_norm": 2.544044256210327,
      "learning_rate": 3.2664210526315786e-05,
      "loss": 1.8297,
      "step": 138
    },
    {
      "epoch": 0.014247642476424764,
      "grad_norm": 1.9537235498428345,
      "learning_rate": 3.213736842105263e-05,
      "loss": 1.19,
      "step": 139
    },
    {
      "epoch": 0.014350143501435014,
      "grad_norm": 2.7683722972869873,
      "learning_rate": 3.161052631578947e-05,
      "loss": 1.753,
      "step": 140
    },
    {
      "epoch": 0.014452644526445265,
      "grad_norm": 2.9598679542541504,
      "learning_rate": 3.108368421052632e-05,
      "loss": 1.6878,
      "step": 141
    },
    {
      "epoch": 0.014555145551455515,
      "grad_norm": 2.162545919418335,
      "learning_rate": 3.0556842105263156e-05,
      "loss": 1.0099,
      "step": 142
    },
    {
      "epoch": 0.014657646576465764,
      "grad_norm": 2.538316249847412,
      "learning_rate": 3.0029999999999995e-05,
      "loss": 1.57,
      "step": 143
    },
    {
      "epoch": 0.014760147601476014,
      "grad_norm": 2.4566807746887207,
      "learning_rate": 2.9503157894736838e-05,
      "loss": 1.2957,
      "step": 144
    },
    {
      "epoch": 0.014862648626486265,
      "grad_norm": 2.836510181427002,
      "learning_rate": 2.8976315789473684e-05,
      "loss": 1.7585,
      "step": 145
    },
    {
      "epoch": 0.014965149651496515,
      "grad_norm": 2.739304780960083,
      "learning_rate": 2.8449473684210523e-05,
      "loss": 1.0675,
      "step": 146
    },
    {
      "epoch": 0.015067650676506766,
      "grad_norm": 3.267035484313965,
      "learning_rate": 2.7922631578947366e-05,
      "loss": 1.3077,
      "step": 147
    },
    {
      "epoch": 0.015170151701517014,
      "grad_norm": 3.737492084503174,
      "learning_rate": 2.7395789473684212e-05,
      "loss": 1.5623,
      "step": 148
    },
    {
      "epoch": 0.015272652726527265,
      "grad_norm": 3.9221222400665283,
      "learning_rate": 2.686894736842105e-05,
      "loss": 1.4467,
      "step": 149
    },
    {
      "epoch": 0.015375153751537515,
      "grad_norm": 9.252695083618164,
      "learning_rate": 2.634210526315789e-05,
      "loss": 2.2941,
      "step": 150
    },
    {
      "epoch": 0.015375153751537515,
      "eval_loss": 1.6176789999008179,
      "eval_runtime": 213.9116,
      "eval_samples_per_second": 19.204,
      "eval_steps_per_second": 4.801,
      "step": 150
    },
    {
      "epoch": 0.015477654776547766,
      "grad_norm": 0.5801679491996765,
      "learning_rate": 2.5815263157894736e-05,
      "loss": 1.5924,
      "step": 151
    },
    {
      "epoch": 0.015580155801558016,
      "grad_norm": 0.7414329051971436,
      "learning_rate": 2.528842105263158e-05,
      "loss": 1.8859,
      "step": 152
    },
    {
      "epoch": 0.015682656826568265,
      "grad_norm": 0.7194873690605164,
      "learning_rate": 2.4761578947368418e-05,
      "loss": 1.7569,
      "step": 153
    },
    {
      "epoch": 0.015785157851578517,
      "grad_norm": 0.7468145489692688,
      "learning_rate": 2.423473684210526e-05,
      "loss": 1.5206,
      "step": 154
    },
    {
      "epoch": 0.015887658876588766,
      "grad_norm": 0.9180918335914612,
      "learning_rate": 2.3707894736842103e-05,
      "loss": 1.6067,
      "step": 155
    },
    {
      "epoch": 0.015990159901599015,
      "grad_norm": 1.052054762840271,
      "learning_rate": 2.3181052631578946e-05,
      "loss": 1.5043,
      "step": 156
    },
    {
      "epoch": 0.016092660926609267,
      "grad_norm": 0.8969054818153381,
      "learning_rate": 2.265421052631579e-05,
      "loss": 1.5507,
      "step": 157
    },
    {
      "epoch": 0.016195161951619515,
      "grad_norm": 0.9579923152923584,
      "learning_rate": 2.212736842105263e-05,
      "loss": 1.5959,
      "step": 158
    },
    {
      "epoch": 0.016297662976629768,
      "grad_norm": 1.0006402730941772,
      "learning_rate": 2.1600526315789474e-05,
      "loss": 1.6553,
      "step": 159
    },
    {
      "epoch": 0.016400164001640016,
      "grad_norm": 1.090065598487854,
      "learning_rate": 2.1073684210526313e-05,
      "loss": 1.7441,
      "step": 160
    },
    {
      "epoch": 0.016502665026650265,
      "grad_norm": 1.0853620767593384,
      "learning_rate": 2.0546842105263155e-05,
      "loss": 1.8341,
      "step": 161
    },
    {
      "epoch": 0.016605166051660517,
      "grad_norm": 0.9956846833229065,
      "learning_rate": 2.002e-05,
      "loss": 1.7975,
      "step": 162
    },
    {
      "epoch": 0.016707667076670766,
      "grad_norm": 1.2386821508407593,
      "learning_rate": 1.949315789473684e-05,
      "loss": 1.7063,
      "step": 163
    },
    {
      "epoch": 0.016810168101681018,
      "grad_norm": 0.9561980962753296,
      "learning_rate": 1.8966315789473683e-05,
      "loss": 1.4661,
      "step": 164
    },
    {
      "epoch": 0.016912669126691267,
      "grad_norm": 1.2328252792358398,
      "learning_rate": 1.8439473684210522e-05,
      "loss": 1.7836,
      "step": 165
    },
    {
      "epoch": 0.017015170151701516,
      "grad_norm": 1.1177650690078735,
      "learning_rate": 1.791263157894737e-05,
      "loss": 1.7525,
      "step": 166
    },
    {
      "epoch": 0.017117671176711768,
      "grad_norm": 1.012744426727295,
      "learning_rate": 1.738578947368421e-05,
      "loss": 1.5806,
      "step": 167
    },
    {
      "epoch": 0.017220172201722016,
      "grad_norm": 1.1991872787475586,
      "learning_rate": 1.685894736842105e-05,
      "loss": 1.9107,
      "step": 168
    },
    {
      "epoch": 0.01732267322673227,
      "grad_norm": 1.1519800424575806,
      "learning_rate": 1.6332105263157893e-05,
      "loss": 1.7668,
      "step": 169
    },
    {
      "epoch": 0.017425174251742517,
      "grad_norm": 1.0659619569778442,
      "learning_rate": 1.5805263157894735e-05,
      "loss": 1.5949,
      "step": 170
    },
    {
      "epoch": 0.017527675276752766,
      "grad_norm": 1.173683524131775,
      "learning_rate": 1.5278421052631578e-05,
      "loss": 1.4432,
      "step": 171
    },
    {
      "epoch": 0.017630176301763018,
      "grad_norm": 1.2720458507537842,
      "learning_rate": 1.4751578947368419e-05,
      "loss": 1.6366,
      "step": 172
    },
    {
      "epoch": 0.017732677326773267,
      "grad_norm": 1.412047266960144,
      "learning_rate": 1.4224736842105262e-05,
      "loss": 1.3457,
      "step": 173
    },
    {
      "epoch": 0.01783517835178352,
      "grad_norm": 1.2512191534042358,
      "learning_rate": 1.3697894736842106e-05,
      "loss": 1.4677,
      "step": 174
    },
    {
      "epoch": 0.017937679376793768,
      "grad_norm": 1.4642128944396973,
      "learning_rate": 1.3171052631578945e-05,
      "loss": 1.6189,
      "step": 175
    },
    {
      "epoch": 0.018040180401804017,
      "grad_norm": 1.37020742893219,
      "learning_rate": 1.264421052631579e-05,
      "loss": 1.669,
      "step": 176
    },
    {
      "epoch": 0.01814268142681427,
      "grad_norm": 1.3239974975585938,
      "learning_rate": 1.211736842105263e-05,
      "loss": 1.5652,
      "step": 177
    },
    {
      "epoch": 0.018245182451824517,
      "grad_norm": 1.3516684770584106,
      "learning_rate": 1.1590526315789473e-05,
      "loss": 1.5116,
      "step": 178
    },
    {
      "epoch": 0.01834768347683477,
      "grad_norm": 1.7171400785446167,
      "learning_rate": 1.1063684210526316e-05,
      "loss": 2.1205,
      "step": 179
    },
    {
      "epoch": 0.01845018450184502,
      "grad_norm": 1.3495593070983887,
      "learning_rate": 1.0536842105263156e-05,
      "loss": 1.1325,
      "step": 180
    },
    {
      "epoch": 0.018552685526855267,
      "grad_norm": 1.9317636489868164,
      "learning_rate": 1.001e-05,
      "loss": 1.5652,
      "step": 181
    },
    {
      "epoch": 0.01865518655186552,
      "grad_norm": 2.422668695449829,
      "learning_rate": 9.483157894736842e-06,
      "loss": 1.5383,
      "step": 182
    },
    {
      "epoch": 0.018757687576875768,
      "grad_norm": 2.1638026237487793,
      "learning_rate": 8.956315789473684e-06,
      "loss": 1.8706,
      "step": 183
    },
    {
      "epoch": 0.01886018860188602,
      "grad_norm": 2.067598342895508,
      "learning_rate": 8.429473684210525e-06,
      "loss": 1.867,
      "step": 184
    },
    {
      "epoch": 0.01896268962689627,
      "grad_norm": 2.594118118286133,
      "learning_rate": 7.902631578947368e-06,
      "loss": 1.5805,
      "step": 185
    },
    {
      "epoch": 0.019065190651906518,
      "grad_norm": 2.4905567169189453,
      "learning_rate": 7.3757894736842095e-06,
      "loss": 1.0738,
      "step": 186
    },
    {
      "epoch": 0.01916769167691677,
      "grad_norm": 2.5180282592773438,
      "learning_rate": 6.848947368421053e-06,
      "loss": 1.7173,
      "step": 187
    },
    {
      "epoch": 0.01927019270192702,
      "grad_norm": 2.4175117015838623,
      "learning_rate": 6.322105263157895e-06,
      "loss": 1.5599,
      "step": 188
    },
    {
      "epoch": 0.01937269372693727,
      "grad_norm": 2.6707475185394287,
      "learning_rate": 5.7952631578947365e-06,
      "loss": 1.3726,
      "step": 189
    },
    {
      "epoch": 0.01947519475194752,
      "grad_norm": 3.7667629718780518,
      "learning_rate": 5.268421052631578e-06,
      "loss": 0.8815,
      "step": 190
    },
    {
      "epoch": 0.019577695776957768,
      "grad_norm": 3.1217727661132812,
      "learning_rate": 4.741578947368421e-06,
      "loss": 1.5793,
      "step": 191
    },
    {
      "epoch": 0.01968019680196802,
      "grad_norm": 3.0410215854644775,
      "learning_rate": 4.2147368421052626e-06,
      "loss": 1.5182,
      "step": 192
    },
    {
      "epoch": 0.01978269782697827,
      "grad_norm": 2.907959222793579,
      "learning_rate": 3.6878947368421047e-06,
      "loss": 1.2825,
      "step": 193
    },
    {
      "epoch": 0.01988519885198852,
      "grad_norm": 3.6714532375335693,
      "learning_rate": 3.1610526315789474e-06,
      "loss": 1.2199,
      "step": 194
    },
    {
      "epoch": 0.01998769987699877,
      "grad_norm": 3.782942295074463,
      "learning_rate": 2.634210526315789e-06,
      "loss": 1.436,
      "step": 195
    },
    {
      "epoch": 0.02009020090200902,
      "grad_norm": 4.33630895614624,
      "learning_rate": 2.1073684210526313e-06,
      "loss": 1.2589,
      "step": 196
    },
    {
      "epoch": 0.02019270192701927,
      "grad_norm": 3.1009058952331543,
      "learning_rate": 1.5805263157894737e-06,
      "loss": 1.0389,
      "step": 197
    },
    {
      "epoch": 0.02029520295202952,
      "grad_norm": 3.7881686687469482,
      "learning_rate": 1.0536842105263156e-06,
      "loss": 1.0837,
      "step": 198
    },
    {
      "epoch": 0.02039770397703977,
      "grad_norm": 5.994607925415039,
      "learning_rate": 5.268421052631578e-07,
      "loss": 1.7627,
      "step": 199
    },
    {
      "epoch": 0.02050020500205002,
      "grad_norm": 6.927132606506348,
      "learning_rate": 0.0,
      "loss": 1.7932,
      "step": 200
    },
    {
      "epoch": 0.02050020500205002,
      "eval_loss": 1.6074018478393555,
      "eval_runtime": 214.0108,
      "eval_samples_per_second": 19.195,
      "eval_steps_per_second": 4.799,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.127217035280384e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}