{ "best_metric": 1.6176789999008179, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.015375153751537515, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001025010250102501, "grad_norm": 0.705419659614563, "learning_rate": 1.001e-05, "loss": 1.8146, "step": 1 }, { "epoch": 0.0001025010250102501, "eval_loss": 2.0756940841674805, "eval_runtime": 214.3165, "eval_samples_per_second": 19.168, "eval_steps_per_second": 4.792, "step": 1 }, { "epoch": 0.0002050020500205002, "grad_norm": 0.9089563488960266, "learning_rate": 2.002e-05, "loss": 1.5119, "step": 2 }, { "epoch": 0.0003075030750307503, "grad_norm": 0.9483783841133118, "learning_rate": 3.0029999999999995e-05, "loss": 1.6709, "step": 3 }, { "epoch": 0.0004100041000410004, "grad_norm": 1.0198091268539429, "learning_rate": 4.004e-05, "loss": 1.9187, "step": 4 }, { "epoch": 0.0005125051250512505, "grad_norm": 1.091034173965454, "learning_rate": 5.005e-05, "loss": 1.8457, "step": 5 }, { "epoch": 0.0006150061500615006, "grad_norm": 1.2312097549438477, "learning_rate": 6.005999999999999e-05, "loss": 2.1334, "step": 6 }, { "epoch": 0.0007175071750717508, "grad_norm": 1.246527910232544, "learning_rate": 7.006999999999998e-05, "loss": 1.562, "step": 7 }, { "epoch": 0.0008200082000820008, "grad_norm": 1.4241694211959839, "learning_rate": 8.008e-05, "loss": 1.7474, "step": 8 }, { "epoch": 0.0009225092250922509, "grad_norm": 1.28750479221344, "learning_rate": 9.009e-05, "loss": 2.0525, "step": 9 }, { "epoch": 0.001025010250102501, "grad_norm": 1.2573869228363037, "learning_rate": 0.0001001, "loss": 2.0545, "step": 10 }, { "epoch": 0.001127511275112751, "grad_norm": 1.154963731765747, "learning_rate": 9.957315789473684e-05, "loss": 1.7098, "step": 11 }, { "epoch": 0.0012300123001230013, "grad_norm": 1.1779276132583618, "learning_rate": 9.904631578947367e-05, "loss": 1.9339, "step": 12 }, { "epoch": 0.0013325133251332513, "grad_norm": 1.2296435832977295, "learning_rate": 9.851947368421052e-05, "loss": 1.8391, "step": 13 }, { "epoch": 0.0014350143501435015, "grad_norm": 1.2227979898452759, "learning_rate": 9.799263157894736e-05, "loss": 1.8549, "step": 14 }, { "epoch": 0.0015375153751537515, "grad_norm": 1.230628252029419, "learning_rate": 9.746578947368421e-05, "loss": 1.6791, "step": 15 }, { "epoch": 0.0016400164001640015, "grad_norm": 1.2459157705307007, "learning_rate": 9.693894736842104e-05, "loss": 1.8406, "step": 16 }, { "epoch": 0.0017425174251742518, "grad_norm": 1.397838830947876, "learning_rate": 9.641210526315789e-05, "loss": 1.796, "step": 17 }, { "epoch": 0.0018450184501845018, "grad_norm": 1.3623087406158447, "learning_rate": 9.588526315789473e-05, "loss": 1.6451, "step": 18 }, { "epoch": 0.001947519475194752, "grad_norm": 1.386612057685852, "learning_rate": 9.535842105263157e-05, "loss": 1.8315, "step": 19 }, { "epoch": 0.002050020500205002, "grad_norm": 1.4097331762313843, "learning_rate": 9.483157894736841e-05, "loss": 1.8867, "step": 20 }, { "epoch": 0.002152521525215252, "grad_norm": 1.2772889137268066, "learning_rate": 9.430473684210526e-05, "loss": 1.8137, "step": 21 }, { "epoch": 0.002255022550225502, "grad_norm": 1.3513169288635254, "learning_rate": 9.37778947368421e-05, "loss": 1.6449, "step": 22 }, { "epoch": 0.0023575235752357525, "grad_norm": 1.5203684568405151, "learning_rate": 9.325105263157894e-05, "loss": 1.7515, "step": 23 }, { "epoch": 0.0024600246002460025, "grad_norm": 1.2085375785827637, "learning_rate": 9.272421052631578e-05, "loss": 1.4646, "step": 24 }, { "epoch": 0.0025625256252562525, "grad_norm": 1.4566919803619385, "learning_rate": 9.219736842105263e-05, "loss": 1.8844, "step": 25 }, { "epoch": 0.0026650266502665026, "grad_norm": 1.5748794078826904, "learning_rate": 9.167052631578946e-05, "loss": 1.7089, "step": 26 }, { "epoch": 0.0027675276752767526, "grad_norm": 1.3820806741714478, "learning_rate": 9.114368421052632e-05, "loss": 1.7397, "step": 27 }, { "epoch": 0.002870028700287003, "grad_norm": 1.47676420211792, "learning_rate": 9.061684210526315e-05, "loss": 1.6333, "step": 28 }, { "epoch": 0.002972529725297253, "grad_norm": 1.3956615924835205, "learning_rate": 9.009e-05, "loss": 1.5381, "step": 29 }, { "epoch": 0.003075030750307503, "grad_norm": 1.4320456981658936, "learning_rate": 8.956315789473683e-05, "loss": 1.5204, "step": 30 }, { "epoch": 0.003177531775317753, "grad_norm": 1.564431071281433, "learning_rate": 8.903631578947368e-05, "loss": 1.6053, "step": 31 }, { "epoch": 0.003280032800328003, "grad_norm": 1.604783296585083, "learning_rate": 8.850947368421052e-05, "loss": 1.4296, "step": 32 }, { "epoch": 0.0033825338253382535, "grad_norm": 2.1952967643737793, "learning_rate": 8.798263157894736e-05, "loss": 2.3028, "step": 33 }, { "epoch": 0.0034850348503485036, "grad_norm": 1.7859286069869995, "learning_rate": 8.745578947368422e-05, "loss": 1.0892, "step": 34 }, { "epoch": 0.0035875358753587536, "grad_norm": 2.099475145339966, "learning_rate": 8.692894736842105e-05, "loss": 1.9027, "step": 35 }, { "epoch": 0.0036900369003690036, "grad_norm": 2.05716609954834, "learning_rate": 8.64021052631579e-05, "loss": 1.8596, "step": 36 }, { "epoch": 0.0037925379253792536, "grad_norm": 3.045736074447632, "learning_rate": 8.587526315789473e-05, "loss": 1.6003, "step": 37 }, { "epoch": 0.003895038950389504, "grad_norm": 3.077202320098877, "learning_rate": 8.534842105263157e-05, "loss": 1.7161, "step": 38 }, { "epoch": 0.003997539975399754, "grad_norm": 2.8727805614471436, "learning_rate": 8.482157894736842e-05, "loss": 1.2155, "step": 39 }, { "epoch": 0.004100041000410004, "grad_norm": 3.2088754177093506, "learning_rate": 8.429473684210525e-05, "loss": 2.3325, "step": 40 }, { "epoch": 0.0042025420254202545, "grad_norm": 3.467123031616211, "learning_rate": 8.376789473684211e-05, "loss": 1.7372, "step": 41 }, { "epoch": 0.004305043050430504, "grad_norm": 3.2959988117218018, "learning_rate": 8.324105263157894e-05, "loss": 2.0588, "step": 42 }, { "epoch": 0.0044075440754407546, "grad_norm": 3.0905165672302246, "learning_rate": 8.271421052631579e-05, "loss": 1.793, "step": 43 }, { "epoch": 0.004510045100451004, "grad_norm": 4.93464469909668, "learning_rate": 8.218736842105262e-05, "loss": 1.8903, "step": 44 }, { "epoch": 0.004612546125461255, "grad_norm": 2.7704639434814453, "learning_rate": 8.166052631578947e-05, "loss": 1.1567, "step": 45 }, { "epoch": 0.004715047150471505, "grad_norm": 3.1440534591674805, "learning_rate": 8.113368421052631e-05, "loss": 1.568, "step": 46 }, { "epoch": 0.004817548175481755, "grad_norm": 4.074300289154053, "learning_rate": 8.060684210526315e-05, "loss": 1.9814, "step": 47 }, { "epoch": 0.004920049200492005, "grad_norm": 5.697752475738525, "learning_rate": 8.008e-05, "loss": 1.862, "step": 48 }, { "epoch": 0.005022550225502255, "grad_norm": 6.887198448181152, "learning_rate": 7.955315789473684e-05, "loss": 1.7794, "step": 49 }, { "epoch": 0.005125051250512505, "grad_norm": 6.863124847412109, "learning_rate": 7.902631578947368e-05, "loss": 1.5001, "step": 50 }, { "epoch": 0.005125051250512505, "eval_loss": 1.7083282470703125, "eval_runtime": 213.8938, "eval_samples_per_second": 19.206, "eval_steps_per_second": 4.801, "step": 50 }, { "epoch": 0.0052275522755227555, "grad_norm": 0.7324660420417786, "learning_rate": 7.849947368421052e-05, "loss": 1.7833, "step": 51 }, { "epoch": 0.005330053300533005, "grad_norm": 1.0079008340835571, "learning_rate": 7.797263157894736e-05, "loss": 2.0611, "step": 52 }, { "epoch": 0.005432554325543256, "grad_norm": 1.0478756427764893, "learning_rate": 7.744578947368421e-05, "loss": 1.5655, "step": 53 }, { "epoch": 0.005535055350553505, "grad_norm": 1.1489282846450806, "learning_rate": 7.691894736842104e-05, "loss": 2.0112, "step": 54 }, { "epoch": 0.005637556375563756, "grad_norm": 1.0685741901397705, "learning_rate": 7.63921052631579e-05, "loss": 1.837, "step": 55 }, { "epoch": 0.005740057400574006, "grad_norm": 0.9752334952354431, "learning_rate": 7.586526315789473e-05, "loss": 1.8408, "step": 56 }, { "epoch": 0.005842558425584256, "grad_norm": 1.0988349914550781, "learning_rate": 7.533842105263158e-05, "loss": 1.6708, "step": 57 }, { "epoch": 0.005945059450594506, "grad_norm": 1.059479832649231, "learning_rate": 7.481157894736841e-05, "loss": 1.4579, "step": 58 }, { "epoch": 0.006047560475604756, "grad_norm": 1.0964906215667725, "learning_rate": 7.428473684210526e-05, "loss": 1.944, "step": 59 }, { "epoch": 0.006150061500615006, "grad_norm": 1.0522820949554443, "learning_rate": 7.375789473684209e-05, "loss": 1.762, "step": 60 }, { "epoch": 0.0062525625256252566, "grad_norm": 1.0768394470214844, "learning_rate": 7.323105263157895e-05, "loss": 1.7185, "step": 61 }, { "epoch": 0.006355063550635506, "grad_norm": 1.1058900356292725, "learning_rate": 7.270421052631578e-05, "loss": 1.6855, "step": 62 }, { "epoch": 0.006457564575645757, "grad_norm": 1.0793980360031128, "learning_rate": 7.217736842105263e-05, "loss": 1.6553, "step": 63 }, { "epoch": 0.006560065600656006, "grad_norm": 1.257716417312622, "learning_rate": 7.165052631578947e-05, "loss": 1.5379, "step": 64 }, { "epoch": 0.006662566625666257, "grad_norm": 1.116843581199646, "learning_rate": 7.11236842105263e-05, "loss": 1.6653, "step": 65 }, { "epoch": 0.006765067650676507, "grad_norm": 1.016861081123352, "learning_rate": 7.059684210526315e-05, "loss": 1.3905, "step": 66 }, { "epoch": 0.006867568675686757, "grad_norm": 1.1021226644515991, "learning_rate": 7.006999999999998e-05, "loss": 1.6295, "step": 67 }, { "epoch": 0.006970069700697007, "grad_norm": 1.1197590827941895, "learning_rate": 6.954315789473684e-05, "loss": 1.6643, "step": 68 }, { "epoch": 0.007072570725707257, "grad_norm": 1.242423415184021, "learning_rate": 6.901631578947368e-05, "loss": 1.7367, "step": 69 }, { "epoch": 0.007175071750717507, "grad_norm": 1.1498782634735107, "learning_rate": 6.848947368421052e-05, "loss": 1.6476, "step": 70 }, { "epoch": 0.007277572775727758, "grad_norm": 1.174621820449829, "learning_rate": 6.796263157894737e-05, "loss": 2.0002, "step": 71 }, { "epoch": 0.007380073800738007, "grad_norm": 1.1680294275283813, "learning_rate": 6.74357894736842e-05, "loss": 1.4689, "step": 72 }, { "epoch": 0.007482574825748258, "grad_norm": 1.3079551458358765, "learning_rate": 6.690894736842105e-05, "loss": 1.5757, "step": 73 }, { "epoch": 0.007585075850758507, "grad_norm": 1.2481780052185059, "learning_rate": 6.638210526315788e-05, "loss": 1.6238, "step": 74 }, { "epoch": 0.007687576875768758, "grad_norm": 1.3879331350326538, "learning_rate": 6.585526315789474e-05, "loss": 1.9082, "step": 75 }, { "epoch": 0.007790077900779008, "grad_norm": 1.6329386234283447, "learning_rate": 6.532842105263157e-05, "loss": 1.739, "step": 76 }, { "epoch": 0.007892578925789259, "grad_norm": 1.4674111604690552, "learning_rate": 6.480157894736842e-05, "loss": 1.6045, "step": 77 }, { "epoch": 0.007995079950799507, "grad_norm": 1.4382115602493286, "learning_rate": 6.427473684210526e-05, "loss": 1.4885, "step": 78 }, { "epoch": 0.008097580975809758, "grad_norm": 1.4571962356567383, "learning_rate": 6.37478947368421e-05, "loss": 1.3451, "step": 79 }, { "epoch": 0.008200082000820008, "grad_norm": 1.6193132400512695, "learning_rate": 6.322105263157894e-05, "loss": 1.8344, "step": 80 }, { "epoch": 0.008302583025830259, "grad_norm": 1.673788070678711, "learning_rate": 6.269421052631577e-05, "loss": 1.4983, "step": 81 }, { "epoch": 0.008405084050840509, "grad_norm": 1.7791383266448975, "learning_rate": 6.216736842105263e-05, "loss": 1.8181, "step": 82 }, { "epoch": 0.008507585075850758, "grad_norm": 1.6379783153533936, "learning_rate": 6.164052631578947e-05, "loss": 1.5085, "step": 83 }, { "epoch": 0.008610086100861008, "grad_norm": 2.0746712684631348, "learning_rate": 6.111368421052631e-05, "loss": 1.8331, "step": 84 }, { "epoch": 0.008712587125871259, "grad_norm": 1.9016090631484985, "learning_rate": 6.058684210526315e-05, "loss": 1.6955, "step": 85 }, { "epoch": 0.008815088150881509, "grad_norm": 2.0374410152435303, "learning_rate": 6.005999999999999e-05, "loss": 1.6262, "step": 86 }, { "epoch": 0.00891758917589176, "grad_norm": 2.458918809890747, "learning_rate": 5.953315789473684e-05, "loss": 1.782, "step": 87 }, { "epoch": 0.009020090200902008, "grad_norm": 2.420497179031372, "learning_rate": 5.9006315789473676e-05, "loss": 1.4256, "step": 88 }, { "epoch": 0.009122591225912259, "grad_norm": 3.1010406017303467, "learning_rate": 5.847947368421053e-05, "loss": 1.5861, "step": 89 }, { "epoch": 0.00922509225092251, "grad_norm": 2.9462192058563232, "learning_rate": 5.795263157894737e-05, "loss": 1.7643, "step": 90 }, { "epoch": 0.00932759327593276, "grad_norm": 3.0921950340270996, "learning_rate": 5.742578947368421e-05, "loss": 1.7482, "step": 91 }, { "epoch": 0.00943009430094301, "grad_norm": 3.2740588188171387, "learning_rate": 5.6898947368421046e-05, "loss": 1.7103, "step": 92 }, { "epoch": 0.009532595325953259, "grad_norm": 3.282179594039917, "learning_rate": 5.6372105263157886e-05, "loss": 1.3958, "step": 93 }, { "epoch": 0.00963509635096351, "grad_norm": 2.8050053119659424, "learning_rate": 5.584526315789473e-05, "loss": 1.0987, "step": 94 }, { "epoch": 0.00973759737597376, "grad_norm": 3.167931079864502, "learning_rate": 5.531842105263158e-05, "loss": 1.0239, "step": 95 }, { "epoch": 0.00984009840098401, "grad_norm": 4.01984167098999, "learning_rate": 5.4791578947368424e-05, "loss": 2.1924, "step": 96 }, { "epoch": 0.00994259942599426, "grad_norm": 7.1500983238220215, "learning_rate": 5.426473684210526e-05, "loss": 1.6619, "step": 97 }, { "epoch": 0.01004510045100451, "grad_norm": 3.5555317401885986, "learning_rate": 5.37378947368421e-05, "loss": 1.0794, "step": 98 }, { "epoch": 0.01014760147601476, "grad_norm": 3.8048603534698486, "learning_rate": 5.321105263157894e-05, "loss": 1.4438, "step": 99 }, { "epoch": 0.01025010250102501, "grad_norm": 6.511986255645752, "learning_rate": 5.268421052631578e-05, "loss": 2.3968, "step": 100 }, { "epoch": 0.01025010250102501, "eval_loss": 1.6530330181121826, "eval_runtime": 213.675, "eval_samples_per_second": 19.225, "eval_steps_per_second": 4.806, "step": 100 }, { "epoch": 0.01035260352603526, "grad_norm": 0.74406498670578, "learning_rate": 5.2157368421052626e-05, "loss": 2.0527, "step": 101 }, { "epoch": 0.010455104551045511, "grad_norm": 0.8070314526557922, "learning_rate": 5.163052631578947e-05, "loss": 1.6314, "step": 102 }, { "epoch": 0.01055760557605576, "grad_norm": 0.8939439058303833, "learning_rate": 5.110368421052632e-05, "loss": 1.8166, "step": 103 }, { "epoch": 0.01066010660106601, "grad_norm": 0.9275736808776855, "learning_rate": 5.057684210526316e-05, "loss": 1.7837, "step": 104 }, { "epoch": 0.01076260762607626, "grad_norm": 1.0159188508987427, "learning_rate": 5.005e-05, "loss": 1.8655, "step": 105 }, { "epoch": 0.010865108651086511, "grad_norm": 0.9382534027099609, "learning_rate": 4.9523157894736836e-05, "loss": 1.634, "step": 106 }, { "epoch": 0.010967609676096762, "grad_norm": 0.8507430553436279, "learning_rate": 4.899631578947368e-05, "loss": 1.6201, "step": 107 }, { "epoch": 0.01107011070110701, "grad_norm": 0.9986007809638977, "learning_rate": 4.846947368421052e-05, "loss": 1.7325, "step": 108 }, { "epoch": 0.01117261172611726, "grad_norm": 0.955406904220581, "learning_rate": 4.794263157894737e-05, "loss": 1.6368, "step": 109 }, { "epoch": 0.011275112751127511, "grad_norm": 0.9369739294052124, "learning_rate": 4.7415789473684206e-05, "loss": 1.5455, "step": 110 }, { "epoch": 0.011377613776137762, "grad_norm": 1.0813074111938477, "learning_rate": 4.688894736842105e-05, "loss": 1.6737, "step": 111 }, { "epoch": 0.011480114801148012, "grad_norm": 1.034050703048706, "learning_rate": 4.636210526315789e-05, "loss": 1.5688, "step": 112 }, { "epoch": 0.01158261582615826, "grad_norm": 1.0678492784500122, "learning_rate": 4.583526315789473e-05, "loss": 1.6165, "step": 113 }, { "epoch": 0.011685116851168511, "grad_norm": 1.0256990194320679, "learning_rate": 4.530842105263158e-05, "loss": 1.3301, "step": 114 }, { "epoch": 0.011787617876178762, "grad_norm": 1.0190774202346802, "learning_rate": 4.4781578947368416e-05, "loss": 1.6662, "step": 115 }, { "epoch": 0.011890118901189012, "grad_norm": 1.0290199518203735, "learning_rate": 4.425473684210526e-05, "loss": 1.5645, "step": 116 }, { "epoch": 0.011992619926199263, "grad_norm": 1.0103486776351929, "learning_rate": 4.372789473684211e-05, "loss": 1.2945, "step": 117 }, { "epoch": 0.012095120951209511, "grad_norm": 1.2646342515945435, "learning_rate": 4.320105263157895e-05, "loss": 1.735, "step": 118 }, { "epoch": 0.012197621976219762, "grad_norm": 1.2448196411132812, "learning_rate": 4.2674210526315786e-05, "loss": 1.5587, "step": 119 }, { "epoch": 0.012300123001230012, "grad_norm": 1.3261544704437256, "learning_rate": 4.2147368421052626e-05, "loss": 1.7949, "step": 120 }, { "epoch": 0.012402624026240263, "grad_norm": 1.2770674228668213, "learning_rate": 4.162052631578947e-05, "loss": 1.4959, "step": 121 }, { "epoch": 0.012505125051250513, "grad_norm": 1.4941719770431519, "learning_rate": 4.109368421052631e-05, "loss": 1.5308, "step": 122 }, { "epoch": 0.012607626076260762, "grad_norm": 1.2907934188842773, "learning_rate": 4.056684210526316e-05, "loss": 1.7877, "step": 123 }, { "epoch": 0.012710127101271012, "grad_norm": 1.3961200714111328, "learning_rate": 4.004e-05, "loss": 1.7712, "step": 124 }, { "epoch": 0.012812628126281263, "grad_norm": 1.4105072021484375, "learning_rate": 3.951315789473684e-05, "loss": 1.6403, "step": 125 }, { "epoch": 0.012915129151291513, "grad_norm": 1.323488473892212, "learning_rate": 3.898631578947368e-05, "loss": 1.5076, "step": 126 }, { "epoch": 0.013017630176301764, "grad_norm": 1.4488778114318848, "learning_rate": 3.845947368421052e-05, "loss": 1.655, "step": 127 }, { "epoch": 0.013120131201312012, "grad_norm": 1.50008225440979, "learning_rate": 3.7932631578947367e-05, "loss": 1.7404, "step": 128 }, { "epoch": 0.013222632226322263, "grad_norm": 1.4806251525878906, "learning_rate": 3.7405789473684206e-05, "loss": 1.4379, "step": 129 }, { "epoch": 0.013325133251332513, "grad_norm": 1.5019195079803467, "learning_rate": 3.6878947368421045e-05, "loss": 1.5715, "step": 130 }, { "epoch": 0.013427634276342764, "grad_norm": 1.620043158531189, "learning_rate": 3.635210526315789e-05, "loss": 1.0703, "step": 131 }, { "epoch": 0.013530135301353014, "grad_norm": 1.8016197681427002, "learning_rate": 3.582526315789474e-05, "loss": 1.3446, "step": 132 }, { "epoch": 0.013632636326363263, "grad_norm": 2.6074109077453613, "learning_rate": 3.5298421052631576e-05, "loss": 1.3707, "step": 133 }, { "epoch": 0.013735137351373513, "grad_norm": 2.0289924144744873, "learning_rate": 3.477157894736842e-05, "loss": 1.836, "step": 134 }, { "epoch": 0.013837638376383764, "grad_norm": 2.613877058029175, "learning_rate": 3.424473684210526e-05, "loss": 1.9227, "step": 135 }, { "epoch": 0.013940139401394014, "grad_norm": 2.5930979251861572, "learning_rate": 3.37178947368421e-05, "loss": 1.5137, "step": 136 }, { "epoch": 0.014042640426404265, "grad_norm": 2.3788273334503174, "learning_rate": 3.319105263157894e-05, "loss": 1.5968, "step": 137 }, { "epoch": 0.014145141451414513, "grad_norm": 2.544044256210327, "learning_rate": 3.2664210526315786e-05, "loss": 1.8297, "step": 138 }, { "epoch": 0.014247642476424764, "grad_norm": 1.9537235498428345, "learning_rate": 3.213736842105263e-05, "loss": 1.19, "step": 139 }, { "epoch": 0.014350143501435014, "grad_norm": 2.7683722972869873, "learning_rate": 3.161052631578947e-05, "loss": 1.753, "step": 140 }, { "epoch": 0.014452644526445265, "grad_norm": 2.9598679542541504, "learning_rate": 3.108368421052632e-05, "loss": 1.6878, "step": 141 }, { "epoch": 0.014555145551455515, "grad_norm": 2.162545919418335, "learning_rate": 3.0556842105263156e-05, "loss": 1.0099, "step": 142 }, { "epoch": 0.014657646576465764, "grad_norm": 2.538316249847412, "learning_rate": 3.0029999999999995e-05, "loss": 1.57, "step": 143 }, { "epoch": 0.014760147601476014, "grad_norm": 2.4566807746887207, "learning_rate": 2.9503157894736838e-05, "loss": 1.2957, "step": 144 }, { "epoch": 0.014862648626486265, "grad_norm": 2.836510181427002, "learning_rate": 2.8976315789473684e-05, "loss": 1.7585, "step": 145 }, { "epoch": 0.014965149651496515, "grad_norm": 2.739304780960083, "learning_rate": 2.8449473684210523e-05, "loss": 1.0675, "step": 146 }, { "epoch": 0.015067650676506766, "grad_norm": 3.267035484313965, "learning_rate": 2.7922631578947366e-05, "loss": 1.3077, "step": 147 }, { "epoch": 0.015170151701517014, "grad_norm": 3.737492084503174, "learning_rate": 2.7395789473684212e-05, "loss": 1.5623, "step": 148 }, { "epoch": 0.015272652726527265, "grad_norm": 3.9221222400665283, "learning_rate": 2.686894736842105e-05, "loss": 1.4467, "step": 149 }, { "epoch": 0.015375153751537515, "grad_norm": 9.252695083618164, "learning_rate": 2.634210526315789e-05, "loss": 2.2941, "step": 150 }, { "epoch": 0.015375153751537515, "eval_loss": 1.6176789999008179, "eval_runtime": 213.9116, "eval_samples_per_second": 19.204, "eval_steps_per_second": 4.801, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.349856178601984e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }