{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9927007299270073,
  "eval_steps": 500,
  "global_step": 85,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01167883211678832,
      "grad_norm": 2.102217674255371,
      "learning_rate": 4.998292650357558e-05,
      "loss": 1.2528,
      "num_input_tokens_seen": 1572864,
      "step": 1
    },
    {
      "epoch": 0.02335766423357664,
      "grad_norm": 1.8151874542236328,
      "learning_rate": 4.993172933464471e-05,
      "loss": 1.1456,
      "num_input_tokens_seen": 3145728,
      "step": 2
    },
    {
      "epoch": 0.035036496350364967,
      "grad_norm": 1.6563794612884521,
      "learning_rate": 4.984647842238185e-05,
      "loss": 1.0528,
      "num_input_tokens_seen": 4718592,
      "step": 3
    },
    {
      "epoch": 0.04671532846715328,
      "grad_norm": 1.4128868579864502,
      "learning_rate": 4.972729020927865e-05,
      "loss": 0.9542,
      "num_input_tokens_seen": 6291456,
      "step": 4
    },
    {
      "epoch": 0.058394160583941604,
      "grad_norm": 1.3149222135543823,
      "learning_rate": 4.957432749209755e-05,
      "loss": 0.8888,
      "num_input_tokens_seen": 7864320,
      "step": 5
    },
    {
      "epoch": 0.07007299270072993,
      "grad_norm": 1.1523312330245972,
      "learning_rate": 4.938779919951092e-05,
      "loss": 0.8125,
      "num_input_tokens_seen": 9437184,
      "step": 6
    },
    {
      "epoch": 0.08175182481751825,
      "grad_norm": 1.0558948516845703,
      "learning_rate": 4.916796010672969e-05,
      "loss": 0.7639,
      "num_input_tokens_seen": 11010048,
      "step": 7
    },
    {
      "epoch": 0.09343065693430656,
      "grad_norm": 0.9993464350700378,
      "learning_rate": 4.891511048751102e-05,
      "loss": 0.7068,
      "num_input_tokens_seen": 12582912,
      "step": 8
    },
    {
      "epoch": 0.10510948905109489,
      "grad_norm": 0.7277781367301941,
      "learning_rate": 4.862959570402049e-05,
      "loss": 0.6852,
      "num_input_tokens_seen": 14155776,
      "step": 9
    },
    {
      "epoch": 0.11678832116788321,
      "grad_norm": 0.33135032653808594,
      "learning_rate": 4.8311805735108894e-05,
      "loss": 0.6571,
      "num_input_tokens_seen": 15728640,
      "step": 10
    },
    {
      "epoch": 0.12846715328467154,
      "grad_norm": 0.24875016510486603,
      "learning_rate": 4.796217464364808e-05,
      "loss": 0.6347,
      "num_input_tokens_seen": 17301504,
      "step": 11
    },
    {
      "epoch": 0.14014598540145987,
      "grad_norm": 0.219735249876976,
      "learning_rate": 4.758117998365322e-05,
      "loss": 0.6191,
      "num_input_tokens_seen": 18874368,
      "step": 12
    },
    {
      "epoch": 0.15182481751824817,
      "grad_norm": 0.20399349927902222,
      "learning_rate": 4.716934214800155e-05,
      "loss": 0.5761,
      "num_input_tokens_seen": 20447232,
      "step": 13
    },
    {
      "epoch": 0.1635036496350365,
      "grad_norm": 0.18108128011226654,
      "learning_rate": 4.672722365763821e-05,
      "loss": 0.5772,
      "num_input_tokens_seen": 22020096,
      "step": 14
    },
    {
      "epoch": 0.17518248175182483,
      "grad_norm": 0.15887582302093506,
      "learning_rate": 4.625542839324036e-05,
      "loss": 0.5579,
      "num_input_tokens_seen": 23592960,
      "step": 15
    },
    {
      "epoch": 0.18686131386861313,
      "grad_norm": 0.1574232280254364,
      "learning_rate": 4.575460077038877e-05,
      "loss": 0.5674,
      "num_input_tokens_seen": 25165824,
      "step": 16
    },
    {
      "epoch": 0.19854014598540146,
      "grad_norm": 0.1397695690393448,
      "learning_rate": 4.522542485937369e-05,
      "loss": 0.5767,
      "num_input_tokens_seen": 26738688,
      "step": 17
    },
    {
      "epoch": 0.21021897810218979,
      "grad_norm": 0.12465737760066986,
      "learning_rate": 4.4668623450837085e-05,
      "loss": 0.5559,
      "num_input_tokens_seen": 28311552,
      "step": 18
    },
    {
      "epoch": 0.22189781021897811,
      "grad_norm": 0.10970822721719742,
      "learning_rate": 4.408495706852758e-05,
      "loss": 0.5603,
      "num_input_tokens_seen": 29884416,
      "step": 19
    },
    {
      "epoch": 0.23357664233576642,
      "grad_norm": 0.10278748720884323,
      "learning_rate": 4.347522293051648e-05,
      "loss": 0.5541,
      "num_input_tokens_seen": 31457280,
      "step": 20
    },
    {
      "epoch": 0.24525547445255474,
      "grad_norm": 0.09158932417631149,
      "learning_rate": 4.284025386029381e-05,
      "loss": 0.5358,
      "num_input_tokens_seen": 33030144,
      "step": 21
    },
    {
      "epoch": 0.2569343065693431,
      "grad_norm": 0.09265461564064026,
      "learning_rate": 4.218091714923157e-05,
      "loss": 0.529,
      "num_input_tokens_seen": 34603008,
      "step": 22
    },
    {
      "epoch": 0.2686131386861314,
      "grad_norm": 0.08593211323022842,
      "learning_rate": 4.149811337196807e-05,
      "loss": 0.5391,
      "num_input_tokens_seen": 36175872,
      "step": 23
    },
    {
      "epoch": 0.28029197080291973,
      "grad_norm": 0.07743365317583084,
      "learning_rate": 4.079277515633127e-05,
      "loss": 0.5368,
      "num_input_tokens_seen": 37748736,
      "step": 24
    },
    {
      "epoch": 0.291970802919708,
      "grad_norm": 0.08235425502061844,
      "learning_rate": 4.0065865909481417e-05,
      "loss": 0.5226,
      "num_input_tokens_seen": 39321600,
      "step": 25
    },
    {
      "epoch": 0.30364963503649633,
      "grad_norm": 0.07797503471374512,
      "learning_rate": 3.931837850201263e-05,
      "loss": 0.5181,
      "num_input_tokens_seen": 40894464,
      "step": 26
    },
    {
      "epoch": 0.31532846715328466,
      "grad_norm": 0.07740040868520737,
      "learning_rate": 3.855133391181124e-05,
      "loss": 0.5368,
      "num_input_tokens_seen": 42467328,
      "step": 27
    },
    {
      "epoch": 0.327007299270073,
      "grad_norm": 0.07796761393547058,
      "learning_rate": 3.7765779829522675e-05,
      "loss": 0.5143,
      "num_input_tokens_seen": 44040192,
      "step": 28
    },
    {
      "epoch": 0.3386861313868613,
      "grad_norm": 0.06905554234981537,
      "learning_rate": 3.696278922753216e-05,
      "loss": 0.5084,
      "num_input_tokens_seen": 45613056,
      "step": 29
    },
    {
      "epoch": 0.35036496350364965,
      "grad_norm": 0.07220657169818878,
      "learning_rate": 3.6143458894413465e-05,
      "loss": 0.5026,
      "num_input_tokens_seen": 47185920,
      "step": 30
    },
    {
      "epoch": 0.362043795620438,
      "grad_norm": 0.0667012482881546,
      "learning_rate": 3.5308907936847594e-05,
      "loss": 0.53,
      "num_input_tokens_seen": 48758784,
      "step": 31
    },
    {
      "epoch": 0.37372262773722625,
      "grad_norm": 0.06964272260665894,
      "learning_rate": 3.446027625105776e-05,
      "loss": 0.532,
      "num_input_tokens_seen": 50331648,
      "step": 32
    },
    {
      "epoch": 0.3854014598540146,
      "grad_norm": 0.06804265826940536,
      "learning_rate": 3.3598722965848204e-05,
      "loss": 0.5257,
      "num_input_tokens_seen": 51904512,
      "step": 33
    },
    {
      "epoch": 0.3970802919708029,
      "grad_norm": 0.06633932888507843,
      "learning_rate": 3.272542485937369e-05,
      "loss": 0.5078,
      "num_input_tokens_seen": 53477376,
      "step": 34
    },
    {
      "epoch": 0.40875912408759124,
      "grad_norm": 0.06411036849021912,
      "learning_rate": 3.1841574751802076e-05,
      "loss": 0.5012,
      "num_input_tokens_seen": 55050240,
      "step": 35
    },
    {
      "epoch": 0.42043795620437957,
      "grad_norm": 0.06728877872228622,
      "learning_rate": 3.094837987606547e-05,
      "loss": 0.5117,
      "num_input_tokens_seen": 56623104,
      "step": 36
    },
    {
      "epoch": 0.4321167883211679,
      "grad_norm": 0.06455469876527786,
      "learning_rate": 3.0047060228925256e-05,
      "loss": 0.516,
      "num_input_tokens_seen": 58195968,
      "step": 37
    },
    {
      "epoch": 0.44379562043795623,
      "grad_norm": 0.06497868150472641,
      "learning_rate": 2.913884690460325e-05,
      "loss": 0.5189,
      "num_input_tokens_seen": 59768832,
      "step": 38
    },
    {
      "epoch": 0.4554744525547445,
      "grad_norm": 0.062144387513399124,
      "learning_rate": 2.8224980413255086e-05,
      "loss": 0.4973,
      "num_input_tokens_seen": 61341696,
      "step": 39
    },
    {
      "epoch": 0.46715328467153283,
      "grad_norm": 0.064272440969944,
      "learning_rate": 2.7306708986582553e-05,
      "loss": 0.4942,
      "num_input_tokens_seen": 62914560,
      "step": 40
    },
    {
      "epoch": 0.47883211678832116,
      "grad_norm": 0.06099522113800049,
      "learning_rate": 2.638528687289925e-05,
      "loss": 0.5069,
      "num_input_tokens_seen": 64487424,
      "step": 41
    },
    {
      "epoch": 0.4905109489051095,
      "grad_norm": 0.06261651962995529,
      "learning_rate": 2.5461972623978247e-05,
      "loss": 0.5218,
      "num_input_tokens_seen": 66060288,
      "step": 42
    },
    {
      "epoch": 0.5021897810218978,
      "grad_norm": 0.059084702283144,
      "learning_rate": 2.453802737602176e-05,
      "loss": 0.5003,
      "num_input_tokens_seen": 67633152,
      "step": 43
    },
    {
      "epoch": 0.5138686131386861,
      "grad_norm": 0.05868009477853775,
      "learning_rate": 2.361471312710075e-05,
      "loss": 0.4997,
      "num_input_tokens_seen": 69206016,
      "step": 44
    },
    {
      "epoch": 0.5255474452554745,
      "grad_norm": 0.05527840927243233,
      "learning_rate": 2.2693291013417453e-05,
      "loss": 0.5045,
      "num_input_tokens_seen": 70778880,
      "step": 45
    },
    {
      "epoch": 0.5372262773722628,
      "grad_norm": 0.05962590128183365,
      "learning_rate": 2.1775019586744923e-05,
      "loss": 0.4972,
      "num_input_tokens_seen": 72351744,
      "step": 46
    },
    {
      "epoch": 0.5489051094890511,
      "grad_norm": 0.05784057453274727,
      "learning_rate": 2.0861153095396748e-05,
      "loss": 0.5174,
      "num_input_tokens_seen": 73924608,
      "step": 47
    },
    {
      "epoch": 0.5605839416058395,
      "grad_norm": 0.0580497644841671,
      "learning_rate": 1.995293977107475e-05,
      "loss": 0.5086,
      "num_input_tokens_seen": 75497472,
      "step": 48
    },
    {
      "epoch": 0.5722627737226277,
      "grad_norm": 0.05789309740066528,
      "learning_rate": 1.9051620123934537e-05,
      "loss": 0.4988,
      "num_input_tokens_seen": 77070336,
      "step": 49
    },
    {
      "epoch": 0.583941605839416,
      "grad_norm": 0.05743095278739929,
      "learning_rate": 1.815842524819793e-05,
      "loss": 0.5047,
      "num_input_tokens_seen": 78643200,
      "step": 50
    },
    {
      "epoch": 0.5956204379562043,
      "grad_norm": 0.05637204647064209,
      "learning_rate": 1.7274575140626318e-05,
      "loss": 0.4977,
      "num_input_tokens_seen": 80216064,
      "step": 51
    },
    {
      "epoch": 0.6072992700729927,
      "grad_norm": 0.054249729961156845,
      "learning_rate": 1.6401277034151798e-05,
      "loss": 0.4775,
      "num_input_tokens_seen": 81788928,
      "step": 52
    },
    {
      "epoch": 0.618978102189781,
      "grad_norm": 0.05614548176527023,
      "learning_rate": 1.5539723748942245e-05,
      "loss": 0.519,
      "num_input_tokens_seen": 83361792,
      "step": 53
    },
    {
      "epoch": 0.6306569343065693,
      "grad_norm": 0.05785420536994934,
      "learning_rate": 1.4691092063152417e-05,
      "loss": 0.5102,
      "num_input_tokens_seen": 84934656,
      "step": 54
    },
    {
      "epoch": 0.6423357664233577,
      "grad_norm": 0.05397043377161026,
      "learning_rate": 1.3856541105586545e-05,
      "loss": 0.4705,
      "num_input_tokens_seen": 86507520,
      "step": 55
    },
    {
      "epoch": 0.654014598540146,
      "grad_norm": 0.05297677591443062,
      "learning_rate": 1.303721077246784e-05,
      "loss": 0.4965,
      "num_input_tokens_seen": 88080384,
      "step": 56
    },
    {
      "epoch": 0.6656934306569343,
      "grad_norm": 0.05379556864500046,
      "learning_rate": 1.223422017047733e-05,
      "loss": 0.503,
      "num_input_tokens_seen": 89653248,
      "step": 57
    },
    {
      "epoch": 0.6773722627737226,
      "grad_norm": 0.058262865990400314,
      "learning_rate": 1.1448666088188764e-05,
      "loss": 0.4921,
      "num_input_tokens_seen": 91226112,
      "step": 58
    },
    {
      "epoch": 0.689051094890511,
      "grad_norm": 0.05684163048863411,
      "learning_rate": 1.068162149798737e-05,
      "loss": 0.5042,
      "num_input_tokens_seen": 92798976,
      "step": 59
    },
    {
      "epoch": 0.7007299270072993,
      "grad_norm": 0.053847264498472214,
      "learning_rate": 9.934134090518593e-06,
      "loss": 0.5145,
      "num_input_tokens_seen": 94371840,
      "step": 60
    },
    {
      "epoch": 0.7124087591240876,
      "grad_norm": 0.05480194464325905,
      "learning_rate": 9.207224843668732e-06,
      "loss": 0.4897,
      "num_input_tokens_seen": 95944704,
      "step": 61
    },
    {
      "epoch": 0.724087591240876,
      "grad_norm": 0.05290277674794197,
      "learning_rate": 8.50188662803194e-06,
      "loss": 0.5033,
      "num_input_tokens_seen": 97517568,
      "step": 62
    },
    {
      "epoch": 0.7357664233576642,
      "grad_norm": 0.05715763568878174,
      "learning_rate": 7.819082850768434e-06,
      "loss": 0.473,
      "num_input_tokens_seen": 99090432,
      "step": 63
    },
    {
      "epoch": 0.7474452554744525,
      "grad_norm": 0.060164712369441986,
      "learning_rate": 7.159746139706194e-06,
      "loss": 0.5298,
      "num_input_tokens_seen": 100663296,
      "step": 64
    },
    {
      "epoch": 0.7591240875912408,
      "grad_norm": 0.058020610362291336,
      "learning_rate": 6.524777069483526e-06,
      "loss": 0.4685,
      "num_input_tokens_seen": 102236160,
      "step": 65
    },
    {
      "epoch": 0.7708029197080292,
      "grad_norm": 0.05399800464510918,
      "learning_rate": 5.915042931472425e-06,
      "loss": 0.4931,
      "num_input_tokens_seen": 103809024,
      "step": 66
    },
    {
      "epoch": 0.7824817518248175,
      "grad_norm": 0.05629091337323189,
      "learning_rate": 5.33137654916292e-06,
      "loss": 0.4957,
      "num_input_tokens_seen": 105381888,
      "step": 67
    },
    {
      "epoch": 0.7941605839416058,
      "grad_norm": 0.05569099634885788,
      "learning_rate": 4.7745751406263165e-06,
      "loss": 0.4924,
      "num_input_tokens_seen": 106954752,
      "step": 68
    },
    {
      "epoch": 0.8058394160583942,
      "grad_norm": 0.05562946945428848,
      "learning_rate": 4.245399229611238e-06,
      "loss": 0.5041,
      "num_input_tokens_seen": 108527616,
      "step": 69
    },
    {
      "epoch": 0.8175182481751825,
      "grad_norm": 0.05801980197429657,
      "learning_rate": 3.7445716067596503e-06,
      "loss": 0.4948,
      "num_input_tokens_seen": 110100480,
      "step": 70
    },
    {
      "epoch": 0.8291970802919708,
      "grad_norm": 0.058106038719415665,
      "learning_rate": 3.2727763423617913e-06,
      "loss": 0.5085,
      "num_input_tokens_seen": 111673344,
      "step": 71
    },
    {
      "epoch": 0.8408759124087591,
      "grad_norm": 0.052898507565259933,
      "learning_rate": 2.8306578519984527e-06,
      "loss": 0.5152,
      "num_input_tokens_seen": 113246208,
      "step": 72
    },
    {
      "epoch": 0.8525547445255475,
      "grad_norm": 0.05265094339847565,
      "learning_rate": 2.418820016346779e-06,
      "loss": 0.4905,
      "num_input_tokens_seen": 114819072,
      "step": 73
    },
    {
      "epoch": 0.8642335766423358,
      "grad_norm": 0.054221879690885544,
      "learning_rate": 2.0378253563519247e-06,
      "loss": 0.5023,
      "num_input_tokens_seen": 116391936,
      "step": 74
    },
    {
      "epoch": 0.8759124087591241,
      "grad_norm": 0.0557856447994709,
      "learning_rate": 1.6881942648911076e-06,
      "loss": 0.4993,
      "num_input_tokens_seen": 117964800,
      "step": 75
    },
    {
      "epoch": 0.8875912408759125,
      "grad_norm": 0.05209067091345787,
      "learning_rate": 1.3704042959795132e-06,
      "loss": 0.481,
      "num_input_tokens_seen": 119537664,
      "step": 76
    },
    {
      "epoch": 0.8992700729927007,
      "grad_norm": 0.05373719707131386,
      "learning_rate": 1.0848895124889818e-06,
      "loss": 0.4806,
      "num_input_tokens_seen": 121110528,
      "step": 77
    },
    {
      "epoch": 0.910948905109489,
      "grad_norm": 0.055330030620098114,
      "learning_rate": 8.320398932703144e-07,
      "loss": 0.4915,
      "num_input_tokens_seen": 122683392,
      "step": 78
    },
    {
      "epoch": 0.9226277372262773,
      "grad_norm": 0.05640307813882828,
      "learning_rate": 6.122008004890851e-07,
      "loss": 0.4937,
      "num_input_tokens_seen": 124256256,
      "step": 79
    },
    {
      "epoch": 0.9343065693430657,
      "grad_norm": 0.05659601837396622,
      "learning_rate": 4.256725079024554e-07,
      "loss": 0.5152,
      "num_input_tokens_seen": 125829120,
      "step": 80
    },
    {
      "epoch": 0.945985401459854,
      "grad_norm": 0.05199190601706505,
      "learning_rate": 2.7270979072135104e-07,
      "loss": 0.4832,
      "num_input_tokens_seen": 127401984,
      "step": 81
    },
    {
      "epoch": 0.9576642335766423,
      "grad_norm": 0.05153407156467438,
      "learning_rate": 1.5352157761815977e-07,
      "loss": 0.4975,
      "num_input_tokens_seen": 128974848,
      "step": 82
    },
    {
      "epoch": 0.9693430656934306,
      "grad_norm": 0.05456986650824547,
      "learning_rate": 6.827066535529946e-08,
      "loss": 0.4887,
      "num_input_tokens_seen": 130547712,
      "step": 83
    },
    {
      "epoch": 0.981021897810219,
      "grad_norm": 0.057446081191301346,
      "learning_rate": 1.7073496424427348e-08,
      "loss": 0.4905,
      "num_input_tokens_seen": 132120576,
      "step": 84
    },
    {
      "epoch": 0.9927007299270073,
      "grad_norm": 0.05915559083223343,
      "learning_rate": 0.0,
      "loss": 0.5088,
      "num_input_tokens_seen": 133693440,
      "step": 85
    },
    {
      "epoch": 0.9927007299270073,
      "num_input_tokens_seen": 133693440,
      "step": 85,
      "total_flos": 5.206772811237949e+18,
      "train_loss": 0.5589800634804893,
      "train_runtime": 13057.0702,
      "train_samples_per_second": 2.516,
      "train_steps_per_second": 0.007
    }
  ],
  "logging_steps": 1,
  "max_steps": 85,
  "num_input_tokens_seen": 133693440,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.206772811237949e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}