|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 354, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005649717514124294, |
|
"grad_norm": 0.5776382684707642, |
|
"learning_rate": 1e-05, |
|
"loss": 88.641, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.011299435028248588, |
|
"grad_norm": 0.5559790730476379, |
|
"learning_rate": 2e-05, |
|
"loss": 88.6713, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01694915254237288, |
|
"grad_norm": 0.5598555207252502, |
|
"learning_rate": 3e-05, |
|
"loss": 88.7191, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.022598870056497175, |
|
"grad_norm": 0.5563465356826782, |
|
"learning_rate": 4e-05, |
|
"loss": 88.6407, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02824858757062147, |
|
"grad_norm": 0.6338562965393066, |
|
"learning_rate": 5e-05, |
|
"loss": 88.64, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03389830508474576, |
|
"grad_norm": 0.5904527902603149, |
|
"learning_rate": 6e-05, |
|
"loss": 88.6842, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03954802259887006, |
|
"grad_norm": 0.629975438117981, |
|
"learning_rate": 7e-05, |
|
"loss": 88.6572, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04519774011299435, |
|
"grad_norm": 0.6180629730224609, |
|
"learning_rate": 8e-05, |
|
"loss": 88.6373, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05084745762711865, |
|
"grad_norm": 0.6689253449440002, |
|
"learning_rate": 9e-05, |
|
"loss": 88.5833, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.05649717514124294, |
|
"grad_norm": 0.676450788974762, |
|
"learning_rate": 0.0001, |
|
"loss": 88.6067, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.062146892655367235, |
|
"grad_norm": 0.6853811740875244, |
|
"learning_rate": 9.999791493378921e-05, |
|
"loss": 88.6227, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.06779661016949153, |
|
"grad_norm": 0.6194440126419067, |
|
"learning_rate": 9.999165990905683e-05, |
|
"loss": 88.6231, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07344632768361582, |
|
"grad_norm": 0.6684823632240295, |
|
"learning_rate": 9.998123544748852e-05, |
|
"loss": 88.6185, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.07909604519774012, |
|
"grad_norm": 0.6482928991317749, |
|
"learning_rate": 9.996664241851197e-05, |
|
"loss": 88.6342, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0847457627118644, |
|
"grad_norm": 0.7201379537582397, |
|
"learning_rate": 9.994788203922447e-05, |
|
"loss": 88.5714, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0903954802259887, |
|
"grad_norm": 0.6155017614364624, |
|
"learning_rate": 9.992495587429129e-05, |
|
"loss": 88.5541, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.096045197740113, |
|
"grad_norm": 0.6636160016059875, |
|
"learning_rate": 9.989786583581535e-05, |
|
"loss": 88.5598, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.1016949152542373, |
|
"grad_norm": 0.6538222432136536, |
|
"learning_rate": 9.986661418317759e-05, |
|
"loss": 88.5863, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.10734463276836158, |
|
"grad_norm": 0.5915122628211975, |
|
"learning_rate": 9.98312035228486e-05, |
|
"loss": 88.6102, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.11299435028248588, |
|
"grad_norm": 0.6704850792884827, |
|
"learning_rate": 9.979163680817124e-05, |
|
"loss": 88.5026, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11864406779661017, |
|
"grad_norm": 0.6887699961662292, |
|
"learning_rate": 9.97479173391143e-05, |
|
"loss": 88.5315, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.12429378531073447, |
|
"grad_norm": 0.7467436790466309, |
|
"learning_rate": 9.97000487619973e-05, |
|
"loss": 88.5097, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.12994350282485875, |
|
"grad_norm": 0.750785231590271, |
|
"learning_rate": 9.964803506918634e-05, |
|
"loss": 88.5409, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.13559322033898305, |
|
"grad_norm": 0.7216200828552246, |
|
"learning_rate": 9.959188059876115e-05, |
|
"loss": 88.4787, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.14124293785310735, |
|
"grad_norm": 0.7141463160514832, |
|
"learning_rate": 9.953159003415328e-05, |
|
"loss": 88.5226, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.14689265536723164, |
|
"grad_norm": 0.7239276766777039, |
|
"learning_rate": 9.946716840375551e-05, |
|
"loss": 88.4626, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.15254237288135594, |
|
"grad_norm": 0.7370103001594543, |
|
"learning_rate": 9.939862108050243e-05, |
|
"loss": 88.5769, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15819209039548024, |
|
"grad_norm": 0.6700708270072937, |
|
"learning_rate": 9.932595378142233e-05, |
|
"loss": 88.558, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1638418079096045, |
|
"grad_norm": 0.7646386027336121, |
|
"learning_rate": 9.924917256716042e-05, |
|
"loss": 88.4619, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.1694915254237288, |
|
"grad_norm": 0.6728429198265076, |
|
"learning_rate": 9.916828384147331e-05, |
|
"loss": 88.5065, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1751412429378531, |
|
"grad_norm": 0.7492191195487976, |
|
"learning_rate": 9.908329435069495e-05, |
|
"loss": 88.4889, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.1807909604519774, |
|
"grad_norm": 0.7224460244178772, |
|
"learning_rate": 9.899421118317398e-05, |
|
"loss": 88.4961, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.1864406779661017, |
|
"grad_norm": 0.7774916887283325, |
|
"learning_rate": 9.890104176868247e-05, |
|
"loss": 88.6111, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.192090395480226, |
|
"grad_norm": 0.7639403939247131, |
|
"learning_rate": 9.880379387779637e-05, |
|
"loss": 88.4938, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.1977401129943503, |
|
"grad_norm": 0.683748185634613, |
|
"learning_rate": 9.87024756212473e-05, |
|
"loss": 88.5265, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.2033898305084746, |
|
"grad_norm": 0.7794637680053711, |
|
"learning_rate": 9.859709544924624e-05, |
|
"loss": 88.5168, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.20903954802259886, |
|
"grad_norm": 0.7606369256973267, |
|
"learning_rate": 9.848766215077858e-05, |
|
"loss": 88.4687, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.21468926553672316, |
|
"grad_norm": 0.7200310826301575, |
|
"learning_rate": 9.837418485287127e-05, |
|
"loss": 88.494, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.22033898305084745, |
|
"grad_norm": 0.804512619972229, |
|
"learning_rate": 9.825667301983148e-05, |
|
"loss": 88.4699, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.22598870056497175, |
|
"grad_norm": 0.7734663486480713, |
|
"learning_rate": 9.813513645245729e-05, |
|
"loss": 88.4918, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.23163841807909605, |
|
"grad_norm": 0.8824536204338074, |
|
"learning_rate": 9.800958528722036e-05, |
|
"loss": 88.5269, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.23728813559322035, |
|
"grad_norm": 0.8647343516349792, |
|
"learning_rate": 9.78800299954203e-05, |
|
"loss": 88.4415, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.24293785310734464, |
|
"grad_norm": 0.8488788604736328, |
|
"learning_rate": 9.774648138231163e-05, |
|
"loss": 88.5083, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.24858757062146894, |
|
"grad_norm": 1.1741865873336792, |
|
"learning_rate": 9.760895058620235e-05, |
|
"loss": 88.4342, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2542372881355932, |
|
"grad_norm": 0.6116179823875427, |
|
"learning_rate": 9.746744907752509e-05, |
|
"loss": 88.5531, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2598870056497175, |
|
"grad_norm": 0.6405426859855652, |
|
"learning_rate": 9.732198865788047e-05, |
|
"loss": 88.5101, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2655367231638418, |
|
"grad_norm": 0.6234097480773926, |
|
"learning_rate": 9.71725814590527e-05, |
|
"loss": 88.5438, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2711864406779661, |
|
"grad_norm": 0.6477924585342407, |
|
"learning_rate": 9.701923994199784e-05, |
|
"loss": 88.4566, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2768361581920904, |
|
"grad_norm": 0.5889535546302795, |
|
"learning_rate": 9.686197689580456e-05, |
|
"loss": 88.515, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.2824858757062147, |
|
"grad_norm": 0.7161465883255005, |
|
"learning_rate": 9.67008054366274e-05, |
|
"loss": 88.4243, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.288135593220339, |
|
"grad_norm": 0.6408765316009521, |
|
"learning_rate": 9.653573900659292e-05, |
|
"loss": 88.4917, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.2937853107344633, |
|
"grad_norm": 0.7543685436248779, |
|
"learning_rate": 9.636679137267852e-05, |
|
"loss": 88.443, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.2994350282485876, |
|
"grad_norm": 0.71647709608078, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 88.4699, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.3050847457627119, |
|
"grad_norm": 0.7122645974159241, |
|
"learning_rate": 9.601730917845797e-05, |
|
"loss": 88.401, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3107344632768362, |
|
"grad_norm": 0.7030540704727173, |
|
"learning_rate": 9.583680376589241e-05, |
|
"loss": 88.4259, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.3163841807909605, |
|
"grad_norm": 0.7593639492988586, |
|
"learning_rate": 9.56524754424971e-05, |
|
"loss": 88.4487, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.3220338983050847, |
|
"grad_norm": 0.7722213268280029, |
|
"learning_rate": 9.546433958174238e-05, |
|
"loss": 88.43, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.327683615819209, |
|
"grad_norm": 0.7288438677787781, |
|
"learning_rate": 9.527241187465734e-05, |
|
"loss": 88.3441, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.7370777130126953, |
|
"learning_rate": 9.507670832852102e-05, |
|
"loss": 88.4693, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.3389830508474576, |
|
"grad_norm": 0.6613349318504333, |
|
"learning_rate": 9.487724526552753e-05, |
|
"loss": 88.3618, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3446327683615819, |
|
"grad_norm": 0.6312256455421448, |
|
"learning_rate": 9.467403932142452e-05, |
|
"loss": 88.3936, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.3502824858757062, |
|
"grad_norm": 0.7118995189666748, |
|
"learning_rate": 9.446710744412595e-05, |
|
"loss": 88.4331, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.3559322033898305, |
|
"grad_norm": 0.6945487260818481, |
|
"learning_rate": 9.425646689229842e-05, |
|
"loss": 88.3982, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.3615819209039548, |
|
"grad_norm": 0.617613673210144, |
|
"learning_rate": 9.404213523392183e-05, |
|
"loss": 88.4616, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.3672316384180791, |
|
"grad_norm": 0.6954861879348755, |
|
"learning_rate": 9.38241303448241e-05, |
|
"loss": 88.3863, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.3728813559322034, |
|
"grad_norm": 0.6992458701133728, |
|
"learning_rate": 9.360247040719039e-05, |
|
"loss": 88.3965, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.3785310734463277, |
|
"grad_norm": 0.6800311207771301, |
|
"learning_rate": 9.337717390804652e-05, |
|
"loss": 88.3868, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.384180790960452, |
|
"grad_norm": 0.7097700238227844, |
|
"learning_rate": 9.314825963771723e-05, |
|
"loss": 88.3632, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.3898305084745763, |
|
"grad_norm": 0.7166731953620911, |
|
"learning_rate": 9.29157466882589e-05, |
|
"loss": 88.3418, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.3954802259887006, |
|
"grad_norm": 0.7072133421897888, |
|
"learning_rate": 9.267965445186733e-05, |
|
"loss": 88.3753, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4011299435028249, |
|
"grad_norm": 0.7065291404724121, |
|
"learning_rate": 9.24400026192603e-05, |
|
"loss": 88.3499, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.4067796610169492, |
|
"grad_norm": 0.8113076090812683, |
|
"learning_rate": 9.219681117803536e-05, |
|
"loss": 88.3214, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.4124293785310734, |
|
"grad_norm": 0.7646364569664001, |
|
"learning_rate": 9.195010041100275e-05, |
|
"loss": 88.3064, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.4180790960451977, |
|
"grad_norm": 0.756248950958252, |
|
"learning_rate": 9.16998908944939e-05, |
|
"loss": 88.3968, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.423728813559322, |
|
"grad_norm": 0.805596113204956, |
|
"learning_rate": 9.14462034966451e-05, |
|
"loss": 88.3718, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4293785310734463, |
|
"grad_norm": 0.6950631737709045, |
|
"learning_rate": 9.118905937565722e-05, |
|
"loss": 88.3981, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.4350282485875706, |
|
"grad_norm": 0.7553988695144653, |
|
"learning_rate": 9.092847997803097e-05, |
|
"loss": 88.3488, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.4406779661016949, |
|
"grad_norm": 0.7791416645050049, |
|
"learning_rate": 9.066448703677828e-05, |
|
"loss": 88.4197, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.4463276836158192, |
|
"grad_norm": 0.7216358184814453, |
|
"learning_rate": 9.039710256960957e-05, |
|
"loss": 88.3813, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.4519774011299435, |
|
"grad_norm": 0.8204500079154968, |
|
"learning_rate": 9.012634887709754e-05, |
|
"loss": 88.2346, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.4576271186440678, |
|
"grad_norm": 0.843241274356842, |
|
"learning_rate": 8.985224854081726e-05, |
|
"loss": 88.4055, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4632768361581921, |
|
"grad_norm": 0.7781232595443726, |
|
"learning_rate": 8.957482442146272e-05, |
|
"loss": 88.372, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.4689265536723164, |
|
"grad_norm": 0.8177736401557922, |
|
"learning_rate": 8.929409965694016e-05, |
|
"loss": 88.3409, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.4745762711864407, |
|
"grad_norm": 0.8297736644744873, |
|
"learning_rate": 8.901009766043847e-05, |
|
"loss": 88.3283, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.480225988700565, |
|
"grad_norm": 0.8678383231163025, |
|
"learning_rate": 8.872284211847629e-05, |
|
"loss": 88.3416, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.4858757062146893, |
|
"grad_norm": 0.8668798804283142, |
|
"learning_rate": 8.84323569889266e-05, |
|
"loss": 88.2483, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.4915254237288136, |
|
"grad_norm": 0.9378306269645691, |
|
"learning_rate": 8.813866649901856e-05, |
|
"loss": 88.3618, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.4971751412429379, |
|
"grad_norm": 1.1833090782165527, |
|
"learning_rate": 8.784179514331682e-05, |
|
"loss": 88.2735, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5028248587570622, |
|
"grad_norm": 0.6026833057403564, |
|
"learning_rate": 8.75417676816787e-05, |
|
"loss": 88.4478, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.5084745762711864, |
|
"grad_norm": 0.6131640672683716, |
|
"learning_rate": 8.72386091371891e-05, |
|
"loss": 88.4419, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5141242937853108, |
|
"grad_norm": 0.6138200759887695, |
|
"learning_rate": 8.693234479407353e-05, |
|
"loss": 88.4078, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.519774011299435, |
|
"grad_norm": 0.6851401329040527, |
|
"learning_rate": 8.662300019558931e-05, |
|
"loss": 88.3743, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5254237288135594, |
|
"grad_norm": 0.6595472693443298, |
|
"learning_rate": 8.631060114189525e-05, |
|
"loss": 88.387, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5310734463276836, |
|
"grad_norm": 0.6642423272132874, |
|
"learning_rate": 8.59951736878998e-05, |
|
"loss": 88.2821, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.536723163841808, |
|
"grad_norm": 0.7108228206634521, |
|
"learning_rate": 8.5676744141088e-05, |
|
"loss": 88.2668, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5423728813559322, |
|
"grad_norm": 0.7303377985954285, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 88.2737, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5480225988700564, |
|
"grad_norm": 0.6971437931060791, |
|
"learning_rate": 8.503098524865301e-05, |
|
"loss": 88.342, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.5536723163841808, |
|
"grad_norm": 0.7270698547363281, |
|
"learning_rate": 8.47037097610317e-05, |
|
"loss": 88.2374, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.559322033898305, |
|
"grad_norm": 0.6642720103263855, |
|
"learning_rate": 8.43735398921059e-05, |
|
"loss": 88.2445, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5649717514124294, |
|
"grad_norm": 0.6921072602272034, |
|
"learning_rate": 8.404050317891711e-05, |
|
"loss": 88.3058, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5706214689265536, |
|
"grad_norm": 0.7314338088035583, |
|
"learning_rate": 8.370462739760923e-05, |
|
"loss": 88.1885, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.576271186440678, |
|
"grad_norm": 0.7479543089866638, |
|
"learning_rate": 8.336594056111197e-05, |
|
"loss": 88.3319, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.5819209039548022, |
|
"grad_norm": 0.6370671987533569, |
|
"learning_rate": 8.30244709168045e-05, |
|
"loss": 88.3088, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.5875706214689266, |
|
"grad_norm": 0.68263179063797, |
|
"learning_rate": 8.268024694415947e-05, |
|
"loss": 88.216, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.5932203389830508, |
|
"grad_norm": 0.7159000635147095, |
|
"learning_rate": 8.233329735236789e-05, |
|
"loss": 88.2724, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.5988700564971752, |
|
"grad_norm": 0.6509236097335815, |
|
"learning_rate": 8.198365107794457e-05, |
|
"loss": 88.2867, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6045197740112994, |
|
"grad_norm": 0.6526631712913513, |
|
"learning_rate": 8.163133728231482e-05, |
|
"loss": 88.3246, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.6101694915254238, |
|
"grad_norm": 0.7395955920219421, |
|
"learning_rate": 8.127638534938227e-05, |
|
"loss": 88.276, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.615819209039548, |
|
"grad_norm": 0.6584829688072205, |
|
"learning_rate": 8.09188248830782e-05, |
|
"loss": 88.1927, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.6214689265536724, |
|
"grad_norm": 0.8070241808891296, |
|
"learning_rate": 8.055868570489247e-05, |
|
"loss": 88.1449, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6271186440677966, |
|
"grad_norm": 0.6944290995597839, |
|
"learning_rate": 8.019599785138635e-05, |
|
"loss": 88.2262, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.632768361581921, |
|
"grad_norm": 0.7627915740013123, |
|
"learning_rate": 7.983079157168736e-05, |
|
"loss": 88.2257, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.6384180790960452, |
|
"grad_norm": 0.7771816849708557, |
|
"learning_rate": 7.946309732496647e-05, |
|
"loss": 88.211, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.6440677966101694, |
|
"grad_norm": 0.766257107257843, |
|
"learning_rate": 7.909294577789766e-05, |
|
"loss": 88.1717, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.6497175141242938, |
|
"grad_norm": 0.7782958149909973, |
|
"learning_rate": 7.872036780210026e-05, |
|
"loss": 88.15, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.655367231638418, |
|
"grad_norm": 0.7616683840751648, |
|
"learning_rate": 7.834539447156424e-05, |
|
"loss": 88.1597, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.6610169491525424, |
|
"grad_norm": 0.6816295981407166, |
|
"learning_rate": 7.796805706005843e-05, |
|
"loss": 88.1752, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.7385973334312439, |
|
"learning_rate": 7.75883870385223e-05, |
|
"loss": 88.1828, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.672316384180791, |
|
"grad_norm": 0.7695619463920593, |
|
"learning_rate": 7.72064160724412e-05, |
|
"loss": 88.2801, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.6779661016949152, |
|
"grad_norm": 0.7640277147293091, |
|
"learning_rate": 7.682217601920529e-05, |
|
"loss": 88.2095, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6836158192090396, |
|
"grad_norm": 0.7877722382545471, |
|
"learning_rate": 7.643569892545267e-05, |
|
"loss": 88.16, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.6892655367231638, |
|
"grad_norm": 0.7659584283828735, |
|
"learning_rate": 7.604701702439651e-05, |
|
"loss": 88.1962, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.6949152542372882, |
|
"grad_norm": 0.8098394870758057, |
|
"learning_rate": 7.565616273313678e-05, |
|
"loss": 88.1656, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.7005649717514124, |
|
"grad_norm": 0.7991154193878174, |
|
"learning_rate": 7.526316864995647e-05, |
|
"loss": 88.1524, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.7062146892655368, |
|
"grad_norm": 0.7656694650650024, |
|
"learning_rate": 7.486806755160297e-05, |
|
"loss": 88.1187, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.711864406779661, |
|
"grad_norm": 0.7287772297859192, |
|
"learning_rate": 7.447089239055428e-05, |
|
"loss": 88.1673, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.7175141242937854, |
|
"grad_norm": 0.8145878911018372, |
|
"learning_rate": 7.407167629227072e-05, |
|
"loss": 88.1032, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.7231638418079096, |
|
"grad_norm": 0.857018768787384, |
|
"learning_rate": 7.367045255243216e-05, |
|
"loss": 88.0358, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.7288135593220338, |
|
"grad_norm": 0.8250066041946411, |
|
"learning_rate": 7.326725463416117e-05, |
|
"loss": 88.2404, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.7344632768361582, |
|
"grad_norm": 0.9477486610412598, |
|
"learning_rate": 7.286211616523193e-05, |
|
"loss": 88.2214, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7401129943502824, |
|
"grad_norm": 0.971202552318573, |
|
"learning_rate": 7.245507093526574e-05, |
|
"loss": 88.214, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.7457627118644068, |
|
"grad_norm": 1.0824439525604248, |
|
"learning_rate": 7.204615289291283e-05, |
|
"loss": 88.1631, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.751412429378531, |
|
"grad_norm": 0.7381979823112488, |
|
"learning_rate": 7.163539614302088e-05, |
|
"loss": 88.2446, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.7570621468926554, |
|
"grad_norm": 0.6837661266326904, |
|
"learning_rate": 7.122283494379076e-05, |
|
"loss": 88.2198, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.7627118644067796, |
|
"grad_norm": 0.6421964764595032, |
|
"learning_rate": 7.080850370391907e-05, |
|
"loss": 88.2727, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.768361581920904, |
|
"grad_norm": 0.6602257490158081, |
|
"learning_rate": 7.039243697972857e-05, |
|
"loss": 88.1771, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.7740112994350282, |
|
"grad_norm": 0.7069459557533264, |
|
"learning_rate": 6.997466947228596e-05, |
|
"loss": 88.1327, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.7796610169491526, |
|
"grad_norm": 0.6446311473846436, |
|
"learning_rate": 6.95552360245078e-05, |
|
"loss": 88.2014, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.7853107344632768, |
|
"grad_norm": 0.6669993996620178, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 88.1836, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.7909604519774012, |
|
"grad_norm": 0.7162554264068604, |
|
"learning_rate": 6.871151137141266e-05, |
|
"loss": 88.0806, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7966101694915254, |
|
"grad_norm": 0.6205349564552307, |
|
"learning_rate": 6.828729053496629e-05, |
|
"loss": 88.1998, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.8022598870056498, |
|
"grad_norm": 0.6284624934196472, |
|
"learning_rate": 6.786154449005665e-05, |
|
"loss": 88.0922, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.807909604519774, |
|
"grad_norm": 0.6776576638221741, |
|
"learning_rate": 6.743430874503143e-05, |
|
"loss": 88.104, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.8135593220338984, |
|
"grad_norm": 0.6751952767372131, |
|
"learning_rate": 6.700561893248332e-05, |
|
"loss": 88.1876, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.8192090395480226, |
|
"grad_norm": 0.6897196769714355, |
|
"learning_rate": 6.6575510806278e-05, |
|
"loss": 88.1415, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.8248587570621468, |
|
"grad_norm": 0.6813532114028931, |
|
"learning_rate": 6.614402023857232e-05, |
|
"loss": 88.0379, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.8305084745762712, |
|
"grad_norm": 0.7052000164985657, |
|
"learning_rate": 6.57111832168224e-05, |
|
"loss": 88.0754, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.8361581920903954, |
|
"grad_norm": 0.6268584132194519, |
|
"learning_rate": 6.527703584078218e-05, |
|
"loss": 88.0336, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.8418079096045198, |
|
"grad_norm": 0.6451026201248169, |
|
"learning_rate": 6.484161431949267e-05, |
|
"loss": 88.1128, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.847457627118644, |
|
"grad_norm": 0.6330875158309937, |
|
"learning_rate": 6.440495496826189e-05, |
|
"loss": 88.0844, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8531073446327684, |
|
"grad_norm": 0.5872354507446289, |
|
"learning_rate": 6.39670942056362e-05, |
|
"loss": 88.082, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.8587570621468926, |
|
"grad_norm": 0.6449838280677795, |
|
"learning_rate": 6.352806855036287e-05, |
|
"loss": 88.0767, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.864406779661017, |
|
"grad_norm": 0.6349008083343506, |
|
"learning_rate": 6.308791461834426e-05, |
|
"loss": 88.0604, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.8700564971751412, |
|
"grad_norm": 0.6187208890914917, |
|
"learning_rate": 6.264666911958404e-05, |
|
"loss": 88.1468, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.8757062146892656, |
|
"grad_norm": 0.6470857262611389, |
|
"learning_rate": 6.22043688551254e-05, |
|
"loss": 88.0885, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.8813559322033898, |
|
"grad_norm": 0.6183845400810242, |
|
"learning_rate": 6.17610507139818e-05, |
|
"loss": 88.0303, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.8870056497175142, |
|
"grad_norm": 0.6772004961967468, |
|
"learning_rate": 6.13167516700603e-05, |
|
"loss": 88.111, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.8926553672316384, |
|
"grad_norm": 0.6499753594398499, |
|
"learning_rate": 6.0871508779077856e-05, |
|
"loss": 88.0266, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.8983050847457628, |
|
"grad_norm": 0.6717754602432251, |
|
"learning_rate": 6.04253591754708e-05, |
|
"loss": 88.0712, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.903954802259887, |
|
"grad_norm": 0.6724316477775574, |
|
"learning_rate": 5.9978340069297647e-05, |
|
"loss": 87.9934, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9096045197740112, |
|
"grad_norm": 0.6548104286193848, |
|
"learning_rate": 5.9530488743135746e-05, |
|
"loss": 88.0043, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.9152542372881356, |
|
"grad_norm": 0.6596248745918274, |
|
"learning_rate": 5.908184254897182e-05, |
|
"loss": 88.0424, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9209039548022598, |
|
"grad_norm": 0.6875431537628174, |
|
"learning_rate": 5.863243890508668e-05, |
|
"loss": 88.0799, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.9265536723163842, |
|
"grad_norm": 0.6975457668304443, |
|
"learning_rate": 5.8182315292934406e-05, |
|
"loss": 88.0774, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.9322033898305084, |
|
"grad_norm": 0.6618639230728149, |
|
"learning_rate": 5.773150925401641e-05, |
|
"loss": 88.124, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9378531073446328, |
|
"grad_norm": 0.6853637099266052, |
|
"learning_rate": 5.7280058386750255e-05, |
|
"loss": 87.9972, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.943502824858757, |
|
"grad_norm": 0.7230448126792908, |
|
"learning_rate": 5.68280003433339e-05, |
|
"loss": 87.9713, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.9491525423728814, |
|
"grad_norm": 0.7414244413375854, |
|
"learning_rate": 5.63753728266054e-05, |
|
"loss": 88.0774, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.9548022598870056, |
|
"grad_norm": 0.766106367111206, |
|
"learning_rate": 5.592221358689843e-05, |
|
"loss": 88.0479, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.96045197740113, |
|
"grad_norm": 0.7827813029289246, |
|
"learning_rate": 5.546856041889373e-05, |
|
"loss": 88.1115, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9661016949152542, |
|
"grad_norm": 0.693665623664856, |
|
"learning_rate": 5.5014451158466975e-05, |
|
"loss": 88.0148, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.9717514124293786, |
|
"grad_norm": 0.7761551737785339, |
|
"learning_rate": 5.4559923679533173e-05, |
|
"loss": 87.9896, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.9774011299435028, |
|
"grad_norm": 0.7685063481330872, |
|
"learning_rate": 5.410501589088785e-05, |
|
"loss": 87.9791, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.9830508474576272, |
|
"grad_norm": 0.8432329297065735, |
|
"learning_rate": 5.364976573304538e-05, |
|
"loss": 88.1228, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.9887005649717514, |
|
"grad_norm": 0.9350976347923279, |
|
"learning_rate": 5.319421117507462e-05, |
|
"loss": 87.9292, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.9943502824858758, |
|
"grad_norm": 1.2149473428726196, |
|
"learning_rate": 5.273839021143218e-05, |
|
"loss": 87.9655, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.6291913986206055, |
|
"learning_rate": 5.22823408587937e-05, |
|
"loss": 88.1277, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 11.00737476348877, |
|
"eval_runtime": 0.6468, |
|
"eval_samples_per_second": 460.76, |
|
"eval_steps_per_second": 115.963, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.0056497175141244, |
|
"grad_norm": 0.6629582047462463, |
|
"learning_rate": 5.182610115288295e-05, |
|
"loss": 88.1835, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.0112994350282485, |
|
"grad_norm": 0.6663825511932373, |
|
"learning_rate": 5.136970914529975e-05, |
|
"loss": 88.162, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.0169491525423728, |
|
"grad_norm": 0.6589259505271912, |
|
"learning_rate": 5.091320290034625e-05, |
|
"loss": 88.1961, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0225988700564972, |
|
"grad_norm": 0.5902798175811768, |
|
"learning_rate": 5.045662049185229e-05, |
|
"loss": 88.0916, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.0282485875706215, |
|
"grad_norm": 0.5283370018005371, |
|
"learning_rate": 5e-05, |
|
"loss": 88.119, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.0338983050847457, |
|
"grad_norm": 0.5795127749443054, |
|
"learning_rate": 4.9543379508147716e-05, |
|
"loss": 88.1366, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.03954802259887, |
|
"grad_norm": 0.6025692820549011, |
|
"learning_rate": 4.9086797099653756e-05, |
|
"loss": 88.0548, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.0451977401129944, |
|
"grad_norm": 0.5939972996711731, |
|
"learning_rate": 4.863029085470026e-05, |
|
"loss": 87.9877, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.0508474576271187, |
|
"grad_norm": 0.5899671316146851, |
|
"learning_rate": 4.817389884711705e-05, |
|
"loss": 88.0826, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.0564971751412429, |
|
"grad_norm": 0.5887870788574219, |
|
"learning_rate": 4.771765914120631e-05, |
|
"loss": 88.1155, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.0621468926553672, |
|
"grad_norm": 0.5916518568992615, |
|
"learning_rate": 4.726160978856782e-05, |
|
"loss": 88.0002, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.0677966101694916, |
|
"grad_norm": 0.6661847233772278, |
|
"learning_rate": 4.6805788824925386e-05, |
|
"loss": 88.0365, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.073446327683616, |
|
"grad_norm": 0.5642584562301636, |
|
"learning_rate": 4.6350234266954626e-05, |
|
"loss": 88.0447, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.07909604519774, |
|
"grad_norm": 0.5734567046165466, |
|
"learning_rate": 4.589498410911215e-05, |
|
"loss": 88.0759, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.0847457627118644, |
|
"grad_norm": 0.5504806041717529, |
|
"learning_rate": 4.5440076320466825e-05, |
|
"loss": 88.0262, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.0903954802259888, |
|
"grad_norm": 0.5161648392677307, |
|
"learning_rate": 4.4985548841533036e-05, |
|
"loss": 88.0113, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.0960451977401129, |
|
"grad_norm": 0.5934569239616394, |
|
"learning_rate": 4.4531439581106295e-05, |
|
"loss": 88.1235, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.1016949152542372, |
|
"grad_norm": 0.5542126297950745, |
|
"learning_rate": 4.4077786413101595e-05, |
|
"loss": 88.0276, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1073446327683616, |
|
"grad_norm": 0.6331348419189453, |
|
"learning_rate": 4.362462717339461e-05, |
|
"loss": 88.0501, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.112994350282486, |
|
"grad_norm": 0.5662806630134583, |
|
"learning_rate": 4.317199965666613e-05, |
|
"loss": 88.0157, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.11864406779661, |
|
"grad_norm": 0.5534279942512512, |
|
"learning_rate": 4.271994161324977e-05, |
|
"loss": 87.9114, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.1242937853107344, |
|
"grad_norm": 0.5692214369773865, |
|
"learning_rate": 4.22684907459836e-05, |
|
"loss": 87.9938, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.1299435028248588, |
|
"grad_norm": 0.6049486994743347, |
|
"learning_rate": 4.1817684707065605e-05, |
|
"loss": 87.9819, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1355932203389831, |
|
"grad_norm": 0.5856545567512512, |
|
"learning_rate": 4.1367561094913335e-05, |
|
"loss": 87.9492, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.1412429378531073, |
|
"grad_norm": 0.5690354704856873, |
|
"learning_rate": 4.0918157451028185e-05, |
|
"loss": 87.9628, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.1468926553672316, |
|
"grad_norm": 0.6273289322853088, |
|
"learning_rate": 4.0469511256864265e-05, |
|
"loss": 88.0531, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.152542372881356, |
|
"grad_norm": 0.6032590866088867, |
|
"learning_rate": 4.002165993070237e-05, |
|
"loss": 87.8896, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.1581920903954803, |
|
"grad_norm": 0.5722517371177673, |
|
"learning_rate": 3.957464082452922e-05, |
|
"loss": 87.9297, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.1638418079096045, |
|
"grad_norm": 0.6684175729751587, |
|
"learning_rate": 3.9128491220922156e-05, |
|
"loss": 88.0202, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.1694915254237288, |
|
"grad_norm": 0.6134036779403687, |
|
"learning_rate": 3.8683248329939716e-05, |
|
"loss": 87.9706, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.1751412429378532, |
|
"grad_norm": 0.6217228174209595, |
|
"learning_rate": 3.823894928601822e-05, |
|
"loss": 88.0366, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.1807909604519775, |
|
"grad_norm": 0.577599287033081, |
|
"learning_rate": 3.7795631144874604e-05, |
|
"loss": 88.0188, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.1864406779661016, |
|
"grad_norm": 0.6666799783706665, |
|
"learning_rate": 3.735333088041596e-05, |
|
"loss": 88.0513, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.192090395480226, |
|
"grad_norm": 0.5872676968574524, |
|
"learning_rate": 3.6912085381655734e-05, |
|
"loss": 88.0103, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.1977401129943503, |
|
"grad_norm": 0.5996030569076538, |
|
"learning_rate": 3.6471931449637124e-05, |
|
"loss": 87.9581, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.2033898305084745, |
|
"grad_norm": 0.6568124890327454, |
|
"learning_rate": 3.60329057943638e-05, |
|
"loss": 87.9921, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.2090395480225988, |
|
"grad_norm": 0.7044454216957092, |
|
"learning_rate": 3.5595045031738125e-05, |
|
"loss": 88.0731, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.2146892655367232, |
|
"grad_norm": 0.7266508936882019, |
|
"learning_rate": 3.515838568050736e-05, |
|
"loss": 88.0184, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.2203389830508475, |
|
"grad_norm": 0.7107569575309753, |
|
"learning_rate": 3.472296415921783e-05, |
|
"loss": 88.0427, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2259887005649717, |
|
"grad_norm": 0.7413809299468994, |
|
"learning_rate": 3.428881678317763e-05, |
|
"loss": 87.9618, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.231638418079096, |
|
"grad_norm": 0.7287234663963318, |
|
"learning_rate": 3.38559797614277e-05, |
|
"loss": 88.007, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.2372881355932204, |
|
"grad_norm": 0.8537057042121887, |
|
"learning_rate": 3.3424489193722013e-05, |
|
"loss": 88.055, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.2429378531073447, |
|
"grad_norm": 0.8392075300216675, |
|
"learning_rate": 3.2994381067516696e-05, |
|
"loss": 88.0221, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.2485875706214689, |
|
"grad_norm": 1.036270022392273, |
|
"learning_rate": 3.256569125496858e-05, |
|
"loss": 87.9944, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.2542372881355932, |
|
"grad_norm": 0.633751392364502, |
|
"learning_rate": 3.2138455509943366e-05, |
|
"loss": 88.1435, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.2598870056497176, |
|
"grad_norm": 0.5413132309913635, |
|
"learning_rate": 3.171270946503373e-05, |
|
"loss": 88.1694, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.2655367231638417, |
|
"grad_norm": 0.5542528629302979, |
|
"learning_rate": 3.128848862858734e-05, |
|
"loss": 88.1215, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.271186440677966, |
|
"grad_norm": 0.5940180420875549, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 88.0598, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.2768361581920904, |
|
"grad_norm": 0.5067227482795715, |
|
"learning_rate": 3.0444763975492208e-05, |
|
"loss": 88.0876, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.2824858757062148, |
|
"grad_norm": 0.5656532049179077, |
|
"learning_rate": 3.0025330527714046e-05, |
|
"loss": 88.0915, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.288135593220339, |
|
"grad_norm": 0.52092045545578, |
|
"learning_rate": 2.9607563020271446e-05, |
|
"loss": 88.0636, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.2937853107344632, |
|
"grad_norm": 0.5944317579269409, |
|
"learning_rate": 2.9191496296080935e-05, |
|
"loss": 87.999, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.2994350282485876, |
|
"grad_norm": 0.5355740189552307, |
|
"learning_rate": 2.8777165056209256e-05, |
|
"loss": 88.0768, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.305084745762712, |
|
"grad_norm": 0.5801308155059814, |
|
"learning_rate": 2.836460385697911e-05, |
|
"loss": 88.016, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.310734463276836, |
|
"grad_norm": 0.5269469618797302, |
|
"learning_rate": 2.7953847107087172e-05, |
|
"loss": 87.9684, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.3163841807909604, |
|
"grad_norm": 0.5054383873939514, |
|
"learning_rate": 2.754492906473425e-05, |
|
"loss": 88.1685, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.3220338983050848, |
|
"grad_norm": 0.6001875400543213, |
|
"learning_rate": 2.7137883834768073e-05, |
|
"loss": 88.0379, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.327683615819209, |
|
"grad_norm": 0.5223692059516907, |
|
"learning_rate": 2.6732745365838828e-05, |
|
"loss": 87.9794, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.6370963454246521, |
|
"learning_rate": 2.6329547447567836e-05, |
|
"loss": 87.9989, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.3389830508474576, |
|
"grad_norm": 0.49859458208084106, |
|
"learning_rate": 2.5928323707729306e-05, |
|
"loss": 88.0401, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.344632768361582, |
|
"grad_norm": 0.544282078742981, |
|
"learning_rate": 2.5529107609445733e-05, |
|
"loss": 88.0129, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.3502824858757063, |
|
"grad_norm": 0.6058323979377747, |
|
"learning_rate": 2.513193244839704e-05, |
|
"loss": 88.0901, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.3559322033898304, |
|
"grad_norm": 0.5392006635665894, |
|
"learning_rate": 2.4736831350043536e-05, |
|
"loss": 88.0176, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.3615819209039548, |
|
"grad_norm": 0.5761566758155823, |
|
"learning_rate": 2.4343837266863246e-05, |
|
"loss": 88.0378, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.3672316384180792, |
|
"grad_norm": 0.5357884764671326, |
|
"learning_rate": 2.3952982975603496e-05, |
|
"loss": 88.0098, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.3728813559322033, |
|
"grad_norm": 0.5242740511894226, |
|
"learning_rate": 2.356430107454733e-05, |
|
"loss": 88.0394, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3785310734463276, |
|
"grad_norm": 0.5258982181549072, |
|
"learning_rate": 2.3177823980794727e-05, |
|
"loss": 87.9584, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.384180790960452, |
|
"grad_norm": 0.6452914476394653, |
|
"learning_rate": 2.279358392755882e-05, |
|
"loss": 88.0219, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.3898305084745763, |
|
"grad_norm": 0.5566428899765015, |
|
"learning_rate": 2.24116129614777e-05, |
|
"loss": 87.9679, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.3954802259887007, |
|
"grad_norm": 0.5655139088630676, |
|
"learning_rate": 2.2031942939941592e-05, |
|
"loss": 87.9924, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.4011299435028248, |
|
"grad_norm": 0.6263113617897034, |
|
"learning_rate": 2.1654605528435773e-05, |
|
"loss": 87.919, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.4067796610169492, |
|
"grad_norm": 0.5700308084487915, |
|
"learning_rate": 2.127963219789974e-05, |
|
"loss": 87.9079, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.4124293785310735, |
|
"grad_norm": 0.5626516938209534, |
|
"learning_rate": 2.090705422210237e-05, |
|
"loss": 87.9963, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4180790960451977, |
|
"grad_norm": 0.5786779522895813, |
|
"learning_rate": 2.0536902675033548e-05, |
|
"loss": 87.9655, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.423728813559322, |
|
"grad_norm": 0.6087446212768555, |
|
"learning_rate": 2.0169208428312647e-05, |
|
"loss": 87.9886, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.4293785310734464, |
|
"grad_norm": 0.627742350101471, |
|
"learning_rate": 1.980400214861367e-05, |
|
"loss": 88.072, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.4350282485875705, |
|
"grad_norm": 0.6009258031845093, |
|
"learning_rate": 1.9441314295107537e-05, |
|
"loss": 87.9974, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.4406779661016949, |
|
"grad_norm": 0.5660704374313354, |
|
"learning_rate": 1.90811751169218e-05, |
|
"loss": 87.9457, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.4463276836158192, |
|
"grad_norm": 0.6332747340202332, |
|
"learning_rate": 1.8723614650617723e-05, |
|
"loss": 88.0142, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.4519774011299436, |
|
"grad_norm": 0.653758704662323, |
|
"learning_rate": 1.8368662717685187e-05, |
|
"loss": 88.0092, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.457627118644068, |
|
"grad_norm": 0.574155330657959, |
|
"learning_rate": 1.801634892205545e-05, |
|
"loss": 87.9217, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.463276836158192, |
|
"grad_norm": 0.6903696060180664, |
|
"learning_rate": 1.766670264763213e-05, |
|
"loss": 87.9605, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.4689265536723164, |
|
"grad_norm": 0.6985341310501099, |
|
"learning_rate": 1.7319753055840553e-05, |
|
"loss": 88.1013, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.4745762711864407, |
|
"grad_norm": 0.6917281150817871, |
|
"learning_rate": 1.697552908319553e-05, |
|
"loss": 87.9394, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.4802259887005649, |
|
"grad_norm": 0.7425939440727234, |
|
"learning_rate": 1.6634059438888033e-05, |
|
"loss": 87.9243, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.4858757062146892, |
|
"grad_norm": 0.7640402317047119, |
|
"learning_rate": 1.6295372602390767e-05, |
|
"loss": 87.9382, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.4915254237288136, |
|
"grad_norm": 0.8300817608833313, |
|
"learning_rate": 1.5959496821082905e-05, |
|
"loss": 87.8827, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.497175141242938, |
|
"grad_norm": 1.0759795904159546, |
|
"learning_rate": 1.562646010789411e-05, |
|
"loss": 87.8789, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.5028248587570623, |
|
"grad_norm": 0.6373040080070496, |
|
"learning_rate": 1.5296290238968303e-05, |
|
"loss": 88.0713, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.5084745762711864, |
|
"grad_norm": 0.5541619062423706, |
|
"learning_rate": 1.496901475134701e-05, |
|
"loss": 88.1348, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.5141242937853108, |
|
"grad_norm": 0.5655987858772278, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 88.1458, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.5197740112994351, |
|
"grad_norm": 0.5608534216880798, |
|
"learning_rate": 1.4323255858912011e-05, |
|
"loss": 88.1328, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.5254237288135593, |
|
"grad_norm": 0.5184534192085266, |
|
"learning_rate": 1.4004826312100216e-05, |
|
"loss": 88.049, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.5310734463276836, |
|
"grad_norm": 0.5682441592216492, |
|
"learning_rate": 1.3689398858104751e-05, |
|
"loss": 88.0269, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.536723163841808, |
|
"grad_norm": 0.46682822704315186, |
|
"learning_rate": 1.337699980441069e-05, |
|
"loss": 88.0322, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.542372881355932, |
|
"grad_norm": 0.55782550573349, |
|
"learning_rate": 1.3067655205926488e-05, |
|
"loss": 87.9584, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.5480225988700564, |
|
"grad_norm": 0.549993097782135, |
|
"learning_rate": 1.2761390862810907e-05, |
|
"loss": 87.9824, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.5536723163841808, |
|
"grad_norm": 0.49282097816467285, |
|
"learning_rate": 1.2458232318321305e-05, |
|
"loss": 87.9938, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.559322033898305, |
|
"grad_norm": 0.5615452527999878, |
|
"learning_rate": 1.2158204856683176e-05, |
|
"loss": 87.9862, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.5649717514124295, |
|
"grad_norm": 0.5333657264709473, |
|
"learning_rate": 1.1861333500981448e-05, |
|
"loss": 87.9579, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.5706214689265536, |
|
"grad_norm": 0.5501605868339539, |
|
"learning_rate": 1.1567643011073392e-05, |
|
"loss": 87.8844, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.576271186440678, |
|
"grad_norm": 0.5416379570960999, |
|
"learning_rate": 1.127715788152372e-05, |
|
"loss": 87.9542, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.5819209039548023, |
|
"grad_norm": 0.5352757573127747, |
|
"learning_rate": 1.0989902339561553e-05, |
|
"loss": 87.9788, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.5875706214689265, |
|
"grad_norm": 0.514640748500824, |
|
"learning_rate": 1.0705900343059855e-05, |
|
"loss": 87.9644, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.5932203389830508, |
|
"grad_norm": 0.5264514088630676, |
|
"learning_rate": 1.0425175578537299e-05, |
|
"loss": 88.0876, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.5988700564971752, |
|
"grad_norm": 0.5024112462997437, |
|
"learning_rate": 1.0147751459182736e-05, |
|
"loss": 87.9844, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.6045197740112993, |
|
"grad_norm": 0.48817285895347595, |
|
"learning_rate": 9.873651122902472e-06, |
|
"loss": 88.0376, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.6101694915254239, |
|
"grad_norm": 0.6276935935020447, |
|
"learning_rate": 9.602897430390457e-06, |
|
"loss": 87.933, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.615819209039548, |
|
"grad_norm": 0.5100238919258118, |
|
"learning_rate": 9.335512963221732e-06, |
|
"loss": 88.0798, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.6214689265536724, |
|
"grad_norm": 0.534130871295929, |
|
"learning_rate": 9.071520021969027e-06, |
|
"loss": 88.1008, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.6271186440677967, |
|
"grad_norm": 0.5745760798454285, |
|
"learning_rate": 8.810940624342785e-06, |
|
"loss": 87.9179, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.6327683615819208, |
|
"grad_norm": 0.5422095656394958, |
|
"learning_rate": 8.553796503354899e-06, |
|
"loss": 87.9494, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.6384180790960452, |
|
"grad_norm": 0.5460516214370728, |
|
"learning_rate": 8.30010910550611e-06, |
|
"loss": 87.9545, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.6440677966101696, |
|
"grad_norm": 0.6041167974472046, |
|
"learning_rate": 8.049899588997244e-06, |
|
"loss": 87.9521, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.6497175141242937, |
|
"grad_norm": 0.5654332637786865, |
|
"learning_rate": 7.803188821964652e-06, |
|
"loss": 87.9232, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.655367231638418, |
|
"grad_norm": 0.5767014026641846, |
|
"learning_rate": 7.559997380739714e-06, |
|
"loss": 87.8771, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.6610169491525424, |
|
"grad_norm": 0.5665624737739563, |
|
"learning_rate": 7.320345548132679e-06, |
|
"loss": 88.0009, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.5585253238677979, |
|
"learning_rate": 7.084253311741101e-06, |
|
"loss": 87.9623, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.672316384180791, |
|
"grad_norm": 0.6449034214019775, |
|
"learning_rate": 6.851740362282788e-06, |
|
"loss": 87.968, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.6779661016949152, |
|
"grad_norm": 0.6085680723190308, |
|
"learning_rate": 6.622826091953482e-06, |
|
"loss": 88.0431, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6836158192090396, |
|
"grad_norm": 0.7078083753585815, |
|
"learning_rate": 6.397529592809614e-06, |
|
"loss": 87.9145, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.689265536723164, |
|
"grad_norm": 0.5888929963111877, |
|
"learning_rate": 6.1758696551758976e-06, |
|
"loss": 87.974, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 0.6576589941978455, |
|
"learning_rate": 5.957864766078186e-06, |
|
"loss": 87.9155, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7005649717514124, |
|
"grad_norm": 0.6700954437255859, |
|
"learning_rate": 5.743533107701593e-06, |
|
"loss": 87.9094, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.7062146892655368, |
|
"grad_norm": 0.6426727175712585, |
|
"learning_rate": 5.532892555874059e-06, |
|
"loss": 87.9853, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.711864406779661, |
|
"grad_norm": 0.6309840679168701, |
|
"learning_rate": 5.325960678575498e-06, |
|
"loss": 87.997, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.7175141242937855, |
|
"grad_norm": 0.6771166920661926, |
|
"learning_rate": 5.122754734472496e-06, |
|
"loss": 87.9336, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.7231638418079096, |
|
"grad_norm": 0.7140227556228638, |
|
"learning_rate": 4.92329167147898e-06, |
|
"loss": 87.9128, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.7288135593220337, |
|
"grad_norm": 0.7067782282829285, |
|
"learning_rate": 4.727588125342669e-06, |
|
"loss": 88.008, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.7344632768361583, |
|
"grad_norm": 0.746557891368866, |
|
"learning_rate": 4.535660418257631e-06, |
|
"loss": 87.9927, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.7401129943502824, |
|
"grad_norm": 0.8619289994239807, |
|
"learning_rate": 4.3475245575029185e-06, |
|
"loss": 87.8356, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.7457627118644068, |
|
"grad_norm": 1.2444101572036743, |
|
"learning_rate": 4.163196234107603e-06, |
|
"loss": 87.9559, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.7514124293785311, |
|
"grad_norm": 0.6213842034339905, |
|
"learning_rate": 3.982690821542035e-06, |
|
"loss": 88.1411, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.7570621468926553, |
|
"grad_norm": 0.5701696872711182, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 88.1261, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.7627118644067796, |
|
"grad_norm": 0.5259557366371155, |
|
"learning_rate": 3.6332086273214827e-06, |
|
"loss": 88.1058, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.768361581920904, |
|
"grad_norm": 0.561783492565155, |
|
"learning_rate": 3.464260993407098e-06, |
|
"loss": 87.9936, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.774011299435028, |
|
"grad_norm": 0.5012845993041992, |
|
"learning_rate": 3.299194563372604e-06, |
|
"loss": 88.0487, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.7796610169491527, |
|
"grad_norm": 0.5462010502815247, |
|
"learning_rate": 3.1380231041954366e-06, |
|
"loss": 87.9775, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.7853107344632768, |
|
"grad_norm": 0.5348891615867615, |
|
"learning_rate": 2.9807600580021634e-06, |
|
"loss": 88.0445, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.7909604519774012, |
|
"grad_norm": 0.5476884245872498, |
|
"learning_rate": 2.827418540947313e-06, |
|
"loss": 87.9698, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.7966101694915255, |
|
"grad_norm": 0.5374354124069214, |
|
"learning_rate": 2.6780113421195298e-06, |
|
"loss": 87.9377, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.8022598870056497, |
|
"grad_norm": 0.5628228783607483, |
|
"learning_rate": 2.532550922474897e-06, |
|
"loss": 87.8776, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.807909604519774, |
|
"grad_norm": 0.5416897535324097, |
|
"learning_rate": 2.3910494137976523e-06, |
|
"loss": 87.9337, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.8135593220338984, |
|
"grad_norm": 0.590156614780426, |
|
"learning_rate": 2.253518617688377e-06, |
|
"loss": 87.9672, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.8192090395480225, |
|
"grad_norm": 0.5673385858535767, |
|
"learning_rate": 2.1199700045797077e-06, |
|
"loss": 87.9425, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.8248587570621468, |
|
"grad_norm": 0.5306013226509094, |
|
"learning_rate": 1.9904147127796646e-06, |
|
"loss": 87.9447, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.8305084745762712, |
|
"grad_norm": 0.5657864809036255, |
|
"learning_rate": 1.864863547542711e-06, |
|
"loss": 87.9126, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8361581920903953, |
|
"grad_norm": 0.5366300344467163, |
|
"learning_rate": 1.7433269801685303e-06, |
|
"loss": 87.9557, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.84180790960452, |
|
"grad_norm": 0.5389237403869629, |
|
"learning_rate": 1.6258151471287396e-06, |
|
"loss": 87.9312, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.847457627118644, |
|
"grad_norm": 0.5478444695472717, |
|
"learning_rate": 1.5123378492214291e-06, |
|
"loss": 87.9573, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.8531073446327684, |
|
"grad_norm": 0.5147649645805359, |
|
"learning_rate": 1.4029045507537697e-06, |
|
"loss": 88.0396, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.8587570621468927, |
|
"grad_norm": 0.5677837133407593, |
|
"learning_rate": 1.297524378752696e-06, |
|
"loss": 88.0318, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.8644067796610169, |
|
"grad_norm": 0.5549485087394714, |
|
"learning_rate": 1.196206122203647e-06, |
|
"loss": 88.005, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.8700564971751412, |
|
"grad_norm": 0.5245904922485352, |
|
"learning_rate": 1.0989582313175374e-06, |
|
"loss": 88.0128, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.8757062146892656, |
|
"grad_norm": 0.5384089946746826, |
|
"learning_rate": 1.005788816826031e-06, |
|
"loss": 87.8896, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.8813559322033897, |
|
"grad_norm": 0.5512574911117554, |
|
"learning_rate": 9.167056493050496e-07, |
|
"loss": 87.996, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.8870056497175143, |
|
"grad_norm": 0.6043868064880371, |
|
"learning_rate": 8.317161585266964e-07, |
|
"loss": 87.9643, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.8926553672316384, |
|
"grad_norm": 0.617608368396759, |
|
"learning_rate": 7.508274328395848e-07, |
|
"loss": 87.9645, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.8983050847457628, |
|
"grad_norm": 0.5591124892234802, |
|
"learning_rate": 6.74046218577673e-07, |
|
"loss": 87.9943, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.9039548022598871, |
|
"grad_norm": 0.5672664046287537, |
|
"learning_rate": 6.013789194975749e-07, |
|
"loss": 87.9743, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.9096045197740112, |
|
"grad_norm": 0.5669750571250916, |
|
"learning_rate": 5.328315962444874e-07, |
|
"loss": 87.8982, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.9152542372881356, |
|
"grad_norm": 0.5600360035896301, |
|
"learning_rate": 4.684099658467223e-07, |
|
"loss": 87.8697, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.92090395480226, |
|
"grad_norm": 0.5569552183151245, |
|
"learning_rate": 4.0811940123886004e-07, |
|
"loss": 88.0418, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.926553672316384, |
|
"grad_norm": 0.549552857875824, |
|
"learning_rate": 3.5196493081366967e-07, |
|
"loss": 87.9701, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.9322033898305084, |
|
"grad_norm": 0.6111685037612915, |
|
"learning_rate": 2.9995123800270476e-07, |
|
"loss": 88.1107, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.9378531073446328, |
|
"grad_norm": 0.6287999153137207, |
|
"learning_rate": 2.5208266088569966e-07, |
|
"loss": 87.9764, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.943502824858757, |
|
"grad_norm": 0.5528276562690735, |
|
"learning_rate": 2.083631918287643e-07, |
|
"loss": 87.9886, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.9491525423728815, |
|
"grad_norm": 0.6393068432807922, |
|
"learning_rate": 1.6879647715140611e-07, |
|
"loss": 88.0424, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.9548022598870056, |
|
"grad_norm": 0.6431833505630493, |
|
"learning_rate": 1.333858168224178e-07, |
|
"loss": 88.0173, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.96045197740113, |
|
"grad_norm": 0.6915642023086548, |
|
"learning_rate": 1.0213416418465294e-07, |
|
"loss": 87.9516, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.9661016949152543, |
|
"grad_norm": 0.6479020714759827, |
|
"learning_rate": 7.5044125708712e-08, |
|
"loss": 87.9935, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.9717514124293785, |
|
"grad_norm": 0.7263698577880859, |
|
"learning_rate": 5.2117960775543986e-08, |
|
"loss": 87.9126, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.9774011299435028, |
|
"grad_norm": 0.6763975024223328, |
|
"learning_rate": 3.3357581488030475e-08, |
|
"loss": 87.993, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9830508474576272, |
|
"grad_norm": 0.8059737682342529, |
|
"learning_rate": 1.8764552511485457e-08, |
|
"loss": 88.038, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.9887005649717513, |
|
"grad_norm": 0.7300512790679932, |
|
"learning_rate": 8.340090943176338e-09, |
|
"loss": 88.0236, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.9943502824858759, |
|
"grad_norm": 0.9957327842712402, |
|
"learning_rate": 2.0850662108051755e-09, |
|
"loss": 87.9029, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.6088115572929382, |
|
"learning_rate": 0.0, |
|
"loss": 87.8932, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 10.999165534973145, |
|
"eval_runtime": 0.674, |
|
"eval_samples_per_second": 442.111, |
|
"eval_steps_per_second": 111.27, |
|
"step": 354 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 354, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3718069616640.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|