{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 5334,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0009373828271466067, "grad_norm": 55.87532592115914, "learning_rate": 2.8089887640449436e-06, "loss": 3.8993, "step": 5},
    {"epoch": 0.0018747656542932134, "grad_norm": 39.52831505761594, "learning_rate": 5.617977528089887e-06, "loss": 3.4743, "step": 10},
    {"epoch": 0.00281214848143982, "grad_norm": 22.428432087019985, "learning_rate": 8.42696629213483e-06, "loss": 2.8849, "step": 15},
    {"epoch": 0.0037495313085864268, "grad_norm": 9.054209275536381, "learning_rate": 1.1235955056179774e-05, "loss": 2.1428, "step": 20},
    {"epoch": 0.004686914135733034, "grad_norm": 3.948592126973317, "learning_rate": 1.404494382022472e-05, "loss": 1.7056, "step": 25},
    {"epoch": 0.00562429696287964, "grad_norm": 2.299992616041375, "learning_rate": 1.685393258426966e-05, "loss": 1.4186, "step": 30},
    {"epoch": 0.006561679790026247, "grad_norm": 0.9839714616377845, "learning_rate": 1.9662921348314603e-05, "loss": 1.2306, "step": 35},
    {"epoch": 0.0074990626171728535, "grad_norm": 0.6008365887782849, "learning_rate": 2.247191011235955e-05, "loss": 1.1511, "step": 40},
    {"epoch": 0.00843644544431946, "grad_norm": 0.4330325556834079, "learning_rate": 2.528089887640449e-05, "loss": 1.0624, "step": 45},
    {"epoch": 0.009373828271466067, "grad_norm": 0.35553504278533465, "learning_rate": 2.808988764044944e-05, "loss": 1.0431, "step": 50},
    {"epoch": 0.010311211098612674, "grad_norm": 0.25388532511026035, "learning_rate": 3.089887640449438e-05, "loss": 1.005, "step": 55},
    {"epoch": 0.01124859392575928, "grad_norm": 0.2576041354688323, "learning_rate": 3.370786516853932e-05, "loss": 0.9996, "step": 60},
    {"epoch": 0.012185976752905886, "grad_norm": 0.21819394992900415, "learning_rate": 3.6516853932584265e-05, "loss": 0.9775, "step": 65},
    {"epoch": 0.013123359580052493, "grad_norm": 0.21574042351319167, "learning_rate": 3.932584269662921e-05, "loss": 0.9914, "step": 70},
    {"epoch": 0.0140607424071991, "grad_norm": 0.18802524325518408, "learning_rate": 4.2134831460674156e-05, "loss": 0.9853, "step": 75},
    {"epoch": 0.014998125234345707, "grad_norm": 0.18620252651860728, "learning_rate": 4.49438202247191e-05, "loss": 1.0079, "step": 80},
    {"epoch": 0.015935508061492312, "grad_norm": 0.18097949689788895, "learning_rate": 4.775280898876404e-05, "loss": 0.9329, "step": 85},
    {"epoch": 0.01687289088863892, "grad_norm": 0.15787887483249505, "learning_rate": 5.056179775280898e-05, "loss": 0.9277, "step": 90},
    {"epoch": 0.017810273715785526, "grad_norm": 0.15373249141451148, "learning_rate": 5.337078651685392e-05, "loss": 0.9232, "step": 95},
    {"epoch": 0.018747656542932135, "grad_norm": 0.13868806686970064, "learning_rate": 5.617977528089888e-05, "loss": 0.9002, "step": 100},
    {"epoch": 0.01968503937007874, "grad_norm": 0.14099964891127995, "learning_rate": 5.898876404494382e-05, "loss": 0.916, "step": 105},
    {"epoch": 0.02062242219722535, "grad_norm": 0.12190687602764205, "learning_rate": 6.179775280898876e-05, "loss": 0.926, "step": 110},
    {"epoch": 0.021559805024371954, "grad_norm": 0.1146212972821343, "learning_rate": 6.46067415730337e-05, "loss": 0.9661, "step": 115},
    {"epoch": 0.02249718785151856, "grad_norm": 0.1305834260724098, "learning_rate": 6.741573033707865e-05, "loss": 0.8979, "step": 120},
    {"epoch": 0.023434570678665168, "grad_norm": 0.09286668479471845, "learning_rate": 7.022471910112359e-05, "loss": 0.895, "step": 125},
    {"epoch": 0.024371953505811773, "grad_norm": 0.09893175557780264, "learning_rate": 7.303370786516853e-05, "loss": 0.8941, "step": 130},
    {"epoch": 0.02530933633295838, "grad_norm": 0.08722137311816515, "learning_rate": 7.584269662921347e-05, "loss": 0.887, "step": 135},
    {"epoch": 0.026246719160104987, "grad_norm": 0.08022348162278936, "learning_rate": 7.865168539325841e-05, "loss": 0.8804, "step": 140},
    {"epoch": 0.027184101987251595, "grad_norm": 0.08856735925510693, "learning_rate": 8.146067415730337e-05, "loss": 0.855, "step": 145},
    {"epoch": 0.0281214848143982, "grad_norm": 0.08311974627408422, "learning_rate": 8.426966292134831e-05, "loss": 0.8837, "step": 150},
    {"epoch": 0.029058867641544806, "grad_norm": 0.07297089629042264, "learning_rate": 8.707865168539325e-05, "loss": 0.8748, "step": 155},
    {"epoch": 0.029996250468691414, "grad_norm": 0.0720196207299981, "learning_rate": 8.98876404494382e-05, "loss": 0.8997, "step": 160},
    {"epoch": 0.03093363329583802, "grad_norm": 0.07463772342009761, "learning_rate": 9.269662921348314e-05, "loss": 0.9228, "step": 165},
    {"epoch": 0.031871016122984624, "grad_norm": 0.08058875574417819, "learning_rate": 9.550561797752808e-05, "loss": 0.8742, "step": 170},
    {"epoch": 0.03280839895013123, "grad_norm": 0.07434935030584572, "learning_rate": 9.831460674157303e-05, "loss": 0.9169, "step": 175},
    {"epoch": 0.03374578177727784, "grad_norm": 0.06516079400878816, "learning_rate": 0.00010112359550561796, "loss": 0.8842, "step": 180},
    {"epoch": 0.034683164604424443, "grad_norm": 0.068946140527396, "learning_rate": 0.00010393258426966292, "loss": 0.8673, "step": 185},
    {"epoch": 0.03562054743157105, "grad_norm": 0.0742056092302864, "learning_rate": 0.00010674157303370785, "loss": 0.8704, "step": 190},
    {"epoch": 0.03655793025871766, "grad_norm": 0.09321869505325037, "learning_rate": 0.0001095505617977528, "loss": 0.8774, "step": 195},
    {"epoch": 0.03749531308586427, "grad_norm": 0.07237285280180873, "learning_rate": 0.00011235955056179776, "loss": 0.8645, "step": 200},
    {"epoch": 0.03843269591301087, "grad_norm": 0.0725790446638447, "learning_rate": 0.00011516853932584269, "loss": 0.8853, "step": 205},
    {"epoch": 0.03937007874015748, "grad_norm": 0.06714726508233679, "learning_rate": 0.00011797752808988764, "loss": 0.8722, "step": 210},
    {"epoch": 0.04030746156730409, "grad_norm": 0.06816724261165027, "learning_rate": 0.00012078651685393257, "loss": 0.857, "step": 215},
    {"epoch": 0.0412448443944507, "grad_norm": 0.0630221201023211, "learning_rate": 0.00012359550561797752, "loss": 0.8844, "step": 220},
    {"epoch": 0.0421822272215973, "grad_norm": 0.06920243449410893, "learning_rate": 0.00012640449438202245, "loss": 0.8786, "step": 225},
    {"epoch": 0.04311961004874391, "grad_norm": 0.07088619559489383, "learning_rate": 0.0001292134831460674, "loss": 0.8797, "step": 230},
    {"epoch": 0.044056992875890516, "grad_norm": 0.061740726041492694, "learning_rate": 0.00013202247191011236, "loss": 0.8698, "step": 235},
    {"epoch": 0.04499437570303712, "grad_norm": 0.07106582585336212, "learning_rate": 0.0001348314606741573, "loss": 0.869, "step": 240},
    {"epoch": 0.045931758530183726, "grad_norm": 0.061678059675451424, "learning_rate": 0.00013764044943820225, "loss": 0.8643, "step": 245},
    {"epoch": 0.046869141357330335, "grad_norm": 0.06524467963947332, "learning_rate": 0.00014044943820224718, "loss": 0.8927, "step": 250},
    {"epoch": 0.047806524184476944, "grad_norm": 0.0650786155112897, "learning_rate": 0.00014325842696629213, "loss": 0.8384, "step": 255},
    {"epoch": 0.048743907011623545, "grad_norm": 0.07154242679452663, "learning_rate": 0.00014606741573033706, "loss": 0.8809, "step": 260},
    {"epoch": 0.049681289838770154, "grad_norm": 0.07795179863304261, "learning_rate": 0.00014887640449438202, "loss": 0.8394, "step": 265},
    {"epoch": 0.05061867266591676, "grad_norm": 0.06374006181294534, "learning_rate": 0.00015168539325842694, "loss": 0.8961, "step": 270},
    {"epoch": 0.051556055493063364, "grad_norm": 0.07787607064114185, "learning_rate": 0.0001544943820224719, "loss": 0.8632, "step": 275},
    {"epoch": 0.05249343832020997, "grad_norm": 0.06289687876280393, "learning_rate": 0.00015730337078651683, "loss": 0.88, "step": 280},
    {"epoch": 0.05343082114735658, "grad_norm": 0.06491539830485436, "learning_rate": 0.00016011235955056178, "loss": 0.8737, "step": 285},
    {"epoch": 0.05436820397450319, "grad_norm": 0.070328841548907, "learning_rate": 0.00016292134831460674, "loss": 0.8617, "step": 290},
    {"epoch": 0.05530558680164979, "grad_norm": 0.0638014579038207, "learning_rate": 0.00016573033707865167, "loss": 0.8506, "step": 295},
    {"epoch": 0.0562429696287964, "grad_norm": 0.06422804662460813, "learning_rate": 0.00016853932584269662, "loss": 0.8805, "step": 300},
    {"epoch": 0.05718035245594301, "grad_norm": 0.06361653985204194, "learning_rate": 0.00017134831460674155, "loss": 0.8918, "step": 305},
    {"epoch": 0.05811773528308961, "grad_norm": 0.06932335644552794, "learning_rate": 0.0001741573033707865, "loss": 0.8595, "step": 310},
    {"epoch": 0.05905511811023622, "grad_norm": 0.07957054733316223, "learning_rate": 0.00017696629213483143, "loss": 0.8663, "step": 315},
    {"epoch": 0.05999250093738283, "grad_norm": 0.06998445535720693, "learning_rate": 0.0001797752808988764, "loss": 0.8676, "step": 320},
    {"epoch": 0.06092988376452944, "grad_norm": 0.06702795111487583, "learning_rate": 0.00018258426966292135, "loss": 0.8802, "step": 325},
    {"epoch": 0.06186726659167604, "grad_norm": 0.057562966446872095, "learning_rate": 0.00018539325842696627, "loss": 0.8671, "step": 330},
    {"epoch": 0.06280464941882265, "grad_norm": 0.07214305689508435, "learning_rate": 0.0001882022471910112, "loss": 0.8992, "step": 335},
    {"epoch": 0.06374203224596925, "grad_norm": 0.06640078935309972, "learning_rate": 0.00019101123595505616, "loss": 0.8589, "step": 340},
    {"epoch": 0.06467941507311586, "grad_norm": 0.06432108168682822, "learning_rate": 0.0001938202247191011, "loss": 0.8792, "step": 345},
    {"epoch": 0.06561679790026247, "grad_norm": 0.06995296734682956, "learning_rate": 0.00019662921348314607, "loss": 0.865, "step": 350},
    {"epoch": 0.06655418072740907, "grad_norm": 0.0781580610323253, "learning_rate": 0.00019943820224719097, "loss": 0.8815, "step": 355},
    {"epoch": 0.06749156355455568, "grad_norm": 0.07071637338397053, "learning_rate": 0.00020224719101123593, "loss": 0.8666, "step": 360},
    {"epoch": 0.06842894638170229, "grad_norm": 0.0682378939648076, "learning_rate": 0.00020505617977528088, "loss": 0.8862, "step": 365},
    {"epoch": 0.06936632920884889, "grad_norm": 0.06900860789126333, "learning_rate": 0.00020786516853932584, "loss": 0.8638, "step": 370},
    {"epoch": 0.0703037120359955, "grad_norm": 0.06343642719314092, "learning_rate": 0.0002106741573033708, "loss": 0.8692, "step": 375},
    {"epoch": 0.0712410948631421, "grad_norm": 0.058805566981791894, "learning_rate": 0.0002134831460674157, "loss": 0.8699, "step": 380},
    {"epoch": 0.07217847769028872, "grad_norm": 0.06501680635838757, "learning_rate": 0.00021629213483146065, "loss": 0.8794, "step": 385},
    {"epoch": 0.07311586051743532, "grad_norm": 0.06790932619576265, "learning_rate": 0.0002191011235955056, "loss": 0.8609, "step": 390},
    {"epoch": 0.07405324334458192, "grad_norm": 0.07253373879567401, "learning_rate": 0.00022191011235955056, "loss": 0.8586, "step": 395},
    {"epoch": 0.07499062617172854, "grad_norm": 0.05597241270795834, "learning_rate": 0.00022471910112359551, "loss": 0.8606, "step": 400},
    {"epoch": 0.07592800899887514, "grad_norm": 0.07414431562106621, "learning_rate": 0.00022752808988764042, "loss": 0.859, "step": 405},
    {"epoch": 0.07686539182602174, "grad_norm": 0.06416271464203936, "learning_rate": 0.00023033707865168537, "loss": 0.8462, "step": 410},
    {"epoch": 0.07780277465316836, "grad_norm": 0.06411386307806086, "learning_rate": 0.00023314606741573033, "loss": 0.8859, "step": 415},
    {"epoch": 0.07874015748031496, "grad_norm": 0.08671615531146802, "learning_rate": 0.00023595505617977528, "loss": 0.858, "step": 420},
    {"epoch": 0.07967754030746156, "grad_norm": 0.06950084739281429, "learning_rate": 0.00023876404494382018, "loss": 0.8779, "step": 425},
    {"epoch": 0.08061492313460818, "grad_norm": 0.06732606238114026, "learning_rate": 0.00024157303370786514, "loss": 0.8585, "step": 430},
    {"epoch": 0.08155230596175478, "grad_norm": 0.06753409562352092, "learning_rate": 0.0002443820224719101, "loss": 0.8648, "step": 435},
    {"epoch": 0.0824896887889014, "grad_norm": 0.06868527875344918, "learning_rate": 0.00024719101123595505, "loss": 0.8923, "step": 440},
    {"epoch": 0.083427071616048, "grad_norm": 0.060659330441044046, "learning_rate": 0.00025, "loss": 0.8441, "step": 445},
    {"epoch": 0.0843644544431946, "grad_norm": 0.056891953999125895, "learning_rate": 0.0002528089887640449, "loss": 0.8711, "step": 450},
    {"epoch": 0.08530183727034121, "grad_norm": 0.06315987668061082, "learning_rate": 0.00025561797752808986, "loss": 0.8709, "step": 455},
    {"epoch": 0.08623922009748781, "grad_norm": 0.06974874046223557, "learning_rate": 0.0002584269662921348, "loss": 0.8881, "step": 460},
    {"epoch": 0.08717660292463442, "grad_norm": 0.07106045458363727, "learning_rate": 0.00026123595505617977, "loss": 0.8455, "step": 465},
    {"epoch": 0.08811398575178103, "grad_norm": 0.06530109700061691, "learning_rate": 0.00026404494382022473, "loss": 0.8701, "step": 470},
    {"epoch": 0.08905136857892763, "grad_norm": 0.06284436680171902, "learning_rate": 0.00026685393258426963, "loss": 0.8639, "step": 475},
    {"epoch": 0.08998875140607424, "grad_norm": 0.06695190689656623, "learning_rate": 0.0002696629213483146, "loss": 0.8596, "step": 480},
    {"epoch": 0.09092613423322085, "grad_norm": 0.06134041035709648, "learning_rate": 0.00027247191011235954, "loss": 0.8846, "step": 485},
    {"epoch": 0.09186351706036745, "grad_norm": 0.05390358172595912, "learning_rate": 0.0002752808988764045, "loss": 0.8608, "step": 490},
    {"epoch": 0.09280089988751405, "grad_norm": 0.06061308522705228, "learning_rate": 0.00027808988764044945, "loss": 0.8688, "step": 495},
    {"epoch": 0.09373828271466067, "grad_norm": 0.05665308816810634, "learning_rate": 0.00028089887640449435, "loss": 0.8431, "step": 500},
    {"epoch": 0.09467566554180727, "grad_norm": 0.06298526377992197, "learning_rate": 0.0002837078651685393, "loss": 0.8534, "step": 505},
    {"epoch": 0.09561304836895389, "grad_norm": 0.06828852662205184, "learning_rate": 0.00028651685393258426, "loss": 0.855, "step": 510},
    {"epoch": 0.09655043119610049, "grad_norm": 0.059581044094172864, "learning_rate": 0.0002893258426966292, "loss": 0.892, "step": 515},
    {"epoch": 0.09748781402324709, "grad_norm": 0.05834286349398695, "learning_rate": 0.0002921348314606741, "loss": 0.8791, "step": 520},
    {"epoch": 0.0984251968503937, "grad_norm": 0.05628861715462521, "learning_rate": 0.0002949438202247191, "loss": 0.8572, "step": 525},
    {"epoch": 0.09936257967754031, "grad_norm": 0.0752400574763716, "learning_rate": 0.00029775280898876403, "loss": 0.9176, "step": 530},
    {"epoch": 0.10029996250468691, "grad_norm": 0.0589889669786586, "learning_rate": 0.0002999999678723826, "loss": 0.879, "step": 535},
    {"epoch": 0.10123734533183353, "grad_norm": 0.06984453009723764, "learning_rate": 0.0002999988434072206, "loss": 0.8791, "step": 540},
    {"epoch": 0.10217472815898013, "grad_norm": 0.05131555199439601, "learning_rate": 0.0002999961125749536, "loss": 0.8639, "step": 545},
    {"epoch": 0.10311211098612673, "grad_norm": 0.06129502320830477, "learning_rate": 0.0002999917754048268, "loss": 0.8626, "step": 550},
    {"epoch": 0.10404949381327334, "grad_norm": 0.052116063904268276, "learning_rate": 0.00029998583194328776, "loss": 0.8405, "step": 555},
    {"epoch": 0.10498687664041995, "grad_norm": 0.05805086354219647, "learning_rate": 0.0002999782822539861, "loss": 0.8521, "step": 560},
    {"epoch": 0.10592425946756655, "grad_norm": 0.058854180699594805, "learning_rate": 0.000299969126417773, "loss": 0.8612, "step": 565},
    {"epoch": 0.10686164229471316, "grad_norm": 0.05170888163012354, "learning_rate": 0.00029995836453270005, "loss": 0.852, "step": 570},
    {"epoch": 0.10779902512185977, "grad_norm": 0.053497899422118654, "learning_rate": 0.0002999459967140185, "loss": 0.8425, "step": 575},
    {"epoch": 0.10873640794900638, "grad_norm": 0.0517344152203251, "learning_rate": 0.00029993202309417765, "loss": 0.8666, "step": 580},
    {"epoch": 0.10967379077615298, "grad_norm": 0.06883148118348337, "learning_rate": 0.00029991644382282377, "loss": 0.8699, "step": 585},
    {"epoch": 0.11061117360329958, "grad_norm": 0.06701524872749254, "learning_rate": 0.0002998992590667984, "loss": 0.8612, "step": 590},
    {"epoch": 0.1115485564304462, "grad_norm": 0.0633741891240403, "learning_rate": 0.00029988046901013643, "loss": 0.8861, "step": 595},
    {"epoch": 0.1124859392575928, "grad_norm": 0.059595235950932066, "learning_rate": 0.00029986007385406424, "loss": 0.8761, "step": 600},
    {"epoch": 0.1134233220847394, "grad_norm": 0.05592116325579977, "learning_rate": 0.00029983807381699757, "loss": 0.8756, "step": 605},
    {"epoch": 0.11436070491188602, "grad_norm": 0.057345108961587225, "learning_rate": 0.0002998144691345392, "loss": 0.8574, "step": 610},
    {"epoch": 0.11529808773903262, "grad_norm": 0.05734473521251835, "learning_rate": 0.0002997892600594762, "loss": 0.8544, "step": 615},
    {"epoch": 0.11623547056617922, "grad_norm": 0.05186000961639975, "learning_rate": 0.00029976244686177764, "loss": 0.8808, "step": 620},
    {"epoch": 0.11717285339332584, "grad_norm": 0.05414634714867266, "learning_rate": 0.00029973402982859127, "loss": 0.8331, "step": 625},
    {"epoch": 0.11811023622047244, "grad_norm": 0.04968211925070222, "learning_rate": 0.0002997040092642407, "loss": 0.8592, "step": 630},
    {"epoch": 0.11904761904761904, "grad_norm": 0.06248537463553037, "learning_rate": 0.00029967238549022206, "loss": 0.8819, "step": 635},
    {"epoch": 0.11998500187476566, "grad_norm": 0.055299575032660746, "learning_rate": 0.00029963915884520054, "loss": 0.8636, "step": 640},
    {"epoch": 0.12092238470191226, "grad_norm": 0.04854206172628792, "learning_rate": 0.00029960432968500675, "loss": 0.8628, "step": 645},
    {"epoch": 0.12185976752905887, "grad_norm": 0.058596631103748714, "learning_rate": 0.00029956789838263314, "loss": 0.8489, "step": 650},
    {"epoch": 0.12279715035620548, "grad_norm": 0.06652964862071063, "learning_rate": 0.0002995298653282297, "loss": 0.876, "step": 655},
    {"epoch": 0.12373453318335208, "grad_norm": 0.05617071779521591, "learning_rate": 0.00029949023092909976, "loss": 0.8582, "step": 660},
    {"epoch": 0.12467191601049869, "grad_norm": 0.06128826182928016, "learning_rate": 0.00029944899560969593, "loss": 0.8556, "step": 665},
    {"epoch": 0.1256092988376453, "grad_norm": 0.06596974095737278, "learning_rate": 0.00029940615981161544, "loss": 0.8484, "step": 670},
    {"epoch": 0.1265466816647919, "grad_norm": 0.05241653325144218, "learning_rate": 0.00029936172399359516, "loss": 0.8681, "step": 675},
    {"epoch": 0.1274840644919385, "grad_norm": 0.0656117272709645, "learning_rate": 0.00029931568863150705, "loss": 0.8611, "step": 680},
    {"epoch": 0.1284214473190851, "grad_norm": 0.05416806152576802, "learning_rate": 0.0002992680542183529, "loss": 0.8543, "step": 685},
    {"epoch": 0.12935883014623173, "grad_norm": 0.05949667729922976, "learning_rate": 0.00029921882126425893, "loss": 0.8476, "step": 690},
    {"epoch": 0.13029621297337832, "grad_norm": 0.05476709738462092, "learning_rate": 0.0002991679902964706, "loss": 0.8329, "step": 695},
    {"epoch": 0.13123359580052493, "grad_norm": 0.05583654578373451, "learning_rate": 0.00029911556185934667, "loss": 0.8546, "step": 700},
    {"epoch": 0.13217097862767155, "grad_norm": 0.052842518474849304, "learning_rate": 0.0002990615365143536, "loss": 0.8576, "step": 705},
    {"epoch": 0.13310836145481814, "grad_norm": 0.05325159315281095, "learning_rate": 0.0002990059148400594, "loss": 0.8475, "step": 710},
    {"epoch": 0.13404574428196475, "grad_norm": 0.0477613812018967, "learning_rate": 0.00029894869743212767, "loss": 0.8505, "step": 715},
    {"epoch": 0.13498312710911137, "grad_norm": 0.04650972000773531, "learning_rate": 0.00029888988490331067, "loss": 0.8406, "step": 720},
    {"epoch": 0.13592050993625795, "grad_norm": 0.05548534041152614, "learning_rate": 0.00029882947788344345, "loss": 0.8731, "step": 725},
    {"epoch": 0.13685789276340457, "grad_norm": 0.047893477001569186, "learning_rate": 0.00029876747701943667, "loss": 0.8666, "step": 730},
    {"epoch": 0.1377952755905512, "grad_norm": 0.05399917193542452, "learning_rate": 0.00029870388297526966, "loss": 0.8476, "step": 735},
    {"epoch": 0.13873265841769777, "grad_norm": 0.04738143330368603, "learning_rate": 0.0002986386964319837, "loss": 0.8423, "step": 740},
    {"epoch": 0.1396700412448444, "grad_norm": 0.05491107440574904, "learning_rate": 0.0002985719180876742, "loss": 0.8451, "step": 745},
    {"epoch": 0.140607424071991, "grad_norm": 0.060398470624460965, "learning_rate": 0.0002985035486574836, "loss": 0.8801, "step": 750},
    {"epoch": 0.14154480689913762, "grad_norm": 0.05350788151410978, "learning_rate": 0.00029843358887359357, "loss": 0.8516, "step": 755},
    {"epoch": 0.1424821897262842, "grad_norm": 0.05517308695615106, "learning_rate": 0.0002983620394852172, "loss": 0.8703, "step": 760},
    {"epoch": 0.14341957255343082, "grad_norm": 0.054231118168541766, "learning_rate": 0.000298288901258591, "loss": 0.8693, "step": 765},
    {"epoch": 0.14435695538057744, "grad_norm": 0.049622640241897456, "learning_rate": 0.0002982141749769665, "loss": 0.8395, "step": 770},
    {"epoch": 0.14529433820772403, "grad_norm": 0.046995063624861166, "learning_rate": 0.0002981378614406022, "loss": 0.8604, "step": 775},
    {"epoch": 0.14623172103487064, "grad_norm": 0.05520295521540041, "learning_rate": 0.0002980599614667548, "loss": 0.8645, "step": 780},
    {"epoch": 0.14716910386201726, "grad_norm": 0.04894265506037402, "learning_rate": 0.0002979804758896704, "loss": 0.8652, "step": 785},
    {"epoch": 0.14810648668916385, "grad_norm": 0.06029411158482043, "learning_rate": 0.0002978994055605757, "loss": 0.8416, "step": 790},
    {"epoch": 0.14904386951631046, "grad_norm": 0.05780387639217229, "learning_rate": 0.0002978167513476688, "loss": 0.8526, "step": 795},
    {"epoch": 0.14998125234345708, "grad_norm": 0.05465366981027733, "learning_rate": 0.00029773251413610987, "loss": 0.8655, "step": 800},
    {"epoch": 0.15091863517060367, "grad_norm": 0.05272490905818877, "learning_rate": 0.00029764669482801174, "loss": 0.8519, "step": 805},
    {"epoch": 0.15185601799775028, "grad_norm": 0.05718657624393557, "learning_rate": 0.00029755929434243034, "loss": 0.8853, "step": 810},
    {"epoch": 0.1527934008248969, "grad_norm": 0.05197508584506214, "learning_rate": 0.00029747031361535464, "loss": 0.8349, "step": 815},
    {"epoch": 0.15373078365204348, "grad_norm": 0.05481428649702033, "learning_rate": 0.0002973797535996967, "loss": 0.8627, "step": 820},
    {"epoch": 0.1546681664791901, "grad_norm": 0.04732127095107982, "learning_rate": 0.00029728761526528157, "loss": 0.8698, "step": 825},
    {"epoch": 0.15560554930633672, "grad_norm": 0.05743584601025622, "learning_rate": 0.00029719389959883673, "loss": 0.8736, "step": 830},
    {"epoch": 0.1565429321334833, "grad_norm": 0.05196494960994641, "learning_rate": 0.00029709860760398176, "loss": 0.8634, "step": 835},
    {"epoch": 0.15748031496062992, "grad_norm": 0.05085488275567502, "learning_rate": 0.0002970017403012173, "loss": 0.8568, "step": 840},
    {"epoch": 0.15841769778777653, "grad_norm": 0.050395431338846663, "learning_rate": 0.0002969032987279144, "loss": 0.8225, "step": 845},
    {"epoch": 0.15935508061492312, "grad_norm": 0.04628767205411848, "learning_rate": 0.00029680328393830315, "loss": 0.875, "step": 850},
    {"epoch": 0.16029246344206974, "grad_norm": 0.051723935710402326, "learning_rate": 0.00029670169700346164, "loss": 0.8145, "step": 855},
    {"epoch": 0.16122984626921635, "grad_norm": 0.05201508957810899, "learning_rate": 0.0002965985390113043, "loss": 0.8648, "step": 860},
    {"epoch": 0.16216722909636294, "grad_norm": 0.0495933900905848, "learning_rate": 0.0002964938110665704, "loss": 0.8587, "step": 865},
    {"epoch": 0.16310461192350956, "grad_norm": 0.04586671475206476, "learning_rate": 0.0002963875142908121, "loss": 0.8412, "step": 870},
    {"epoch": 0.16404199475065617, "grad_norm": 0.0532312699053512, "learning_rate": 0.00029627964982238236, "loss": 0.842, "step": 875},
    {"epoch": 0.1649793775778028, "grad_norm": 0.043681059319998494, "learning_rate": 0.0002961702188164231, "loss": 0.8274, "step": 880},
    {"epoch": 0.16591676040494938, "grad_norm": 0.048557735504717925, "learning_rate": 0.0002960592224448524, "loss": 0.8426, "step": 885},
    {"epoch": 0.166854143232096, "grad_norm": 0.04679289724415963, "learning_rate": 0.00029594666189635224, "loss": 0.8347, "step": 890},
    {"epoch": 0.1677915260592426, "grad_norm": 0.047070279337964094, "learning_rate": 0.00029583253837635575, "loss": 0.8456, "step": 895},
    {"epoch": 0.1687289088863892, "grad_norm": 0.05283241949368948, "learning_rate": 0.00029571685310703403, "loss": 0.8326, "step": 900},
    {"epoch": 0.1696662917135358, "grad_norm": 0.05385523710797248, "learning_rate": 0.00029559960732728337, "loss": 0.8529, "step": 905},
    {"epoch": 0.17060367454068243, "grad_norm": 0.04730829638902959, "learning_rate": 0.000295480802292712, "loss": 0.8085, "step": 910},
    {"epoch": 0.17154105736782901, "grad_norm": 0.04488134785670412, "learning_rate": 0.0002953604392756263, "loss": 0.8371, "step": 915},
    {"epoch": 0.17247844019497563, "grad_norm": 0.051313805015627746, "learning_rate": 0.00029523851956501744, "loss": 0.8486, "step": 920},
    {"epoch": 0.17341582302212225, "grad_norm": 0.0430483274069335, "learning_rate": 0.00029511504446654767, "loss": 0.8475, "step": 925},
    {"epoch": 0.17435320584926883, "grad_norm": 0.05543052805044051, "learning_rate": 0.00029499001530253606, "loss": 0.8571, "step": 930},
    {"epoch": 0.17529058867641545, "grad_norm": 0.05160006837470796, "learning_rate": 0.0002948634334119445, "loss": 0.8348, "step": 935},
    {"epoch": 0.17622797150356206, "grad_norm": 0.049000725000881734, "learning_rate": 0.00029473530015036335, "loss": 0.8243, "step": 940},
    {"epoch": 0.17716535433070865, "grad_norm": 0.05324980516591907, "learning_rate": 0.0002946056168899969, "loss": 0.817, "step": 945},
    {"epoch": 0.17810273715785527, "grad_norm": 0.05112366142864306, "learning_rate": 0.00029447438501964873, "loss": 0.8493, "step": 950},
    {"epoch": 0.17904011998500188, "grad_norm": 0.0498102110793069, "learning_rate": 0.0002943416059447066, "loss": 0.8155, "step": 955},
    {"epoch": 0.17997750281214847, "grad_norm": 0.048456183794281356, "learning_rate": 0.0002942072810871279, "loss": 0.8057, "step": 960},
    {"epoch": 0.1809148856392951, "grad_norm": 0.0514791482209668, "learning_rate": 0.0002940714118854238, "loss": 0.8125, "step": 965},
    {"epoch": 0.1818522684664417, "grad_norm": 0.04725030159140053, "learning_rate": 0.0002939339997946444, "loss": 0.8572, "step": 970},
    {"epoch": 0.1827896512935883, "grad_norm": 0.05374547829970908, "learning_rate": 0.0002937950462863627, "loss": 0.8328, "step": 975},
    {"epoch": 0.1837270341207349, "grad_norm": 0.0562408856971059, "learning_rate": 0.00029365455284865923, "loss": 0.8087, "step": 980},
    {"epoch": 0.18466441694788152, "grad_norm": 0.0455189157494031, "learning_rate": 0.00029351252098610577, "loss": 0.8418, "step": 985},
    {"epoch": 0.1856017997750281, "grad_norm": 0.050711487872205016, "learning_rate": 0.00029336895221974946, "loss": 0.8482, "step": 990},
    {"epoch": 0.18653918260217472, "grad_norm": 0.04919906548209661, "learning_rate": 0.00029322384808709654, "loss": 0.8349, "step": 995},
    {"epoch": 0.18747656542932134, "grad_norm": 0.05662595028293593, "learning_rate": 0.00029307721014209555, "loss": 0.8577, "step": 1000},
    {"epoch": 0.18841394825646793, "grad_norm": 0.057447476057492466, "learning_rate": 0.00029292903995512123, "loss": 0.8534, "step": 1005},
    {"epoch": 0.18935133108361454, "grad_norm": 0.05299814571131224, "learning_rate": 0.0002927793391129571, "loss": 0.8577, "step": 1010},
    {"epoch": 0.19028871391076116, "grad_norm": 0.04319708352419991, "learning_rate": 0.00029262810921877906, "loss": 0.8188, "step": 1015},
    {"epoch": 0.19122609673790777, "grad_norm": 0.05353832659477085, "learning_rate": 0.0002924753518921376, "loss": 0.846, "step": 1020},
    {"epoch": 0.19216347956505436, "grad_norm": 0.04966007615937987, "learning_rate": 0.0002923210687689411, "loss": 0.8552, "step": 1025},
    {"epoch": 0.19310086239220098, "grad_norm": 0.048671986424680805, "learning_rate": 0.00029216526150143785, "loss": 0.8433, "step": 1030},
    {"epoch": 0.1940382452193476, "grad_norm": 0.05118103987659261, "learning_rate": 0.0002920079317581984, "loss": 0.8545, "step": 1035},
    {"epoch": 0.19497562804649418, "grad_norm": 0.04730760417822063, "learning_rate": 0.00029184908122409804, "loss": 0.8255, "step": 1040},
    {"epoch": 0.1959130108736408, "grad_norm": 0.05859108169984351, "learning_rate": 0.0002916887116002983, "loss": 0.8391, "step": 1045},
    {"epoch": 0.1968503937007874, "grad_norm": 0.05306001691326635, "learning_rate": 0.000291526824604229, "loss": 0.8424, "step": 1050},
    {"epoch": 0.197787776527934, "grad_norm": 0.047277287716965946, "learning_rate": 0.00029136342196956985, "loss": 0.833, "step": 1055},
    {"epoch": 0.19872515935508062, "grad_norm": 0.05159138803836899, "learning_rate": 0.0002911985054462318, "loss": 0.8304, "step": 1060},
    {"epoch": 0.19966254218222723, "grad_norm": 0.046550244925313046, "learning_rate": 0.00029103207680033827, "loss": 0.8422, "step": 1065},
    {"epoch": 0.20059992500937382, "grad_norm": 0.046411253292755236, "learning_rate": 0.00029086413781420633, "loss": 0.8575, "step": 1070},
    {"epoch": 0.20153730783652044, "grad_norm": 0.04762694973314196, "learning_rate": 0.0002906946902863277, "loss": 0.8541, "step": 1075},
    {"epoch": 0.20247469066366705, "grad_norm": 0.043558681229160734, "learning_rate": 0.0002905237360313492, "loss": 0.8509, "step": 1080},
    {"epoch": 0.20341207349081364, "grad_norm": 0.047235839608751946, "learning_rate": 0.00029035127688005355, "loss": 0.8326, "step": 1085},
    {"epoch": 0.20434945631796025, "grad_norm": 0.04405113672607244, "learning_rate": 0.00029017731467933974, "loss": 0.8235, "step": 1090},
    {"epoch": 0.20528683914510687, "grad_norm": 0.04917819994251594, "learning_rate": 0.0002900018512922032, "loss": 0.8514, "step": 1095},
    {"epoch": 0.20622422197225346, "grad_norm": 0.045391849870203616, "learning_rate": 0.0002898248885977158, "loss": 0.841, "step": 1100},
    {"epoch": 0.20716160479940007, "grad_norm": 0.04950574892382217, "learning_rate": 0.0002896464284910058, "loss": 0.8604, "step": 1105},
    {"epoch": 0.2080989876265467, "grad_norm": 0.04483838357821004, "learning_rate": 0.00028946647288323766, "loss": 0.8477, "step": 1110},
    {"epoch": 0.20903637045369328, "grad_norm": 0.04819763033021056, "learning_rate": 0.00028928502370159133, "loss": 0.8301, "step": 1115},
    {"epoch": 0.2099737532808399, "grad_norm": 0.04846529213809432, "learning_rate": 0.0002891020828892417, "loss": 0.8376, "step": 1120},
    {"epoch": 0.2109111361079865, "grad_norm": 0.0467946353030176, "learning_rate": 0.00028891765240533795, "loss": 0.8705, "step": 1125},
    {"epoch": 0.2118485189351331, "grad_norm": 0.044000801667735845, "learning_rate": 0.00028873173422498243, "loss": 0.8281, "step": 1130},
    {"epoch": 0.2127859017622797, "grad_norm": 0.047502640131898, "learning_rate": 0.0002885443303392094, "loss": 0.8381, "step": 1135},
    {"epoch": 0.21372328458942633, "grad_norm": 0.04801904197558332, "learning_rate": 0.000288355442754964, "loss": 0.8179, "step": 1140},
    {"epoch": 0.21466066741657294, "grad_norm": 0.04261523390710894, "learning_rate": 0.00028816507349508047, "loss": 0.8263, "step": 1145},
    {"epoch": 0.21559805024371953, "grad_norm": 0.047191156643762354, "learning_rate": 0.00028797322459826063, "loss": 0.8374, "step": 1150},
    {"epoch": 0.21653543307086615, "grad_norm": 0.05167657856075751, "learning_rate": 0.00028777989811905205, "loss": 0.8614, "step": 1155},
    {"epoch": 0.21747281589801276, "grad_norm": 0.048897121204297574, "learning_rate": 0.000287585096127826, "loss": 0.8538, "step": 1160},
    {"epoch": 0.21841019872515935, "grad_norm": 0.043161388562608975, "learning_rate": 0.0002873888207107553, "loss": 0.8457, "step": 1165},
    {"epoch": 0.21934758155230596, "grad_norm": 0.054581961552759836, "learning_rate": 0.000287191073969792, "loss": 0.8362, "step": 1170},
    {"epoch": 0.22028496437945258, "grad_norm": 0.05058965528101186, "learning_rate": 0.0002869918580226448, "loss": 0.8687, "step": 1175},
    {"epoch": 0.22122234720659917, "grad_norm": 0.04739504236542665, "learning_rate": 0.00028679117500275653, "loss": 0.831, "step": 1180},
    {"epoch": 0.22215973003374578, "grad_norm": 0.045754358105207775, "learning_rate": 0.00028658902705928094, "loss": 0.868, "step": 1185},
    {"epoch": 0.2230971128608924, "grad_norm": 0.04508332677839864, "learning_rate": 0.00028638541635706027, "loss": 0.8129, "step": 1190},
    {"epoch": 0.224034495688039, "grad_norm": 0.047178895399584496, "learning_rate": 0.00028618034507660144, "loss": 0.8548, "step": 1195},
    {"epoch": 0.2249718785151856, "grad_norm": 0.04249857153746906, "learning_rate": 0.0002859738154140532, "loss": 0.8171, "step": 1200},
    {"epoch": 0.22590926134233222, "grad_norm": 0.04271090221572632, "learning_rate": 0.00028576582958118223, "loss": 0.8274, "step": 1205},
    {"epoch": 0.2268466441694788, "grad_norm": 0.04664121668293475, "learning_rate": 0.00028555638980534974, "loss": 0.8374, "step": 1210},
    {"epoch": 0.22778402699662542, "grad_norm": 0.045498060568064186, "learning_rate": 0.0002853454983294875, "loss": 0.8039, "step": 1215},
    {"epoch": 0.22872140982377204, "grad_norm": 0.04534000569075493, "learning_rate": 0.0002851331574120738, "loss": 0.858, "step": 1220},
    {"epoch": 0.22965879265091863, "grad_norm": 0.04664900784446732, "learning_rate": 0.00028491936932710917, "loss": 0.8402, "step": 1225},
    {"epoch": 0.23059617547806524, "grad_norm": 0.04417588784422196, "learning_rate": 0.0002847041363640923, "loss": 0.8207, "step": 1230},
    {"epoch": 0.23153355830521186, "grad_norm": 0.049652178225426, "learning_rate": 0.0002844874608279954, "loss": 0.8515, "step": 1235},
    {"epoch": 0.23247094113235844, "grad_norm": 0.048818844243959776, "learning_rate": 0.00028426934503923923, "loss": 0.8322, "step": 1240},
    {"epoch": 0.23340832395950506, "grad_norm": 0.04283049790199607, "learning_rate": 0.0002840497913336687, "loss": 0.8247, "step": 1245},
    {"epoch": 0.23434570678665168, "grad_norm": 0.04305639669050898, "learning_rate": 0.0002838288020625277, "loss": 0.8273, "step": 1250},
    {"epoch": 0.23528308961379826, "grad_norm": 0.046845793494778305, "learning_rate": 0.00028360637959243365, "loss": 0.8628, "step": 1255},
    {"epoch": 0.23622047244094488, "grad_norm": 0.046154254481951956, "learning_rate": 0.00028338252630535264, "loss": 0.8565, "step": 1260},
    {"epoch": 0.2371578552680915, "grad_norm": 0.04282295436813289, "learning_rate": 0.00028315724459857346, "loss": 0.8468, "step": 1265},
    {"epoch": 0.23809523809523808, "grad_norm": 0.04767889894413164, "learning_rate": 0.00028293053688468214, "loss": 0.81, "step": 1270},
    {"epoch": 0.2390326209223847, "grad_norm": 0.05527610802137319, "learning_rate": 0.00028270240559153634, "loss": 0.8481, "step": 1275},
    {"epoch": 0.2399700037495313, "grad_norm": 0.0506803378722897, "learning_rate": 0.0002824728531622388, "loss": 0.8354, "step": 1280},
    {"epoch": 0.24090738657667793, "grad_norm": 0.04602503202904283, "learning_rate": 0.00028224188205511154, "loss": 0.8169, "step": 1285},
    {"epoch": 0.24184476940382452, "grad_norm": 0.047777320498584894, "learning_rate": 0.0002820094947436698, "loss": 0.8143, "step": 1290},
    {"epoch": 0.24278215223097113, "grad_norm": 0.041292637408164454, "learning_rate": 0.0002817756937165947, "loss": 0.8429, "step": 1295},
    {"epoch": 0.24371953505811775, "grad_norm": 0.04301715565733742, "learning_rate": 0.00028154048147770763, "loss": 0.8225, "step": 1300},
    {"epoch": 0.24465691788526434, "grad_norm": 0.046059693751385175, "learning_rate": 0.0002813038605459426, "loss": 0.8459, "step": 1305},
    {"epoch": 0.24559430071241095, "grad_norm": 0.044196493163681724, "learning_rate": 0.0002810658334553198, "loss": 0.8161, "step": 1310},
    {"epoch": 0.24653168353955757, "grad_norm": 0.04577271591342366, "learning_rate": 0.00028082640275491793, "loss": 0.8386, "step": 1315},
    {"epoch": 0.24746906636670415, "grad_norm": 0.043540551106033394, "learning_rate": 0.0002805855710088476, "loss": 0.8332, "step": 1320},
    {"epoch": 0.24840644919385077, "grad_norm": 0.05495402299781008, "learning_rate": 0.0002803433407962233, "loss": 0.7858, "step": 1325},
    {"epoch": 0.24934383202099739, "grad_norm": 0.04718094998030703, "learning_rate": 0.00028009971471113594, "loss": 0.8382, "step": 1330},
    {"epoch": 0.250281214848144, "grad_norm": 0.047616730401137404, "learning_rate": 0.00027985469536262524, "loss": 0.8454, "step": 1335},
    {"epoch": 0.2512185976752906, "grad_norm": 0.0409389484091512, "learning_rate": 0.0002796082853746515, "loss": 0.81, "step": 1340},
    {"epoch": 0.2521559805024372, "grad_norm": 0.04522706585670625, "learning_rate": 0.00027936048738606785, "loss": 0.8199, "step": 1345},
    {"epoch": 0.2530933633295838, "grad_norm": 0.04653508675318254, "learning_rate": 0.0002791113040505915, "loss": 0.8298, "step": 1350},
    {"epoch": 0.25403074615673044, "grad_norm": 0.04663421779124662, "learning_rate": 0.0002788607380367759, "loss": 0.865, "step": 1355},
    {"epoch": 0.254968128983877, "grad_norm": 0.04061192230877169, "learning_rate": 0.0002786087920279818, "loss": 0.8188, "step": 1360},
    {"epoch": 0.2559055118110236, "grad_norm": 0.04379262228090009, "learning_rate": 0.0002783554687223484, "loss": 0.8412, "step": 1365},
    {"epoch": 0.2568428946381702, "grad_norm": 0.04557822686420308, "learning_rate": 0.0002781007708327649, "loss": 0.8349, "step": 1370},
    {"epoch": 0.25778027746531684, "grad_norm": 0.045209475352911276, "learning_rate": 0.00027784470108684094, "loss": 0.8554, "step": 1375},
    {"epoch": 0.25871766029246346, "grad_norm": 0.0435631363718186, "learning_rate": 0.0002775872622268779, "loss": 0.8127, "step": 1380},
    {"epoch": 0.2596550431196101, "grad_norm": 0.044012008476995, "learning_rate": 0.0002773284570098391, "loss": 0.8125, "step": 1385},
    {"epoch": 0.26059242594675663, "grad_norm": 0.04874513976965234, "learning_rate": 0.0002770682882073206, "loss": 0.8116, "step": 1390},
    {"epoch": 0.26152980877390325, "grad_norm": 0.04889268142442837, "learning_rate": 0.00027680675860552106, "loss": 0.8315, "step": 1395},
    {"epoch": 0.26246719160104987, "grad_norm": 0.05581866633201944, "learning_rate": 0.0002765438710052125, "loss": 0.8279, "step": 1400},
    {"epoch": 0.2634045744281965, "grad_norm": 0.04602436853592128, "learning_rate": 0.0002762796282217099, "loss": 0.821, "step": 1405},
    {"epoch": 0.2643419572553431, "grad_norm": 0.04678707820307465, "learning_rate": 0.0002760140330848412, "loss": 0.8466, "step": 1410},
    {"epoch": 0.2652793400824897, "grad_norm": 0.04516389038252434, "learning_rate": 0.000275747088438917, "loss": 0.7947, "step": 1415},
    {"epoch": 0.26621672290963627, "grad_norm": 0.04755311816167163, "learning_rate": 0.00027547879714269995, "loss": 0.8314, "step": 1420},
    {"epoch": 0.2671541057367829, "grad_norm": 0.044502973033447774, "learning_rate": 0.0002752091620693742, "loss": 0.8227, "step": 1425},
    {"epoch": 0.2680914885639295, "grad_norm": 0.0441159145083971, "learning_rate": 0.00027493818610651487, "loss": 0.8322, "step": 1430},
    {"epoch": 0.2690288713910761, "grad_norm": 0.04046403847734057, "learning_rate": 0.0002746658721560568, "loss": 0.8287, "step": 1435},
    {"epoch": 0.26996625421822273, "grad_norm": 0.04518695193113121, "learning_rate": 0.0002743922231342636, "loss": 0.8063, "step": 1440},
    {"epoch": 0.27090363704536935, "grad_norm": 0.04299554365501998, "learning_rate": 0.00027411724197169647, "loss": 0.8418, "step": 1445},
    {"epoch": 0.2718410198725159, "grad_norm": 0.04235528853329544, "learning_rate": 0.0002738409316131827, "loss": 0.8506, "step": 1450},
    {"epoch": 0.2727784026996625, "grad_norm": 0.04133212847113579, "learning_rate": 0.0002735632950177843, "loss": 0.8095, "step": 1455},
    {"epoch": 0.27371578552680914, "grad_norm": 0.044217737534462995, "learning_rate": 0.00027328433515876613, "loss": 0.8222, "step": 1460},
    {"epoch": 0.27465316835395576, "grad_norm": 0.04811397623157419, "learning_rate": 0.0002730040550235642, "loss": 0.8223, "step": 1465},
    {"epoch": 0.2755905511811024, "grad_norm": 0.043283154172989766, "learning_rate": 0.0002727224576137535, "loss": 0.8244, "step": 1470},
    {"epoch": 0.276527934008249, "grad_norm": 0.04254147668888092, "learning_rate": 0.0002724395459450161, "loss": 0.8158, "step": 1475},
    {"epoch": 0.27746531683539555, "grad_norm": 0.0437512052356442, "learning_rate": 0.0002721553230471087, "loss": 0.8449, "step": 1480},
    {"epoch": 0.27840269966254216, "grad_norm": 0.04187590056923411, "learning_rate": 0.0002718697919638302, "loss": 0.7986, "step": 1485},
    {"epoch": 0.2793400824896888, "grad_norm": 0.042437873734035854, "learning_rate": 0.0002715829557529891, "loss": 0.8286, "step": 1490},
    {"epoch": 0.2802774653168354, "grad_norm": 0.04430563389754819, "learning_rate": 0.00027129481748637075, "loss": 0.841, "step": 1495},
    {"epoch": 0.281214848143982, "grad_norm": 0.045758411532611065, "learning_rate": 0.00027100538024970444, "loss": 0.8285, "step": 1500},
    {"epoch": 0.2821522309711286, "grad_norm": 0.04423481610722165, "learning_rate": 0.00027071464714263063, "loss": 0.8168, "step": 1505},
    {"epoch": 0.28308961379827524, "grad_norm": 0.04571606782924112, "learning_rate": 0.00027042262127866716, "loss": 0.8249, "step": 1510},
    {"epoch": 0.2840269966254218, "grad_norm": 0.04732141795830983, "learning_rate": 0.00027012930578517645, "loss": 0.8387, "step": 1515},
    {"epoch": 0.2849643794525684, "grad_norm": 0.046305063021735725, "learning_rate": 0.00026983470380333185, "loss": 0.8106, "step": 1520},
    {"epoch": 0.28590176227971503, "grad_norm": 0.04195897212582909, "learning_rate": 0.0002695388184880839, "loss": 0.8261, "step": 1525},
    {"epoch": 0.28683914510686165, "grad_norm": 0.04456321258007771, "learning_rate": 0.0002692416530081265, "loss": 0.8215, "step": 1530},
    {"epoch": 0.28777652793400826, "grad_norm": 0.04879364590021316, "learning_rate": 0.0002689432105458633, "loss": 0.8135, "step": 1535},
    {"epoch": 0.2887139107611549, "grad_norm": 0.04789940824296893, "learning_rate": 0.00026864349429737326, "loss": 0.8368, "step": 1540},
    {"epoch": 0.28965129358830144, "grad_norm": 0.04616782271102101, "learning_rate": 0.00026834250747237665, "loss": 0.8269, "step": 1545},
    {"epoch": 0.29058867641544806, "grad_norm": 0.04502530076723608, "learning_rate": 0.0002680402532942006, "loss": 0.83, "step": 1550},
    {"epoch": 0.29152605924259467, "grad_norm": 0.04002183200170624, "learning_rate": 0.00026773673499974436, "loss": 0.8053, "step": 1555},
    {"epoch": 0.2924634420697413, "grad_norm": 0.04425864811179813, "learning_rate": 0.00026743195583944524, "loss": 0.8354, "step": 1560},
    {"epoch": 0.2934008248968879, "grad_norm": 0.046865997902279015, "learning_rate": 0.000267125919077243, "loss": 0.8263, "step": 1565},
    {"epoch": 0.2943382077240345, "grad_norm": 0.04361053554450397, "learning_rate": 0.00026681862799054557, "loss": 0.8007, "step": 1570},
    {"epoch": 0.2952755905511811, "grad_norm": 0.048458258290373916, "learning_rate": 0.0002665100858701937, "loss": 0.8237, "step": 1575},
    {"epoch": 0.2962129733783277, "grad_norm": 0.047990047586811434, "learning_rate": 0.0002662002960204254, "loss": 0.8162, "step": 1580},
    {"epoch": 0.2971503562054743, "grad_norm": 0.04409542819477004, "learning_rate": 0.0002658892617588413, "loss": 0.8433, "step": 1585},
    {"epoch": 0.2980877390326209, "grad_norm": 0.04731184190347321, "learning_rate": 0.00026557698641636835, "loss": 0.8133, "step": 1590},
    {"epoch": 0.29902512185976754, "grad_norm": 0.042762397685975305, "learning_rate": 0.0002652634733372246, "loss": 0.834, "step": 1595},
    {"epoch": 0.29996250468691416, "grad_norm": 0.04579010165352788, "learning_rate": 0.0002649487258788833, "loss": 0.8214, "step": 1600},
    {"epoch": 0.3008998875140607, "grad_norm": 0.04144592090502628, "learning_rate": 0.0002646327474120368, "loss": 0.8207, "step": 1605},
    {"epoch": 0.30183727034120733, "grad_norm": 0.04770299028768292, "learning_rate": 0.00026431554132056063, "loss": 0.8258, "step": 1610},
    {"epoch": 0.30277465316835395, "grad_norm": 0.044453071872775064, "learning_rate": 0.00026399711100147724, "loss": 0.8085, "step": 1615},
    {"epoch": 0.30371203599550056, "grad_norm": 0.05130385596231612, "learning_rate": 0.0002636774598649195, "loss": 0.8287, "step": 1620},
    {"epoch": 0.3046494188226472, "grad_norm": 0.053015815539756074, "learning_rate": 0.00026335659133409423, "loss": 0.8063, "step": 1625},
    {"epoch": 0.3055868016497938, "grad_norm": 0.04286401355249045, "learning_rate": 0.00026303450884524566, "loss": 0.8084, "step": 1630},
    {"epoch": 0.3065241844769404, "grad_norm": 0.0383196394419625, "learning_rate": 0.0002627112158476185, "loss": 0.8001, "step": 1635},
    {"epoch": 0.30746156730408697, "grad_norm": 0.042481350200920615, "learning_rate": 0.00026238671580342096, "loss": 0.8342, "step": 1640},
    {"epoch": 0.3083989501312336, "grad_norm": 0.0437271017287051, "learning_rate": 0.0002620610121877879, "loss": 0.8301, "step": 1645},
    {"epoch": 0.3093363329583802, "grad_norm": 0.042066811375379806, "learning_rate": 0.0002617341084887433, "loss": 0.8183, "step": 1650},
    {"epoch": 0.3102737157855268, "grad_norm": 0.0434545427613146, "learning_rate": 0.00026140600820716314, "loss": 0.8144, "step": 1655},
    {"epoch": 0.31121109861267343, "grad_norm": 0.038937665823880176, "learning_rate": 0.00026107671485673794, "loss": 0.817, "step": 1660},
    {"epoch": 0.31214848143982005, "grad_norm": 0.044655599527732244, "learning_rate": 0.0002607462319639348, "loss": 0.8344, "step": 1665},
    {"epoch": 0.3130858642669666, "grad_norm": 0.03965599804534495, "learning_rate": 0.00026041456306796014, "loss": 0.8083, "step": 1670},
    {"epoch": 0.3140232470941132, "grad_norm": 0.039069129149835194, "learning_rate": 0.00026008171172072126, "loss": 0.8196, "step": 1675},
    {"epoch": 0.31496062992125984, "grad_norm": 0.040202855570058024, "learning_rate": 0.0002597476814867887, "loss": 0.8205, "step": 1680},
    {"epoch": 0.31589801274840645, "grad_norm": 0.04003968304410291, "learning_rate": 0.0002594124759433579, "loss": 0.8108, "step": 1685},
    {"epoch": 0.31683539557555307, "grad_norm": 0.047433872652346235, "learning_rate": 0.000259076098680211, "loss": 0.8039, "step": 1690},
    {"epoch": 0.3177727784026997, "grad_norm": 0.04141242579423705, "learning_rate": 0.0002587385532996782, "loss": 0.8259, "step": 1695},
    {"epoch": 0.31871016122984624, "grad_norm": 0.054121951067220224, "learning_rate": 0.0002583998434165993, "loss": 0.8246, "step": 1700},
    {"epoch": 0.31964754405699286, "grad_norm": 0.04197913764657862, "learning_rate": 0.00025805997265828507, "loss": 0.8463, "step": 1705},
    {"epoch": 0.3205849268841395, "grad_norm": 0.0426917661733632, "learning_rate": 0.0002577189446644783, "loss": 0.8183, "step": 1710},
    {"epoch": 0.3215223097112861, "grad_norm": 0.04179736226931492, "learning_rate": 0.00025737676308731477, "loss": 0.7976, "step": 1715},
    {"epoch": 0.3224596925384327, "grad_norm": 0.04799629600304747, "learning_rate": 0.0002570334315912844, "loss": 0.8289, "step": 1720},
    {"epoch": 0.3233970753655793, "grad_norm": 0.042393149930441694, "learning_rate": 0.0002566889538531915, "loss": 0.8112, "step": 1725},
    {"epoch": 0.3243344581927259, "grad_norm": 0.04255536591889664, "learning_rate": 0.000256343333562116, "loss": 0.8187, "step": 1730},
    {"epoch": 0.3252718410198725, "grad_norm": 0.044062962674437295, "learning_rate": 0.00025599657441937354, "loss": 0.8018, "step": 1735},
    {"epoch": 0.3262092238470191, "grad_norm": 0.043474283263771174, "learning_rate": 0.00025564868013847595, "loss": 0.8306, "step": 1740},
    {"epoch": 0.32714660667416573, "grad_norm": 0.0422049730670292, "learning_rate": 0.0002552996544450914, "loss": 0.8047, "step": 1745},
    {"epoch": 0.32808398950131235, "grad_norm": 0.04744673000933406, "learning_rate": 0.0002549495010770048, "loss": 0.8422, "step": 1750},
    {"epoch": 0.32902137232845896, "grad_norm": 0.04429260845252424, "learning_rate": 0.0002545982237840773, "loss": 0.8191, "step": 1755},
    {"epoch": 0.3299587551556056, "grad_norm": 0.04232121926909998, "learning_rate": 0.0002542458263282066, "loss": 0.7905, "step": 1760},
    {"epoch": 0.33089613798275214, "grad_norm": 0.03903941148795766, "learning_rate": 0.00025389231248328624, "loss": 0.8047, "step": 1765},
    {"epoch": 0.33183352080989875, "grad_norm": 0.043127224537608456, "learning_rate": 0.00025353768603516555, "loss": 0.8202, "step": 1770},
    {"epoch": 0.33277090363704537, "grad_norm": 0.04603755356895304, "learning_rate": 0.0002531819507816089, "loss": 0.8474, "step": 1775},
    {"epoch": 0.333708286464192, "grad_norm": 0.04421659565539193, "learning_rate": 0.00025282511053225493, "loss": 0.8258, "step": 1780},
    {"epoch": 0.3346456692913386, "grad_norm": 0.04354315354304148, "learning_rate": 0.0002524671691085762, "loss": 0.7872, "step": 1785},
    {"epoch": 0.3355830521184852, "grad_norm": 0.04513774703149973, "learning_rate": 0.0002521081303438377, "loss": 0.7985, "step": 1790},
    {"epoch": 0.3365204349456318, "grad_norm": 0.04656688027583207, "learning_rate": 0.00025174799808305606, "loss": 0.808, "step": 1795},
    {"epoch": 0.3374578177727784, "grad_norm": 0.04760744964038916, "learning_rate": 0.0002513867761829587, "loss": 0.793, "step": 1800},
    {"epoch": 0.338395200599925, "grad_norm": 0.04121747602293146, "learning_rate": 0.0002510244685119418, "loss": 0.8293, "step": 1805},
    {"epoch": 0.3393325834270716, "grad_norm": 0.04112754367048587, "learning_rate": 0.00025066107895002946, "loss": 0.831, "step": 1810},
    {"epoch": 0.34026996625421824, "grad_norm": 0.03926106896606557, "learning_rate": 0.0002502966113888319, "loss": 0.8072, "step": 1815},
    {"epoch": 0.34120734908136485, "grad_norm": 0.04562546212037259, "learning_rate": 0.000249931069731504,
|
"loss": 0.788, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.3421447319085114, |
|
"grad_norm": 0.0406588900726224, |
|
"learning_rate": 0.0002495644578927032, |
|
"loss": 0.8184, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.34308211473565803, |
|
"grad_norm": 0.04003290325962031, |
|
"learning_rate": 0.00024919677979854776, |
|
"loss": 0.8272, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.34401949756280464, |
|
"grad_norm": 0.04186901209736264, |
|
"learning_rate": 0.00024882803938657466, |
|
"loss": 0.7956, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.34495688038995126, |
|
"grad_norm": 0.041398093060463485, |
|
"learning_rate": 0.00024845824060569743, |
|
"loss": 0.8114, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.3458942632170979, |
|
"grad_norm": 0.04109679086847299, |
|
"learning_rate": 0.000248087387416164, |
|
"loss": 0.807, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.3468316460442445, |
|
"grad_norm": 0.042039328634813876, |
|
"learning_rate": 0.000247715483789514, |
|
"loss": 0.8306, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.34776902887139105, |
|
"grad_norm": 0.0394540126640408, |
|
"learning_rate": 0.0002473425337085366, |
|
"loss": 0.7966, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.34870641169853767, |
|
"grad_norm": 0.04432229876319661, |
|
"learning_rate": 0.0002469685411672275, |
|
"loss": 0.811, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.3496437945256843, |
|
"grad_norm": 0.04227321189035649, |
|
"learning_rate": 0.0002465935101707463, |
|
"loss": 0.8248, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.3505811773528309, |
|
"grad_norm": 0.039191702176161206, |
|
"learning_rate": 0.00024621744473537365, |
|
"loss": 0.8205, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.3515185601799775, |
|
"grad_norm": 0.04634923450670881, |
|
"learning_rate": 0.00024584034888846835, |
|
"loss": 0.7763, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.35245594300712413, |
|
"grad_norm": 0.04501112952096222, |
|
"learning_rate": 0.0002454622266684239, |
|
"loss": 0.8258, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.3533933258342707, |
|
"grad_norm": 0.04422758808317238, |
|
"learning_rate": 0.0002450830821246255, |
|
"loss": 0.8106, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.3543307086614173, |
|
"grad_norm": 0.04421016805518408, |
|
"learning_rate": 0.00024470291931740667, |
|
"loss": 0.7815, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.3552680914885639, |
|
"grad_norm": 0.04319380391113109, |
|
"learning_rate": 0.0002443217423180055, |
|
"loss": 0.7973, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.35620547431571054, |
|
"grad_norm": 0.040535892316044465, |
|
"learning_rate": 0.00024393955520852158, |
|
"loss": 0.8231, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 0.050182245944215505, |
|
"learning_rate": 0.00024355636208187175, |
|
"loss": 0.809, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.35808023997000377, |
|
"grad_norm": 0.05056635484821874, |
|
"learning_rate": 0.00024317216704174653, |
|
"loss": 0.7863, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.3590176227971504, |
|
"grad_norm": 0.04022954249460216, |
|
"learning_rate": 0.00024278697420256615, |
|
"loss": 0.8144, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.35995500562429694, |
|
"grad_norm": 0.04323635830441304, |
|
"learning_rate": 0.00024240078768943647, |
|
"loss": 0.8052, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.36089238845144356, |
|
"grad_norm": 0.05048502253231987, |
|
"learning_rate": 0.00024201361163810476, |
|
"loss": 0.8292, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.3618297712785902, |
|
"grad_norm": 0.045094345947545966, |
|
"learning_rate": 0.00024162545019491545, |
|
"loss": 0.8263, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.3627671541057368, |
|
"grad_norm": 0.04751777775798578, |
|
"learning_rate": 0.0002412363075167658, |
|
"loss": 0.813, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.3637045369328834, |
|
"grad_norm": 0.03970350230348181, |
|
"learning_rate": 0.0002408461877710613, |
|
"loss": 0.7874, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.36464191976003, |
|
"grad_norm": 0.043021517102531895, |
|
"learning_rate": 0.00024045509513567092, |
|
"loss": 0.7835, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.3655793025871766, |
|
"grad_norm": 0.04145029708505387, |
|
"learning_rate": 0.0002400630337988826, |
|
"loss": 0.7976, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.3665166854143232, |
|
"grad_norm": 0.04060705381251834, |
|
"learning_rate": 0.0002396700079593583, |
|
"loss": 0.784, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.3674540682414698, |
|
"grad_norm": 0.04105772844447629, |
|
"learning_rate": 0.00023927602182608902, |
|
"loss": 0.7982, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.3683914510686164, |
|
"grad_norm": 0.04405339270275701, |
|
"learning_rate": 0.00023888107961834968, |
|
"loss": 0.8367, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.36932883389576304, |
|
"grad_norm": 0.04229707897710055, |
|
"learning_rate": 0.00023848518556565405, |
|
"loss": 0.8147, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.37026621672290966, |
|
"grad_norm": 0.04101868201617462, |
|
"learning_rate": 0.00023808834390770937, |
|
"loss": 0.7984, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.3712035995500562, |
|
"grad_norm": 0.0452498903694969, |
|
"learning_rate": 0.00023769055889437103, |
|
"loss": 0.8064, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.37214098237720283, |
|
"grad_norm": 0.040047239815103164, |
|
"learning_rate": 0.0002372918347855969, |
|
"loss": 0.7737, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.37307836520434945, |
|
"grad_norm": 0.03830098106296232, |
|
"learning_rate": 0.0002368921758514018, |
|
"loss": 0.7735, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.37401574803149606, |
|
"grad_norm": 0.0466704182451149, |
|
"learning_rate": 0.00023649158637181191, |
|
"loss": 0.7913, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.3749531308586427, |
|
"grad_norm": 0.045697413605673594, |
|
"learning_rate": 0.00023609007063681874, |
|
"loss": 0.8083, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3758905136857893, |
|
"grad_norm": 0.046012146633052885, |
|
"learning_rate": 0.0002356876329463332, |
|
"loss": 0.7986, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.37682789651293586, |
|
"grad_norm": 0.042087372585577834, |
|
"learning_rate": 0.0002352842776101396, |
|
"loss": 0.789, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.37776527934008247, |
|
"grad_norm": 0.04351288912284283, |
|
"learning_rate": 0.00023488000894784954, |
|
"loss": 0.8066, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.3787026621672291, |
|
"grad_norm": 0.04264731715281628, |
|
"learning_rate": 0.0002344748312888557, |
|
"loss": 0.8242, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.3796400449943757, |
|
"grad_norm": 0.04004005288826855, |
|
"learning_rate": 0.00023406874897228527, |
|
"loss": 0.8134, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.3805774278215223, |
|
"grad_norm": 0.03990638844779895, |
|
"learning_rate": 0.00023366176634695353, |
|
"loss": 0.8341, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.38151481064866893, |
|
"grad_norm": 0.04606026511862361, |
|
"learning_rate": 0.00023325388777131748, |
|
"loss": 0.8001, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.38245219347581555, |
|
"grad_norm": 0.04391729186424558, |
|
"learning_rate": 0.000232845117613429, |
|
"loss": 0.8195, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.3833895763029621, |
|
"grad_norm": 0.041518162558628426, |
|
"learning_rate": 0.00023243546025088799, |
|
"loss": 0.7802, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.3843269591301087, |
|
"grad_norm": 0.04366132632044734, |
|
"learning_rate": 0.00023202492007079584, |
|
"loss": 0.7828, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.38526434195725534, |
|
"grad_norm": 0.04383475380804256, |
|
"learning_rate": 0.00023161350146970794, |
|
"loss": 0.7876, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.38620172478440196, |
|
"grad_norm": 0.036393360894576204, |
|
"learning_rate": 0.00023120120885358698, |
|
"loss": 0.7975, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.38713910761154857, |
|
"grad_norm": 0.04084955682840953, |
|
"learning_rate": 0.00023078804663775572, |
|
"loss": 0.7786, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.3880764904386952, |
|
"grad_norm": 0.03911103036688829, |
|
"learning_rate": 0.00023037401924684946, |
|
"loss": 0.8026, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.38901387326584175, |
|
"grad_norm": 0.041012148951149276, |
|
"learning_rate": 0.000229959131114769, |
|
"loss": 0.7885, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.38995125609298836, |
|
"grad_norm": 0.03899340776511314, |
|
"learning_rate": 0.00022954338668463296, |
|
"loss": 0.7813, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.390888638920135, |
|
"grad_norm": 0.04177139426462313, |
|
"learning_rate": 0.00022912679040873005, |
|
"loss": 0.7433, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.3918260217472816, |
|
"grad_norm": 0.041993929485497614, |
|
"learning_rate": 0.00022870934674847177, |
|
"loss": 0.8079, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.3927634045744282, |
|
"grad_norm": 0.04106413868995631, |
|
"learning_rate": 0.00022829106017434434, |
|
"loss": 0.7872, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.3937007874015748, |
|
"grad_norm": 0.047413157951248786, |
|
"learning_rate": 0.00022787193516586091, |
|
"loss": 0.796, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.3946381702287214, |
|
"grad_norm": 0.04503978112797596, |
|
"learning_rate": 0.00022745197621151363, |
|
"loss": 0.8352, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.395575553055868, |
|
"grad_norm": 0.04115650674340414, |
|
"learning_rate": 0.0002270311878087255, |
|
"loss": 0.816, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.3965129358830146, |
|
"grad_norm": 0.041160476408568784, |
|
"learning_rate": 0.00022660957446380225, |
|
"loss": 0.7982, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.39745031871016123, |
|
"grad_norm": 0.04408506165618202, |
|
"learning_rate": 0.00022618714069188404, |
|
"loss": 0.8209, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.39838770153730785, |
|
"grad_norm": 0.04001800900141434, |
|
"learning_rate": 0.00022576389101689725, |
|
"loss": 0.7771, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.39932508436445446, |
|
"grad_norm": 0.04320785141278407, |
|
"learning_rate": 0.00022533982997150585, |
|
"loss": 0.7677, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.400262467191601, |
|
"grad_norm": 0.039431554219398125, |
|
"learning_rate": 0.00022491496209706293, |
|
"loss": 0.7715, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.40119985001874764, |
|
"grad_norm": 0.04041447511709435, |
|
"learning_rate": 0.0002244892919435621, |
|
"loss": 0.7961, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.40213723284589425, |
|
"grad_norm": 0.04233570336630376, |
|
"learning_rate": 0.00022406282406958874, |
|
"loss": 0.7932, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.40307461567304087, |
|
"grad_norm": 0.040223931466695007, |
|
"learning_rate": 0.00022363556304227111, |
|
"loss": 0.7972, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.4040119985001875, |
|
"grad_norm": 0.03920199535556696, |
|
"learning_rate": 0.0002232075134372316, |
|
"loss": 0.7912, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.4049493813273341, |
|
"grad_norm": 0.04045705766440815, |
|
"learning_rate": 0.00022277867983853754, |
|
"loss": 0.772, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.4058867641544807, |
|
"grad_norm": 0.038172006672106074, |
|
"learning_rate": 0.00022234906683865234, |
|
"loss": 0.7994, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.4068241469816273, |
|
"grad_norm": 0.042094914465141645, |
|
"learning_rate": 0.00022191867903838597, |
|
"loss": 0.7908, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.4077615298087739, |
|
"grad_norm": 0.041299074122275056, |
|
"learning_rate": 0.00022148752104684608, |
|
"loss": 0.791, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.4086989126359205, |
|
"grad_norm": 0.03672565143578591, |
|
"learning_rate": 0.00022105559748138834, |
|
"loss": 0.7879, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.4096362954630671, |
|
"grad_norm": 0.03978313771126583, |
|
"learning_rate": 0.00022062291296756715, |
|
"loss": 0.8095, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.41057367829021374, |
|
"grad_norm": 0.04001569067395201, |
|
"learning_rate": 0.000220189472139086, |
|
"loss": 0.7826, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.41151106111736035, |
|
"grad_norm": 0.043631782457239365, |
|
"learning_rate": 0.00021975527963774796, |
|
"loss": 0.7927, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.4124484439445069, |
|
"grad_norm": 0.04073514355199719, |
|
"learning_rate": 0.00021932034011340587, |
|
"loss": 0.7939, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.41338582677165353, |
|
"grad_norm": 0.044704633805377016, |
|
"learning_rate": 0.00021888465822391269, |
|
"loss": 0.795, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.41432320959880015, |
|
"grad_norm": 0.04223574180256086, |
|
"learning_rate": 0.00021844823863507136, |
|
"loss": 0.7697, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.41526059242594676, |
|
"grad_norm": 0.04033678944300133, |
|
"learning_rate": 0.00021801108602058507, |
|
"loss": 0.7942, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.4161979752530934, |
|
"grad_norm": 0.040615950774112376, |
|
"learning_rate": 0.00021757320506200713, |
|
"loss": 0.7976, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.41713535808024, |
|
"grad_norm": 0.03966904553021298, |
|
"learning_rate": 0.00021713460044869078, |
|
"loss": 0.7356, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.41807274090738655, |
|
"grad_norm": 0.049048512180610696, |
|
"learning_rate": 0.0002166952768777391, |
|
"loss": 0.788, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.41901012373453317, |
|
"grad_norm": 0.04363519676021418, |
|
"learning_rate": 0.00021625523905395458, |
|
"loss": 0.7778, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.4199475065616798, |
|
"grad_norm": 0.040788196081375995, |
|
"learning_rate": 0.00021581449168978878, |
|
"loss": 0.7845, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.4208848893888264, |
|
"grad_norm": 0.04388975232146075, |
|
"learning_rate": 0.00021537303950529185, |
|
"loss": 0.7912, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.421822272215973, |
|
"grad_norm": 0.039487905759282925, |
|
"learning_rate": 0.0002149308872280621, |
|
"loss": 0.7904, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.42275965504311963, |
|
"grad_norm": 0.03892803525416569, |
|
"learning_rate": 0.0002144880395931951, |
|
"loss": 0.7832, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.4236970378702662, |
|
"grad_norm": 0.048355006016535, |
|
"learning_rate": 0.0002140445013432333, |
|
"loss": 0.8233, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.4246344206974128, |
|
"grad_norm": 0.04226596396802164, |
|
"learning_rate": 0.00021360027722811505, |
|
"loss": 0.7986, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.4255718035245594, |
|
"grad_norm": 0.03777343997339362, |
|
"learning_rate": 0.00021315537200512362, |
|
"loss": 0.7739, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.42650918635170604, |
|
"grad_norm": 0.0447778590274291, |
|
"learning_rate": 0.00021270979043883664, |
|
"loss": 0.8097, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.42744656917885265, |
|
"grad_norm": 0.04168554679696771, |
|
"learning_rate": 0.00021226353730107467, |
|
"loss": 0.7835, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.42838395200599927, |
|
"grad_norm": 0.04011938574259242, |
|
"learning_rate": 0.00021181661737085028, |
|
"loss": 0.8223, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.4293213348331459, |
|
"grad_norm": 0.03778469888602811, |
|
"learning_rate": 0.00021136903543431685, |
|
"loss": 0.7739, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.43025871766029244, |
|
"grad_norm": 0.03715090423495881, |
|
"learning_rate": 0.0002109207962847174, |
|
"loss": 0.8144, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.43119610048743906, |
|
"grad_norm": 0.03962090097485277, |
|
"learning_rate": 0.00021047190472233305, |
|
"loss": 0.7811, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4321334833145857, |
|
"grad_norm": 0.039092040132198524, |
|
"learning_rate": 0.00021002236555443183, |
|
"loss": 0.7909, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.4330708661417323, |
|
"grad_norm": 0.03883145505567921, |
|
"learning_rate": 0.00020957218359521706, |
|
"loss": 0.8176, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.4340082489688789, |
|
"grad_norm": 0.03985259948134514, |
|
"learning_rate": 0.0002091213636657759, |
|
"loss": 0.7869, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.4349456317960255, |
|
"grad_norm": 0.0405689266411763, |
|
"learning_rate": 0.0002086699105940275, |
|
"loss": 0.8039, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.4358830146231721, |
|
"grad_norm": 0.04016614154253833, |
|
"learning_rate": 0.00020821782921467166, |
|
"loss": 0.7911, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.4368203974503187, |
|
"grad_norm": 0.03975323854656061, |
|
"learning_rate": 0.0002077651243691367, |
|
"loss": 0.7833, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.4377577802774653, |
|
"grad_norm": 0.041105027708813344, |
|
"learning_rate": 0.00020731180090552783, |
|
"loss": 0.7675, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.43869516310461193, |
|
"grad_norm": 0.03872126186366071, |
|
"learning_rate": 0.00020685786367857518, |
|
"loss": 0.7959, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.43963254593175854, |
|
"grad_norm": 0.041950004467004046, |
|
"learning_rate": 0.0002064033175495817, |
|
"loss": 0.7642, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.44056992875890516, |
|
"grad_norm": 0.04308753428596666, |
|
"learning_rate": 0.00020594816738637133, |
|
"loss": 0.7828, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.4415073115860517, |
|
"grad_norm": 0.04067335511731369, |
|
"learning_rate": 0.00020549241806323658, |
|
"loss": 0.7731, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.44244469441319834, |
|
"grad_norm": 0.038488821747570894, |
|
"learning_rate": 0.00020503607446088661, |
|
"loss": 0.7783, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.44338207724034495, |
|
"grad_norm": 0.0405566911757408, |
|
"learning_rate": 0.00020457914146639473, |
|
"loss": 0.7913, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.44431946006749157, |
|
"grad_norm": 0.041242394949931045, |
|
"learning_rate": 0.00020412162397314624, |
|
"loss": 0.7971, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.4452568428946382, |
|
"grad_norm": 0.039550878105764575, |
|
"learning_rate": 0.00020366352688078597, |
|
"loss": 0.7941, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.4461942257217848, |
|
"grad_norm": 0.04176542654646332, |
|
"learning_rate": 0.00020320485509516564, |
|
"loss": 0.7796, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.44713160854893136, |
|
"grad_norm": 0.04091658096529671, |
|
"learning_rate": 0.0002027456135282917, |
|
"loss": 0.7656, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.448068991376078, |
|
"grad_norm": 0.04204041507697926, |
|
"learning_rate": 0.00020228580709827227, |
|
"loss": 0.7842, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.4490063742032246, |
|
"grad_norm": 0.043581111984004314, |
|
"learning_rate": 0.0002018254407292649, |
|
"loss": 0.8044, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.4499437570303712, |
|
"grad_norm": 0.03967757526818896, |
|
"learning_rate": 0.00020136451935142349, |
|
"loss": 0.7807, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.4508811398575178, |
|
"grad_norm": 0.04119079014264244, |
|
"learning_rate": 0.00020090304790084572, |
|
"loss": 0.7949, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.45181852268466444, |
|
"grad_norm": 0.04744494144786433, |
|
"learning_rate": 0.00020044103131952007, |
|
"loss": 0.7886, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.452755905511811, |
|
"grad_norm": 0.03865847999571606, |
|
"learning_rate": 0.000199978474555273, |
|
"loss": 0.7824, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.4536932883389576, |
|
"grad_norm": 0.04257054661223989, |
|
"learning_rate": 0.0001995153825617157, |
|
"loss": 0.7958, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.4546306711661042, |
|
"grad_norm": 0.03723070056879195, |
|
"learning_rate": 0.0001990517602981915, |
|
"loss": 0.7743, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.45556805399325084, |
|
"grad_norm": 0.04206537348222414, |
|
"learning_rate": 0.0001985876127297224, |
|
"loss": 0.7818, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.45650543682039746, |
|
"grad_norm": 0.046605846594271055, |
|
"learning_rate": 0.00019812294482695586, |
|
"loss": 0.7888, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.4574428196475441, |
|
"grad_norm": 0.03927213215049627, |
|
"learning_rate": 0.00019765776156611189, |
|
"loss": 0.7941, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.4583802024746907, |
|
"grad_norm": 0.03773012168883939, |
|
"learning_rate": 0.00019719206792892944, |
|
"loss": 0.779, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.45931758530183725, |
|
"grad_norm": 0.04033866134296589, |
|
"learning_rate": 0.00019672586890261322, |
|
"loss": 0.7548, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.46025496812898387, |
|
"grad_norm": 0.03863797353130748, |
|
"learning_rate": 0.00019625916947978029, |
|
"loss": 0.7519, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.4611923509561305, |
|
"grad_norm": 0.03834068421875446, |
|
"learning_rate": 0.00019579197465840654, |
|
"loss": 0.7524, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.4621297337832771, |
|
"grad_norm": 0.04082115337906864, |
|
"learning_rate": 0.0001953242894417731, |
|
"loss": 0.7748, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.4630671166104237, |
|
"grad_norm": 0.038882691470253396, |
|
"learning_rate": 0.000194856118838413, |
|
"loss": 0.7732, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.46400449943757033, |
|
"grad_norm": 0.04383855537670663, |
|
"learning_rate": 0.0001943874678620572, |
|
"loss": 0.7718, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.4649418822647169, |
|
"grad_norm": 0.04487220174913269, |
|
"learning_rate": 0.0001939183415315812, |
|
"loss": 0.8184, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.4658792650918635, |
|
"grad_norm": 0.04243430887998934, |
|
"learning_rate": 0.00019344874487095106, |
|
"loss": 0.7909, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.4668166479190101, |
|
"grad_norm": 0.042879736936158284, |
|
"learning_rate": 0.00019297868290916973, |
|
"loss": 0.8164, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.46775403074615673, |
|
"grad_norm": 0.04530733011987864, |
|
"learning_rate": 0.00019250816068022326, |
|
"loss": 0.795, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.46869141357330335, |
|
"grad_norm": 0.04578548882806771, |
|
"learning_rate": 0.0001920371832230266, |
|
"loss": 0.7974, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.46962879640044997, |
|
"grad_norm": 0.04042238169087785, |
|
"learning_rate": 0.00019156575558137003, |
|
"loss": 0.8004, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.4705661792275965, |
|
"grad_norm": 0.03887172482698654, |
|
"learning_rate": 0.00019109388280386488, |
|
"loss": 0.7686, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.47150356205474314, |
|
"grad_norm": 0.03584281998116991, |
|
"learning_rate": 0.00019062156994388937, |
|
"loss": 0.7488, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.47244094488188976, |
|
"grad_norm": 0.038784324923480024, |
|
"learning_rate": 0.00019014882205953485, |
|
"loss": 0.7797, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.4733783277090364, |
|
"grad_norm": 0.03811245259868312, |
|
"learning_rate": 0.00018967564421355134, |
|
"loss": 0.7566, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.474315710536183, |
|
"grad_norm": 0.03796594842270583, |
|
"learning_rate": 0.0001892020414732934, |
|
"loss": 0.7853, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.4752530933633296, |
|
"grad_norm": 0.04195011212848616, |
|
"learning_rate": 0.000188728018910666, |
|
"loss": 0.7924, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 0.045188665210356145, |
|
"learning_rate": 0.00018825358160206982, |
|
"loss": 0.7961, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.4771278590176228, |
|
"grad_norm": 0.04796346505150324, |
|
"learning_rate": 0.00018777873462834735, |
|
"loss": 0.7809, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.4780652418447694, |
|
"grad_norm": 0.039964514626294564, |
|
"learning_rate": 0.00018730348307472824, |
|
"loss": 0.7653, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.479002624671916, |
|
"grad_norm": 0.0427034937035765, |
|
"learning_rate": 0.0001868278320307747, |
|
"loss": 0.7726, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.4799400074990626, |
|
"grad_norm": 0.03972042150913065, |
|
"learning_rate": 0.00018635178659032732, |
|
"loss": 0.7805, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.48087739032620924, |
|
"grad_norm": 0.0371985040034197, |
|
"learning_rate": 0.0001858753518514503, |
|
"loss": 0.7561, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.48181477315335586, |
|
"grad_norm": 0.04584757733571624, |
|
"learning_rate": 0.00018539853291637696, |
|
"loss": 0.7753, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.4827521559805024, |
|
"grad_norm": 0.03974757572397099, |
|
"learning_rate": 0.00018492133489145506, |
|
"loss": 0.7748, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.48368953880764903, |
|
"grad_norm": 0.03742920129272121, |
|
"learning_rate": 0.000184443762887092, |
|
"loss": 0.7917, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.48462692163479565, |
|
"grad_norm": 0.04694450953419577, |
|
"learning_rate": 0.00018396582201770032, |
|
"loss": 0.7859, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.48556430446194226, |
|
"grad_norm": 0.042973639248807004, |
|
"learning_rate": 0.00018348751740164272, |
|
"loss": 0.7836, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.4865016872890889, |
|
"grad_norm": 0.04047247960728854, |
|
"learning_rate": 0.00018300885416117733, |
|
"loss": 0.7796, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.4874390701162355, |
|
"grad_norm": 0.0396897138962343, |
|
"learning_rate": 0.000182529837422403, |
|
"loss": 0.7682, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.48837645294338206, |
|
"grad_norm": 0.0398548055720446, |
|
"learning_rate": 0.0001820504723152041, |
|
"loss": 0.7932, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.48931383577052867, |
|
"grad_norm": 0.037622818268512194, |
|
"learning_rate": 0.0001815707639731958, |
|
"loss": 0.7781, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.4902512185976753, |
|
"grad_norm": 0.0392715706617893, |
|
"learning_rate": 0.00018109071753366916, |
|
"loss": 0.7929, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.4911886014248219, |
|
"grad_norm": 0.039744227841198555, |
|
"learning_rate": 0.00018061033813753576, |
|
"loss": 0.7756, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.4921259842519685, |
|
"grad_norm": 0.04155014821494704, |
|
"learning_rate": 0.00018012963092927297, |
|
"loss": 0.7706, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.49306336707911513, |
|
"grad_norm": 0.03840166882540415, |
|
"learning_rate": 0.0001796486010568689, |
|
"loss": 0.7893, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.4940007499062617, |
|
"grad_norm": 0.039395802156394155, |
|
"learning_rate": 0.000179167253671767, |
|
"loss": 0.775, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.4949381327334083, |
|
"grad_norm": 0.03787300343153706, |
|
"learning_rate": 0.00017868559392881107, |
|
"loss": 0.795, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.4958755155605549, |
|
"grad_norm": 0.040301333180301116, |
|
"learning_rate": 0.0001782036269861899, |
|
"loss": 0.7775, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.49681289838770154, |
|
"grad_norm": 0.03780702169928712, |
|
"learning_rate": 0.0001777213580053823, |
|
"loss": 0.7774, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.49775028121484816, |
|
"grad_norm": 0.04018675410195464, |
|
"learning_rate": 0.0001772387921511016, |
|
"loss": 0.7853, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.49868766404199477, |
|
"grad_norm": 0.039105991236874874, |
|
"learning_rate": 0.00017675593459124045, |
|
"loss": 0.7853, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.49962504686914133, |
|
"grad_norm": 0.043058223660341025, |
|
"learning_rate": 0.00017627279049681538, |
|
"loss": 0.779, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.500562429696288, |
|
"grad_norm": 0.038503318239807786, |
|
"learning_rate": 0.0001757893650419114, |
|
"loss": 0.7746, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.5014998125234346, |
|
"grad_norm": 0.038809904361382756, |
|
"learning_rate": 0.00017530566340362685, |
|
"loss": 0.7836, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.5024371953505812, |
|
"grad_norm": 0.03720404583133728, |
|
"learning_rate": 0.00017482169076201765, |
|
"loss": 0.7738, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.5033745781777278, |
|
"grad_norm": 0.04072977160239652, |
|
"learning_rate": 0.00017433745230004192, |
|
"loss": 0.7932, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.5043119610048744, |
|
"grad_norm": 0.04007107726737037, |
|
"learning_rate": 0.00017385295320350463, |
|
"loss": 0.7925, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.505249343832021, |
|
"grad_norm": 0.03706793674088091, |
|
"learning_rate": 0.00017336819866100182, |
|
"loss": 0.7802, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.5061867266591676, |
|
"grad_norm": 0.03970514233327491, |
|
"learning_rate": 0.00017288319386386515, |
|
"loss": 0.7641, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5071241094863143, |
|
"grad_norm": 0.04248378336710558, |
|
"learning_rate": 0.0001723979440061064, |
|
"loss": 0.7843, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.5080614923134609, |
|
"grad_norm": 0.035133630205708484, |
|
"learning_rate": 0.00017191245428436173, |
|
"loss": 0.7685, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.5089988751406074, |
|
"grad_norm": 0.03584192505625803, |
|
"learning_rate": 0.00017142672989783601, |
|
"loss": 0.7488, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.509936257967754, |
|
"grad_norm": 0.043073407159687965, |
|
"learning_rate": 0.00017094077604824708, |
|
"loss": 0.7625, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.5108736407949006, |
|
"grad_norm": 0.04373054551404995, |
|
"learning_rate": 0.00017045459793977037, |
|
"loss": 0.7793, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.5118110236220472, |
|
"grad_norm": 0.0401634365888691, |
|
"learning_rate": 0.00016996820077898285, |
|
"loss": 0.7673, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.5127484064491938, |
|
"grad_norm": 0.04228564245730852, |
|
"learning_rate": 0.00016948158977480722, |
|
"loss": 0.7829, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.5136857892763405, |
|
"grad_norm": 0.04101625807575367, |
|
"learning_rate": 0.00016899477013845656, |
|
"loss": 0.7739, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.5146231721034871, |
|
"grad_norm": 0.037270510821881576, |
|
"learning_rate": 0.00016850774708337794, |
|
"loss": 0.7819, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.5155605549306337, |
|
"grad_norm": 0.03719664186844611, |
|
"learning_rate": 0.00016802052582519706, |
|
"loss": 0.7547, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.5164979377577803, |
|
"grad_norm": 0.042746632036701274, |
|
"learning_rate": 0.00016753311158166216, |
|
"loss": 0.7875, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.5174353205849269, |
|
"grad_norm": 0.040113090184156625, |
|
"learning_rate": 0.00016704550957258817, |
|
"loss": 0.7671, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.5183727034120735, |
|
"grad_norm": 0.04096689940473245, |
|
"learning_rate": 0.0001665577250198009, |
|
"loss": 0.7504, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.5193100862392201, |
|
"grad_norm": 0.03979245826251994, |
|
"learning_rate": 0.00016606976314708104, |
|
"loss": 0.7692, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.5202474690663667, |
|
"grad_norm": 0.03499884259333867, |
|
"learning_rate": 0.0001655816291801082, |
|
"loss": 0.7502, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.5211848518935133, |
|
"grad_norm": 0.038595691040614596, |
|
"learning_rate": 0.00016509332834640505, |
|
"loss": 0.7779, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.5221222347206599, |
|
"grad_norm": 0.03845551078176031, |
|
"learning_rate": 0.00016460486587528114, |
|
"loss": 0.7734, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.5230596175478065, |
|
"grad_norm": 0.04268014575183859, |
|
"learning_rate": 0.00016411624699777717, |
|
"loss": 0.7932, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.5239970003749531, |
|
"grad_norm": 0.04217654750286591, |
|
"learning_rate": 0.0001636274769466087, |
|
"loss": 0.7755, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.5249343832020997, |
|
"grad_norm": 0.03663813497845865, |
|
"learning_rate": 0.00016313856095611037, |
|
"loss": 0.7819, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5258717660292463, |
|
"grad_norm": 0.03649865813691932, |
|
"learning_rate": 0.00016264950426217963, |
|
"loss": 0.7854, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.526809148856393, |
|
"grad_norm": 0.035007178804664606, |
|
"learning_rate": 0.0001621603121022208, |
|
"loss": 0.7763, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.5277465316835396, |
|
"grad_norm": 0.03631407653031134, |
|
"learning_rate": 0.00016167098971508884, |
|
"loss": 0.75, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.5286839145106862, |
|
"grad_norm": 0.03748295963128187, |
|
"learning_rate": 0.00016118154234103345, |
|
"loss": 0.7755, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.5296212973378328, |
|
"grad_norm": 0.03847790308231328, |
|
"learning_rate": 0.00016069197522164272, |
|
"loss": 0.7721, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.5305586801649794, |
|
"grad_norm": 0.0403928945535946, |
|
"learning_rate": 0.00016020229359978722, |
|
"loss": 0.7823, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.531496062992126, |
|
"grad_norm": 0.042093934154398625, |
|
"learning_rate": 0.0001597125027195637, |
|
"loss": 0.7594, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.5324334458192725, |
|
"grad_norm": 0.03740153505333051, |
|
"learning_rate": 0.00015922260782623906, |
|
"loss": 0.775, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.5333708286464192, |
|
"grad_norm": 0.0365020182974372, |
|
"learning_rate": 0.00015873261416619395, |
|
"loss": 0.7788, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.5343082114735658, |
|
"grad_norm": 0.04097368109261412, |
|
"learning_rate": 0.00015824252698686686, |
|
"loss": 0.7801, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.5352455943007124, |
|
"grad_norm": 0.040252898201126604, |
|
"learning_rate": 0.00015775235153669772, |
|
"loss": 0.7651, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.536182977127859, |
|
"grad_norm": 0.04080680519751646, |
|
"learning_rate": 0.00015726209306507182, |
|
"loss": 0.7609, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.5371203599550056, |
|
"grad_norm": 0.04064129669100478, |
|
"learning_rate": 0.00015677175682226346, |
|
"loss": 0.7686, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.5380577427821522, |
|
"grad_norm": 0.040674542378951246, |
|
"learning_rate": 0.0001562813480593799, |
|
"loss": 0.7616, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.5389951256092989, |
|
"grad_norm": 0.04054872658419522, |
|
"learning_rate": 0.0001557908720283051, |
|
"loss": 0.7938, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.5399325084364455, |
|
"grad_norm": 0.037193930697877325, |
|
"learning_rate": 0.00015530033398164318, |
|
"loss": 0.7671, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.5408698912635921, |
|
"grad_norm": 0.04330464356262373, |
|
"learning_rate": 0.00015480973917266256, |
|
"loss": 0.789, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.5418072740907387, |
|
"grad_norm": 0.04054372294824471, |
|
"learning_rate": 0.0001543190928552395, |
|
"loss": 0.7511, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.5427446569178853, |
|
"grad_norm": 0.037141465243079236, |
|
"learning_rate": 0.00015382840028380193, |
|
"loss": 0.7806, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.5436820397450318, |
|
"grad_norm": 0.04616963653816421, |
|
"learning_rate": 0.000153337666713273, |
|
"loss": 0.7685, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5446194225721784, |
|
"grad_norm": 0.04026197513442802, |
|
"learning_rate": 0.000152846897399015, |
|
"loss": 0.7538, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.545556805399325, |
|
"grad_norm": 0.040209956896230005, |
|
"learning_rate": 0.0001523560975967731, |
|
"loss": 0.7669, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.5464941882264717, |
|
"grad_norm": 0.039214472443270074, |
|
"learning_rate": 0.0001518652725626188, |
|
"loss": 0.7821, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.5474315710536183, |
|
"grad_norm": 0.037735873161464144, |
|
"learning_rate": 0.00015137442755289388, |
|
"loss": 0.7669, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.5483689538807649, |
|
"grad_norm": 0.040692415941492, |
|
"learning_rate": 0.00015088356782415408, |
|
"loss": 0.7642, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.5493063367079115, |
|
"grad_norm": 0.037557444480219304, |
|
"learning_rate": 0.0001503926986331127, |
|
"loss": 0.7491, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.5502437195350581, |
|
"grad_norm": 0.04303948252096911, |
|
"learning_rate": 0.00014990182523658444, |
|
"loss": 0.7633, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.5511811023622047, |
|
"grad_norm": 0.039445563951292666, |
|
"learning_rate": 0.0001494109528914291, |
|
"loss": 0.7837, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5521184851893514, |
|
"grad_norm": 0.0369146298860869, |
|
"learning_rate": 0.000148920086854495, |
|
"loss": 0.7783, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.553055868016498, |
|
"grad_norm": 0.03928589902574045, |
|
"learning_rate": 0.00014842923238256317, |
|
"loss": 0.7623, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.5539932508436446, |
|
"grad_norm": 0.040961368365580925, |
|
"learning_rate": 0.00014793839473229047, |
|
"loss": 0.7302, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.5549306336707911, |
|
"grad_norm": 0.04066646466874108, |
|
"learning_rate": 0.000147447579160154, |
|
"loss": 0.7933, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.5558680164979377, |
|
"grad_norm": 0.04046950253782251, |
|
"learning_rate": 0.00014695679092239405, |
|
"loss": 0.7794, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.5568053993250843, |
|
"grad_norm": 0.0395374977204719, |
|
"learning_rate": 0.00014646603527495848, |
|
"loss": 0.772, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.5577427821522309, |
|
"grad_norm": 0.03703594756866284, |
|
"learning_rate": 0.00014597531747344593, |
|
"loss": 0.7511, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.5586801649793776, |
|
"grad_norm": 0.036214320470059805, |
|
"learning_rate": 0.00014548464277304982, |
|
"loss": 0.7706, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.5596175478065242, |
|
"grad_norm": 0.03917835401091949, |
|
"learning_rate": 0.00014499401642850207, |
|
"loss": 0.739, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.5605549306336708, |
|
"grad_norm": 0.0498467008354317, |
|
"learning_rate": 0.00014450344369401651, |
|
"loss": 0.763, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.5614923134608174, |
|
"grad_norm": 0.05384613905490524, |
|
"learning_rate": 0.0001440129298232332, |
|
"loss": 0.7892, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.562429696287964, |
|
"grad_norm": 0.0457358145474257, |
|
"learning_rate": 0.0001435224800691614, |
|
"loss": 0.7988, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5633670791151106, |
|
"grad_norm": 0.04336091654025471, |
|
"learning_rate": 0.00014303209968412418, |
|
"loss": 0.799, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.5643044619422573, |
|
"grad_norm": 0.04292534665730051, |
|
"learning_rate": 0.00014254179391970132, |
|
"loss": 0.7775, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.5652418447694039, |
|
"grad_norm": 0.04250101254193063, |
|
"learning_rate": 0.00014205156802667374, |
|
"loss": 0.7788, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.5661792275965505, |
|
"grad_norm": 0.04030937649018944, |
|
"learning_rate": 0.00014156142725496682, |
|
"loss": 0.7771, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.567116610423697, |
|
"grad_norm": 0.037700014044718524, |
|
"learning_rate": 0.00014107137685359457, |
|
"loss": 0.7603, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.5680539932508436, |
|
"grad_norm": 0.03917274407024749, |
|
"learning_rate": 0.000140581422070603, |
|
"loss": 0.7756, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.5689913760779902, |
|
"grad_norm": 0.03965878064631959, |
|
"learning_rate": 0.00014009156815301426, |
|
"loss": 0.781, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.5699287589051368, |
|
"grad_norm": 0.03694985326600243, |
|
"learning_rate": 0.00013960182034677016, |
|
"loss": 0.7764, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.5708661417322834, |
|
"grad_norm": 0.03649532964794123, |
|
"learning_rate": 0.00013911218389667642, |
|
"loss": 0.7549, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.5718035245594301, |
|
"grad_norm": 0.03787093545824704, |
|
"learning_rate": 0.0001386226640463459, |
|
"loss": 0.7472, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.5727409073865767, |
|
"grad_norm": 0.038098821200233814, |
|
"learning_rate": 0.00013813326603814296, |
|
"loss": 0.794, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.5736782902137233, |
|
"grad_norm": 0.04007624623894434, |
|
"learning_rate": 0.00013764399511312716, |
|
"loss": 0.7973, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.5746156730408699, |
|
"grad_norm": 0.040480907401198925, |
|
"learning_rate": 0.0001371548565109969, |
|
"loss": 0.7693, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.5755530558680165, |
|
"grad_norm": 0.037195195565526876, |
|
"learning_rate": 0.00013666585547003377, |
|
"loss": 0.7548, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.5764904386951631, |
|
"grad_norm": 0.04118922842272934, |
|
"learning_rate": 0.00013617699722704598, |
|
"loss": 0.781, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.5774278215223098, |
|
"grad_norm": 0.041231050481496015, |
|
"learning_rate": 0.0001356882870173126, |
|
"loss": 0.7764, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.5783652043494563, |
|
"grad_norm": 0.03825190175591559, |
|
"learning_rate": 0.0001351997300745273, |
|
"loss": 0.7604, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.5793025871766029, |
|
"grad_norm": 0.041476811594546956, |
|
"learning_rate": 0.0001347113316307425, |
|
"loss": 0.774, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.5802399700037495, |
|
"grad_norm": 0.04062186609985697, |
|
"learning_rate": 0.0001342230969163131, |
|
"loss": 0.7669, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.5811773528308961, |
|
"grad_norm": 0.037469179130772814, |
|
"learning_rate": 0.00013373503115984072, |
|
"loss": 0.8056, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5821147356580427, |
|
"grad_norm": 0.04327856604489263, |
|
"learning_rate": 0.0001332471395881174, |
|
"loss": 0.7616, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.5830521184851893, |
|
"grad_norm": 0.03958983021180182, |
|
"learning_rate": 0.00013275942742607002, |
|
"loss": 0.7756, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.583989501312336, |
|
"grad_norm": 0.03790940533047183, |
|
"learning_rate": 0.00013227189989670392, |
|
"loss": 0.7568, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 0.5849268841394826, |
|
"grad_norm": 0.0403239958967278, |
|
"learning_rate": 0.00013178456222104733, |
|
"loss": 0.742, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.5858642669666292, |
|
"grad_norm": 0.03895542187325954, |
|
"learning_rate": 0.00013129741961809513, |
|
"loss": 0.7699, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.5868016497937758, |
|
"grad_norm": 0.03820864797313289, |
|
"learning_rate": 0.00013081047730475331, |
|
"loss": 0.7601, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.5877390326209224, |
|
"grad_norm": 0.04726355486417647, |
|
"learning_rate": 0.00013032374049578292, |
|
"loss": 0.7642, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 0.588676415448069, |
|
"grad_norm": 0.04203366758155159, |
|
"learning_rate": 0.000129837214403744, |
|
"loss": 0.7488, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.5896137982752157, |
|
"grad_norm": 0.038981436347331345, |
|
"learning_rate": 0.00012935090423894015, |
|
"loss": 0.7862, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 0.5905511811023622, |
|
"grad_norm": 0.03432435415992516, |
|
"learning_rate": 0.0001288648152093626, |
|
"loss": 0.7525, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.5914885639295088, |
|
"grad_norm": 0.03822931424545101, |
|
"learning_rate": 0.00012837895252063432, |
|
"loss": 0.8001, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 0.5924259467566554, |
|
"grad_norm": 0.039214953158152176, |
|
"learning_rate": 0.00012789332137595427, |
|
"loss": 0.7245, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.593363329583802, |
|
"grad_norm": 0.037907598245917364, |
|
"learning_rate": 0.00012740792697604202, |
|
"loss": 0.7648, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 0.5943007124109486, |
|
"grad_norm": 0.03785947037848109, |
|
"learning_rate": 0.00012692277451908145, |
|
"loss": 0.7654, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.5952380952380952, |
|
"grad_norm": 0.03850785633219605, |
|
"learning_rate": 0.0001264378692006658, |
|
"loss": 0.7663, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.5961754780652418, |
|
"grad_norm": 0.036428916422083596, |
|
"learning_rate": 0.00012595321621374135, |
|
"loss": 0.7596, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.5971128608923885, |
|
"grad_norm": 0.038639622718949614, |
|
"learning_rate": 0.0001254688207485522, |
|
"loss": 0.7318, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 0.5980502437195351, |
|
"grad_norm": 0.04200767445250016, |
|
"learning_rate": 0.00012498468799258466, |
|
"loss": 0.7543, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.5989876265466817, |
|
"grad_norm": 0.039347067609858684, |
|
"learning_rate": 0.00012450082313051163, |
|
"loss": 0.7782, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 0.5999250093738283, |
|
"grad_norm": 0.03956472616518923, |
|
"learning_rate": 0.00012401723134413694, |
|
"loss": 0.7748, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6008623922009749, |
|
"grad_norm": 0.03906550581460343, |
|
"learning_rate": 0.00012353391781234026, |
|
"loss": 0.781, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 0.6017997750281214, |
|
"grad_norm": 0.04226202156309189, |
|
"learning_rate": 0.0001230508877110211, |
|
"loss": 0.7813, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.602737157855268, |
|
"grad_norm": 0.04310759188393267, |
|
"learning_rate": 0.00012256814621304385, |
|
"loss": 0.7538, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 0.6036745406824147, |
|
"grad_norm": 0.04137403063128998, |
|
"learning_rate": 0.00012208569848818214, |
|
"loss": 0.7785, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.6046119235095613, |
|
"grad_norm": 0.03490693407426236, |
|
"learning_rate": 0.00012160354970306339, |
|
"loss": 0.7615, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.6055493063367079, |
|
"grad_norm": 0.04058597804316207, |
|
"learning_rate": 0.0001211217050211139, |
|
"loss": 0.7737, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.6064866891638545, |
|
"grad_norm": 0.039857581905152456, |
|
"learning_rate": 0.00012064016960250294, |
|
"loss": 0.7624, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 0.6074240719910011, |
|
"grad_norm": 0.03861470764960366, |
|
"learning_rate": 0.00012015894860408811, |
|
"loss": 0.732, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.6083614548181477, |
|
"grad_norm": 0.03893813785005398, |
|
"learning_rate": 0.00011967804717935964, |
|
"loss": 0.7755, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 0.6092988376452944, |
|
"grad_norm": 0.03949254956830129, |
|
"learning_rate": 0.00011919747047838545, |
|
"loss": 0.7575, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.610236220472441, |
|
"grad_norm": 0.03697345161480537, |
|
"learning_rate": 0.00011871722364775583, |
|
"loss": 0.7606, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.6111736032995876, |
|
"grad_norm": 0.036342062943778654, |
|
"learning_rate": 0.00011823731183052867, |
|
"loss": 0.766, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.6121109861267342, |
|
"grad_norm": 0.0370100352544129, |
|
"learning_rate": 0.00011775774016617381, |
|
"loss": 0.7623, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 0.6130483689538808, |
|
"grad_norm": 0.03696482383175287, |
|
"learning_rate": 0.00011727851379051865, |
|
"loss": 0.7655, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.6139857517810273, |
|
"grad_norm": 0.03985849511649083, |
|
"learning_rate": 0.00011679963783569248, |
|
"loss": 0.7495, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.6149231346081739, |
|
"grad_norm": 0.038876495340474354, |
|
"learning_rate": 0.00011632111743007223, |
|
"loss": 0.7813, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.6158605174353206, |
|
"grad_norm": 0.03750007207907501, |
|
"learning_rate": 0.00011584295769822694, |
|
"loss": 0.7528, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 0.6167979002624672, |
|
"grad_norm": 0.0408841339742789, |
|
"learning_rate": 0.00011536516376086311, |
|
"loss": 0.7719, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.6177352830896138, |
|
"grad_norm": 0.042011717790033064, |
|
"learning_rate": 0.0001148877407347701, |
|
"loss": 0.7493, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 0.6186726659167604, |
|
"grad_norm": 0.0414847232838945, |
|
"learning_rate": 0.00011441069373276481, |
|
"loss": 0.7536, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.619610048743907, |
|
"grad_norm": 0.042292805389266726, |
|
"learning_rate": 0.00011393402786363753, |
|
"loss": 0.7602, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 0.6205474315710536, |
|
"grad_norm": 0.038665796975353536, |
|
"learning_rate": 0.00011345774823209661, |
|
"loss": 0.7504, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.6214848143982002, |
|
"grad_norm": 0.04162065454602133, |
|
"learning_rate": 0.00011298185993871442, |
|
"loss": 0.7638, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 0.6224221972253469, |
|
"grad_norm": 0.03784958872128606, |
|
"learning_rate": 0.00011250636807987208, |
|
"loss": 0.7368, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.6233595800524935, |
|
"grad_norm": 0.039769498866178414, |
|
"learning_rate": 0.00011203127774770554, |
|
"loss": 0.7664, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.6242969628796401, |
|
"grad_norm": 0.0373640006459259, |
|
"learning_rate": 0.00011155659403005048, |
|
"loss": 0.7524, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.6252343457067866, |
|
"grad_norm": 0.03809871959132381, |
|
"learning_rate": 0.00011108232201038828, |
|
"loss": 0.7484, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 0.6261717285339332, |
|
"grad_norm": 0.03763522153071777, |
|
"learning_rate": 0.00011060846676779107, |
|
"loss": 0.7433, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.6271091113610798, |
|
"grad_norm": 0.03946664053179257, |
|
"learning_rate": 0.00011013503337686799, |
|
"loss": 0.7677, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 0.6280464941882264, |
|
"grad_norm": 0.03901754964576778, |
|
"learning_rate": 0.00010966202690771014, |
|
"loss": 0.7561, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.6289838770153731, |
|
"grad_norm": 0.03912014672930949, |
|
"learning_rate": 0.00010918945242583688, |
|
"loss": 0.7789, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 0.6299212598425197, |
|
"grad_norm": 0.03717356045637909, |
|
"learning_rate": 0.00010871731499214128, |
|
"loss": 0.7521, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.6308586426696663, |
|
"grad_norm": 0.037849859871512744, |
|
"learning_rate": 0.00010824561966283583, |
|
"loss": 0.7638, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 0.6317960254968129, |
|
"grad_norm": 0.03680463786596637, |
|
"learning_rate": 0.00010777437148939868, |
|
"loss": 0.7417, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.6327334083239595, |
|
"grad_norm": 0.03897036657910483, |
|
"learning_rate": 0.0001073035755185191, |
|
"loss": 0.737, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.6336707911511061, |
|
"grad_norm": 0.04507944036400963, |
|
"learning_rate": 0.00010683323679204374, |
|
"loss": 0.7526, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.6346081739782528, |
|
"grad_norm": 0.041130560554180164, |
|
"learning_rate": 0.00010636336034692238, |
|
"loss": 0.7472, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 0.6355455568053994, |
|
"grad_norm": 0.037500549260058, |
|
"learning_rate": 0.00010589395121515441, |
|
"loss": 0.7642, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.636482939632546, |
|
"grad_norm": 0.03501911608758103, |
|
"learning_rate": 0.00010542501442373441, |
|
"loss": 0.7552, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 0.6374203224596925, |
|
"grad_norm": 0.041747590796681534, |
|
"learning_rate": 0.00010495655499459874, |
|
"loss": 0.7825, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.6383577052868391, |
|
"grad_norm": 0.0415454403917153, |
|
"learning_rate": 0.00010448857794457143, |
|
"loss": 0.7615, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 0.6392950881139857, |
|
"grad_norm": 0.034393345932052226, |
|
"learning_rate": 0.00010402108828531086, |
|
"loss": 0.7501, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.6402324709411323, |
|
"grad_norm": 0.041393829589670965, |
|
"learning_rate": 0.00010355409102325557, |
|
"loss": 0.7725, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 0.641169853768279, |
|
"grad_norm": 0.039070120349108735, |
|
"learning_rate": 0.00010308759115957118, |
|
"loss": 0.7953, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.6421072365954256, |
|
"grad_norm": 0.03792543146870935, |
|
"learning_rate": 0.00010262159369009628, |
|
"loss": 0.7584, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.6430446194225722, |
|
"grad_norm": 0.04012922678154492, |
|
"learning_rate": 0.00010215610360528948, |
|
"loss": 0.736, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.6439820022497188, |
|
"grad_norm": 0.03600161764198426, |
|
"learning_rate": 0.00010169112589017568, |
|
"loss": 0.739, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 0.6449193850768654, |
|
"grad_norm": 0.04003494519804115, |
|
"learning_rate": 0.00010122666552429259, |
|
"loss": 0.7668, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.645856767904012, |
|
"grad_norm": 0.039267717082010545, |
|
"learning_rate": 0.00010076272748163756, |
|
"loss": 0.7683, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 0.6467941507311586, |
|
"grad_norm": 0.037805171037559454, |
|
"learning_rate": 0.00010029931673061433, |
|
"loss": 0.7426, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.6477315335583053, |
|
"grad_norm": 0.037277881133197355, |
|
"learning_rate": 9.98364382339798e-05, |
|
"loss": 0.7267, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 0.6486689163854518, |
|
"grad_norm": 0.043347070226570104, |
|
"learning_rate": 9.937409694879064e-05, |
|
"loss": 0.7548, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.6496062992125984, |
|
"grad_norm": 0.03829734267181962, |
|
"learning_rate": 9.891229782635074e-05, |
|
"loss": 0.7718, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 0.650543682039745, |
|
"grad_norm": 0.038650875886255875, |
|
"learning_rate": 9.845104581215758e-05, |
|
"loss": 0.7705, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.6514810648668916, |
|
"grad_norm": 0.042871623377863474, |
|
"learning_rate": 9.799034584584975e-05, |
|
"loss": 0.7495, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.6524184476940382, |
|
"grad_norm": 0.03585928229669108, |
|
"learning_rate": 9.753020286115368e-05, |
|
"loss": 0.7432, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.6533558305211848, |
|
"grad_norm": 0.03786939257702819, |
|
"learning_rate": 9.707062178583119e-05, |
|
"loss": 0.7904, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 0.6542932133483315, |
|
"grad_norm": 0.0368092502095729, |
|
"learning_rate": 9.661160754162618e-05, |
|
"loss": 0.7555, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.6552305961754781, |
|
"grad_norm": 0.03613287171684745, |
|
"learning_rate": 9.615316504421262e-05, |
|
"loss": 0.7439, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 0.6561679790026247, |
|
"grad_norm": 0.036433445794931595, |
|
"learning_rate": 9.569529920314121e-05, |
|
"loss": 0.7529, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6571053618297713, |
|
"grad_norm": 0.0424735007867028, |
|
"learning_rate": 9.523801492178736e-05, |
|
"loss": 0.7397, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 0.6580427446569179, |
|
"grad_norm": 0.03698802222139945, |
|
"learning_rate": 9.47813170972983e-05, |
|
"loss": 0.7437, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.6589801274840645, |
|
"grad_norm": 0.03733477324154079, |
|
"learning_rate": 9.432521062054084e-05, |
|
"loss": 0.7705, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 0.6599175103112112, |
|
"grad_norm": 0.03508532202382053, |
|
"learning_rate": 9.386970037604892e-05, |
|
"loss": 0.7392, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.6608548931383577, |
|
"grad_norm": 0.03827744300792672, |
|
"learning_rate": 9.341479124197123e-05, |
|
"loss": 0.7238, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.6617922759655043, |
|
"grad_norm": 0.034951831735317074, |
|
"learning_rate": 9.296048809001928e-05, |
|
"loss": 0.7445, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.6627296587926509, |
|
"grad_norm": 0.036754627748938604, |
|
"learning_rate": 9.250679578541465e-05, |
|
"loss": 0.7648, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 0.6636670416197975, |
|
"grad_norm": 0.04075976815278343, |
|
"learning_rate": 9.205371918683761e-05, |
|
"loss": 0.7498, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.6646044244469441, |
|
"grad_norm": 0.03941979332992536, |
|
"learning_rate": 9.16012631463744e-05, |
|
"loss": 0.7599, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 0.6655418072740907, |
|
"grad_norm": 0.03389585413188664, |
|
"learning_rate": 9.114943250946581e-05, |
|
"loss": 0.7242, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.6664791901012374, |
|
"grad_norm": 0.037973128654648046, |
|
"learning_rate": 9.069823211485485e-05, |
|
"loss": 0.7397, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 0.667416572928384, |
|
"grad_norm": 0.03493012197632408, |
|
"learning_rate": 9.024766679453538e-05, |
|
"loss": 0.7218, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.6683539557555306, |
|
"grad_norm": 0.048208082428385556, |
|
"learning_rate": 8.979774137369989e-05, |
|
"loss": 0.7709, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.6692913385826772, |
|
"grad_norm": 0.039871249220599175, |
|
"learning_rate": 8.934846067068825e-05, |
|
"loss": 0.7306, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.6702287214098238, |
|
"grad_norm": 0.03855933322661882, |
|
"learning_rate": 8.88998294969358e-05, |
|
"loss": 0.73, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.6711661042369704, |
|
"grad_norm": 0.0397648331368396, |
|
"learning_rate": 8.845185265692201e-05, |
|
"loss": 0.7324, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.6721034870641169, |
|
"grad_norm": 0.03767828224137, |
|
"learning_rate": 8.800453494811894e-05, |
|
"loss": 0.7372, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 0.6730408698912635, |
|
"grad_norm": 0.03982314491187269, |
|
"learning_rate": 8.755788116093983e-05, |
|
"loss": 0.7735, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.6739782527184102, |
|
"grad_norm": 0.03712738479588227, |
|
"learning_rate": 8.711189607868795e-05, |
|
"loss": 0.725, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 0.6749156355455568, |
|
"grad_norm": 0.03733488944845488, |
|
"learning_rate": 8.666658447750519e-05, |
|
"loss": 0.7372, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.6758530183727034, |
|
"grad_norm": 0.0377678114465905, |
|
"learning_rate": 8.622195112632107e-05, |
|
"loss": 0.7225, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 0.67679040119985, |
|
"grad_norm": 0.03971467083786042, |
|
"learning_rate": 8.577800078680156e-05, |
|
"loss": 0.7691, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.6777277840269966, |
|
"grad_norm": 0.041555052270688675, |
|
"learning_rate": 8.533473821329814e-05, |
|
"loss": 0.746, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 0.6786651668541432, |
|
"grad_norm": 0.03681199845630553, |
|
"learning_rate": 8.489216815279682e-05, |
|
"loss": 0.7326, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.6796025496812899, |
|
"grad_norm": 0.03639372767368361, |
|
"learning_rate": 8.445029534486741e-05, |
|
"loss": 0.7558, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.6805399325084365, |
|
"grad_norm": 0.03930037562387389, |
|
"learning_rate": 8.400912452161271e-05, |
|
"loss": 0.7566, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.6814773153355831, |
|
"grad_norm": 0.038113159588276734, |
|
"learning_rate": 8.356866040761786e-05, |
|
"loss": 0.7479, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 0.6824146981627297, |
|
"grad_norm": 0.03668907273839101, |
|
"learning_rate": 8.312890771989943e-05, |
|
"loss": 0.7507, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.6833520809898763, |
|
"grad_norm": 0.03877486758990776, |
|
"learning_rate": 8.268987116785569e-05, |
|
"loss": 0.7119, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 0.6842894638170228, |
|
"grad_norm": 0.03641698338134557, |
|
"learning_rate": 8.225155545321514e-05, |
|
"loss": 0.7419, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.6852268466441694, |
|
"grad_norm": 0.037028014658097706, |
|
"learning_rate": 8.181396526998713e-05, |
|
"loss": 0.7511, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 0.6861642294713161, |
|
"grad_norm": 0.03742801285890605, |
|
"learning_rate": 8.13771053044109e-05, |
|
"loss": 0.7464, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.6871016122984627, |
|
"grad_norm": 0.03700498083910332, |
|
"learning_rate": 8.094098023490573e-05, |
|
"loss": 0.7509, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 0.6880389951256093, |
|
"grad_norm": 0.041613081744209884, |
|
"learning_rate": 8.050559473202077e-05, |
|
"loss": 0.7547, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.6889763779527559, |
|
"grad_norm": 0.03578452096886478, |
|
"learning_rate": 8.00709534583848e-05, |
|
"loss": 0.7486, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.6899137607799025, |
|
"grad_norm": 0.039555712535017766, |
|
"learning_rate": 7.963706106865692e-05, |
|
"loss": 0.7441, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.6908511436070491, |
|
"grad_norm": 0.03614203981567192, |
|
"learning_rate": 7.920392220947577e-05, |
|
"loss": 0.7546, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 0.6917885264341957, |
|
"grad_norm": 0.03936732269908494, |
|
"learning_rate": 7.877154151941082e-05, |
|
"loss": 0.7544, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.6927259092613424, |
|
"grad_norm": 0.04056135650395248, |
|
"learning_rate": 7.833992362891173e-05, |
|
"loss": 0.748, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 0.693663292088489, |
|
"grad_norm": 0.03813900176127187, |
|
"learning_rate": 7.790907316025935e-05, |
|
"loss": 0.7566, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.6946006749156356, |
|
"grad_norm": 0.036115814487516676, |
|
"learning_rate": 7.74789947275161e-05, |
|
"loss": 0.731, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 0.6955380577427821, |
|
"grad_norm": 0.041142128143449405, |
|
"learning_rate": 7.704969293647643e-05, |
|
"loss": 0.7686, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.6964754405699287, |
|
"grad_norm": 0.04017787329812371, |
|
"learning_rate": 7.662117238461769e-05, |
|
"loss": 0.7641, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 0.6974128233970753, |
|
"grad_norm": 0.03912928253082181, |
|
"learning_rate": 7.619343766105065e-05, |
|
"loss": 0.7337, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.698350206224222, |
|
"grad_norm": 0.03739403328622796, |
|
"learning_rate": 7.576649334647063e-05, |
|
"loss": 0.7688, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.6992875890513686, |
|
"grad_norm": 0.0362755943904781, |
|
"learning_rate": 7.534034401310817e-05, |
|
"loss": 0.7449, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.7002249718785152, |
|
"grad_norm": 0.03857280564508149, |
|
"learning_rate": 7.49149942246803e-05, |
|
"loss": 0.7417, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 0.7011623547056618, |
|
"grad_norm": 0.03765394646978099, |
|
"learning_rate": 7.449044853634153e-05, |
|
"loss": 0.7733, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.7020997375328084, |
|
"grad_norm": 0.038387501642174006, |
|
"learning_rate": 7.406671149463509e-05, |
|
"loss": 0.7157, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 0.703037120359955, |
|
"grad_norm": 0.03475103792139527, |
|
"learning_rate": 7.364378763744429e-05, |
|
"loss": 0.7478, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.7039745031871016, |
|
"grad_norm": 0.0378553405313383, |
|
"learning_rate": 7.322168149394386e-05, |
|
"loss": 0.7566, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 0.7049118860142483, |
|
"grad_norm": 0.039967415152981675, |
|
"learning_rate": 7.280039758455147e-05, |
|
"loss": 0.7541, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.7058492688413949, |
|
"grad_norm": 0.035394814696152195, |
|
"learning_rate": 7.23799404208794e-05, |
|
"loss": 0.7625, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 0.7067866516685414, |
|
"grad_norm": 0.038752752547861896, |
|
"learning_rate": 7.19603145056859e-05, |
|
"loss": 0.7612, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.707724034495688, |
|
"grad_norm": 0.037109030416605036, |
|
"learning_rate": 7.154152433282762e-05, |
|
"loss": 0.7382, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.7086614173228346, |
|
"grad_norm": 0.03554207953122535, |
|
"learning_rate": 7.112357438721065e-05, |
|
"loss": 0.7455, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.7095988001499812, |
|
"grad_norm": 0.039676102077878514, |
|
"learning_rate": 7.070646914474335e-05, |
|
"loss": 0.7173, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 0.7105361829771278, |
|
"grad_norm": 0.03421837506332554, |
|
"learning_rate": 7.029021307228755e-05, |
|
"loss": 0.7261, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.7114735658042745, |
|
"grad_norm": 0.04037332220576155, |
|
"learning_rate": 6.987481062761157e-05, |
|
"loss": 0.7435, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 0.7124109486314211, |
|
"grad_norm": 0.03870777741485929, |
|
"learning_rate": 6.94602662593417e-05, |
|
"loss": 0.7263, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7133483314585677, |
|
"grad_norm": 0.04496580126935979, |
|
"learning_rate": 6.90465844069151e-05, |
|
"loss": 0.776, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 0.038235977347547696, |
|
"learning_rate": 6.863376950053221e-05, |
|
"loss": 0.7408, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.7152230971128609, |
|
"grad_norm": 0.03531689033224846, |
|
"learning_rate": 6.82218259611088e-05, |
|
"loss": 0.738, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 0.7161604799400075, |
|
"grad_norm": 0.036562035053447246, |
|
"learning_rate": 6.781075820022946e-05, |
|
"loss": 0.779, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.7170978627671541, |
|
"grad_norm": 0.03879265532726638, |
|
"learning_rate": 6.740057062009951e-05, |
|
"loss": 0.7514, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.7180352455943008, |
|
"grad_norm": 0.037768573089647754, |
|
"learning_rate": 6.69912676134984e-05, |
|
"loss": 0.7423, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.7189726284214473, |
|
"grad_norm": 0.03723804398233768, |
|
"learning_rate": 6.658285356373253e-05, |
|
"loss": 0.7277, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 0.7199100112485939, |
|
"grad_norm": 0.03866540526571928, |
|
"learning_rate": 6.617533284458826e-05, |
|
"loss": 0.7452, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.7208473940757405, |
|
"grad_norm": 0.03743514627207939, |
|
"learning_rate": 6.576870982028508e-05, |
|
"loss": 0.7487, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 0.7217847769028871, |
|
"grad_norm": 0.04147587697115252, |
|
"learning_rate": 6.53629888454289e-05, |
|
"loss": 0.7273, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.7227221597300337, |
|
"grad_norm": 0.04232917813229702, |
|
"learning_rate": 6.495817426496541e-05, |
|
"loss": 0.7413, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 0.7236595425571803, |
|
"grad_norm": 0.041237294474918335, |
|
"learning_rate": 6.455427041413356e-05, |
|
"loss": 0.735, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.724596925384327, |
|
"grad_norm": 0.039856233442403854, |
|
"learning_rate": 6.415128161841909e-05, |
|
"loss": 0.7512, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 0.7255343082114736, |
|
"grad_norm": 0.03843674829308366, |
|
"learning_rate": 6.374921219350826e-05, |
|
"loss": 0.7421, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.7264716910386202, |
|
"grad_norm": 0.03492500389641903, |
|
"learning_rate": 6.334806644524147e-05, |
|
"loss": 0.7613, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.7274090738657668, |
|
"grad_norm": 0.03869959252720361, |
|
"learning_rate": 6.294784866956757e-05, |
|
"loss": 0.7525, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.7283464566929134, |
|
"grad_norm": 0.038276776258313015, |
|
"learning_rate": 6.254856315249736e-05, |
|
"loss": 0.747, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 0.72928383952006, |
|
"grad_norm": 0.035243252084036204, |
|
"learning_rate": 6.21502141700579e-05, |
|
"loss": 0.7306, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.7302212223472065, |
|
"grad_norm": 0.03786773805086975, |
|
"learning_rate": 6.175280598824678e-05, |
|
"loss": 0.724, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 0.7311586051743532, |
|
"grad_norm": 0.039123147712291625, |
|
"learning_rate": 6.135634286298637e-05, |
|
"loss": 0.7349, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.7320959880014998, |
|
"grad_norm": 0.037251487752691576, |
|
"learning_rate": 6.0960829040078265e-05, |
|
"loss": 0.7345, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 0.7330333708286464, |
|
"grad_norm": 0.03742284184866869, |
|
"learning_rate": 6.05662687551576e-05, |
|
"loss": 0.7795, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.733970753655793, |
|
"grad_norm": 0.04105680206555956, |
|
"learning_rate": 6.017266623364826e-05, |
|
"loss": 0.7498, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 0.7349081364829396, |
|
"grad_norm": 0.039477278781974275, |
|
"learning_rate": 5.978002569071679e-05, |
|
"loss": 0.713, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.7358455193100862, |
|
"grad_norm": 0.03698403869400972, |
|
"learning_rate": 5.938835133122821e-05, |
|
"loss": 0.745, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.7367829021372329, |
|
"grad_norm": 0.0410733277463145, |
|
"learning_rate": 5.899764734970007e-05, |
|
"loss": 0.7341, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.7377202849643795, |
|
"grad_norm": 0.038917622759833966, |
|
"learning_rate": 5.860791793025817e-05, |
|
"loss": 0.7475, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 0.7386576677915261, |
|
"grad_norm": 0.03627337453876378, |
|
"learning_rate": 5.821916724659148e-05, |
|
"loss": 0.746, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.7395950506186727, |
|
"grad_norm": 0.03964503193315036, |
|
"learning_rate": 5.783139946190751e-05, |
|
"loss": 0.7398, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 0.7405324334458193, |
|
"grad_norm": 0.03762868562695812, |
|
"learning_rate": 5.744461872888771e-05, |
|
"loss": 0.7152, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.7414698162729659, |
|
"grad_norm": 0.03589488400010939, |
|
"learning_rate": 5.705882918964299e-05, |
|
"loss": 0.7461, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 0.7424071991001124, |
|
"grad_norm": 0.03949112731558336, |
|
"learning_rate": 5.667403497566939e-05, |
|
"loss": 0.7344, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.743344581927259, |
|
"grad_norm": 0.04151757065593634, |
|
"learning_rate": 5.629024020780375e-05, |
|
"loss": 0.7436, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 0.7442819647544057, |
|
"grad_norm": 0.037059488736541316, |
|
"learning_rate": 5.5907448996179766e-05, |
|
"loss": 0.7502, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.7452193475815523, |
|
"grad_norm": 0.03555904631512526, |
|
"learning_rate": 5.552566544018373e-05, |
|
"loss": 0.7465, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.7461567304086989, |
|
"grad_norm": 0.036464378499175255, |
|
"learning_rate": 5.514489362841083e-05, |
|
"loss": 0.715, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.7470941132358455, |
|
"grad_norm": 0.0394124136183908, |
|
"learning_rate": 5.4765137638621246e-05, |
|
"loss": 0.7613, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 0.7480314960629921, |
|
"grad_norm": 0.03711650720168927, |
|
"learning_rate": 5.4386401537696536e-05, |
|
"loss": 0.7425, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.7489688788901387, |
|
"grad_norm": 0.03888911993575119, |
|
"learning_rate": 5.400868938159609e-05, |
|
"loss": 0.7659, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 0.7499062617172854, |
|
"grad_norm": 0.03744744409013629, |
|
"learning_rate": 5.363200521531366e-05, |
|
"loss": 0.7325, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.750843644544432, |
|
"grad_norm": 0.03613327265209726, |
|
"learning_rate": 5.3256353072833936e-05, |
|
"loss": 0.7519, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 0.7517810273715786, |
|
"grad_norm": 0.03595596503402885, |
|
"learning_rate": 5.288173697708973e-05, |
|
"loss": 0.7469, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.7527184101987252, |
|
"grad_norm": 0.03769961152327502, |
|
"learning_rate": 5.2508160939918286e-05, |
|
"loss": 0.752, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 0.7536557930258717, |
|
"grad_norm": 0.03633117479738773, |
|
"learning_rate": 5.213562896201902e-05, |
|
"loss": 0.7446, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.7545931758530183, |
|
"grad_norm": 0.042037986286737154, |
|
"learning_rate": 5.176414503290993e-05, |
|
"loss": 0.7465, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.7555305586801649, |
|
"grad_norm": 0.03964006330583541, |
|
"learning_rate": 5.139371313088561e-05, |
|
"loss": 0.7409, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.7564679415073116, |
|
"grad_norm": 0.03557466108190626, |
|
"learning_rate": 5.1024337222974125e-05, |
|
"loss": 0.7482, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 0.7574053243344582, |
|
"grad_norm": 0.03735795418886787, |
|
"learning_rate": 5.065602126489453e-05, |
|
"loss": 0.7429, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.7583427071616048, |
|
"grad_norm": 0.03696761727309839, |
|
"learning_rate": 5.028876920101504e-05, |
|
"loss": 0.7217, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 0.7592800899887514, |
|
"grad_norm": 0.03683633483444796, |
|
"learning_rate": 4.992258496431002e-05, |
|
"loss": 0.7415, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.760217472815898, |
|
"grad_norm": 0.03527425447836841, |
|
"learning_rate": 4.955747247631865e-05, |
|
"loss": 0.7311, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 0.7611548556430446, |
|
"grad_norm": 0.03674873764080841, |
|
"learning_rate": 4.91934356471022e-05, |
|
"loss": 0.7515, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.7620922384701913, |
|
"grad_norm": 0.036177949800544354, |
|
"learning_rate": 4.883047837520268e-05, |
|
"loss": 0.7752, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 0.7630296212973379, |
|
"grad_norm": 0.03907021802297722, |
|
"learning_rate": 4.84686045476009e-05, |
|
"loss": 0.728, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.7639670041244845, |
|
"grad_norm": 0.037180348890681836, |
|
"learning_rate": 4.810781803967482e-05, |
|
"loss": 0.7221, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.7649043869516311, |
|
"grad_norm": 0.038648597139017825, |
|
"learning_rate": 4.7748122715158074e-05, |
|
"loss": 0.7171, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.7658417697787776, |
|
"grad_norm": 0.037736025165151156, |
|
"learning_rate": 4.7389522426098614e-05, |
|
"loss": 0.7521, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 0.7667791526059242, |
|
"grad_norm": 0.03723459058036576, |
|
"learning_rate": 4.703202101281744e-05, |
|
"loss": 0.7352, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.7677165354330708, |
|
"grad_norm": 0.03642827803946468, |
|
"learning_rate": 4.667562230386749e-05, |
|
"loss": 0.7449, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 0.7686539182602175, |
|
"grad_norm": 0.036360477245182196, |
|
"learning_rate": 4.63203301159926e-05, |
|
"loss": 0.735, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.7695913010873641, |
|
"grad_norm": 0.03758177450726638, |
|
"learning_rate": 4.596614825408666e-05, |
|
"loss": 0.7186, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 0.7705286839145107, |
|
"grad_norm": 0.03748476699050447, |
|
"learning_rate": 4.561308051115285e-05, |
|
"loss": 0.757, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.7714660667416573, |
|
"grad_norm": 0.036097069379836325, |
|
"learning_rate": 4.5261130668263054e-05, |
|
"loss": 0.743, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 0.7724034495688039, |
|
"grad_norm": 0.03661609433595777, |
|
"learning_rate": 4.4910302494517345e-05, |
|
"loss": 0.727, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.7733408323959505, |
|
"grad_norm": 0.036604261302591215, |
|
"learning_rate": 4.456059974700361e-05, |
|
"loss": 0.7198, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.7742782152230971, |
|
"grad_norm": 0.0375410578618143, |
|
"learning_rate": 4.4212026170757384e-05, |
|
"loss": 0.7385, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.7752155980502438, |
|
"grad_norm": 0.037797934383579375, |
|
"learning_rate": 4.3864585498721445e-05, |
|
"loss": 0.7375, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 0.7761529808773904, |
|
"grad_norm": 0.036408551423671234, |
|
"learning_rate": 4.3518281451706477e-05, |
|
"loss": 0.7614, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.7770903637045369, |
|
"grad_norm": 0.036659780564322314, |
|
"learning_rate": 4.317311773835043e-05, |
|
"loss": 0.7333, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 0.7780277465316835, |
|
"grad_norm": 0.03787359723072369, |
|
"learning_rate": 4.2829098055079524e-05, |
|
"loss": 0.7321, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.7789651293588301, |
|
"grad_norm": 0.039178508740307504, |
|
"learning_rate": 4.248622608606802e-05, |
|
"loss": 0.7568, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 0.7799025121859767, |
|
"grad_norm": 0.03788147306371571, |
|
"learning_rate": 4.214450550319943e-05, |
|
"loss": 0.7002, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.7808398950131233, |
|
"grad_norm": 0.03764951434893332, |
|
"learning_rate": 4.180393996602651e-05, |
|
"loss": 0.7483, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 0.78177727784027, |
|
"grad_norm": 0.03643925778123276, |
|
"learning_rate": 4.1464533121732613e-05, |
|
"loss": 0.7608, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.7827146606674166, |
|
"grad_norm": 0.03904458879716677, |
|
"learning_rate": 4.112628860509238e-05, |
|
"loss": 0.736, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.7836520434945632, |
|
"grad_norm": 0.0367790048472788, |
|
"learning_rate": 4.078921003843276e-05, |
|
"loss": 0.7516, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.7845894263217098, |
|
"grad_norm": 0.035606207842431264, |
|
"learning_rate": 4.045330103159454e-05, |
|
"loss": 0.7487, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.7855268091488564, |
|
"grad_norm": 0.03778912384186634, |
|
"learning_rate": 4.01185651818932e-05, |
|
"loss": 0.7418, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.786464191976003, |
|
"grad_norm": 0.03616645503819011, |
|
"learning_rate": 3.97850060740808e-05, |
|
"loss": 0.72, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 0.7874015748031497, |
|
"grad_norm": 0.04137105544838257, |
|
"learning_rate": 3.945262728030739e-05, |
|
"loss": 0.7297, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.7883389576302963, |
|
"grad_norm": 0.04008149649572554, |
|
"learning_rate": 3.912143236008286e-05, |
|
"loss": 0.7546, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 0.7892763404574428, |
|
"grad_norm": 0.04178918635092829, |
|
"learning_rate": 3.879142486023869e-05, |
|
"loss": 0.705, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.7902137232845894, |
|
"grad_norm": 0.03910230683910834, |
|
"learning_rate": 3.8462608314890084e-05, |
|
"loss": 0.7227, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 0.791151106111736, |
|
"grad_norm": 0.041392117493789675, |
|
"learning_rate": 3.8134986245398084e-05, |
|
"loss": 0.7221, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.7920884889388826, |
|
"grad_norm": 0.037713798352039335, |
|
"learning_rate": 3.780856216033185e-05, |
|
"loss": 0.7362, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.7930258717660292, |
|
"grad_norm": 0.0386257393458269, |
|
"learning_rate": 3.7483339555431055e-05, |
|
"loss": 0.75, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.7939632545931758, |
|
"grad_norm": 0.036597780629712674, |
|
"learning_rate": 3.715932191356861e-05, |
|
"loss": 0.7258, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 0.7949006374203225, |
|
"grad_norm": 0.0351893354995898, |
|
"learning_rate": 3.683651270471296e-05, |
|
"loss": 0.7199, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.7958380202474691, |
|
"grad_norm": 0.038598355578632684, |
|
"learning_rate": 3.65149153858916e-05, |
|
"loss": 0.7298, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 0.7967754030746157, |
|
"grad_norm": 0.03440364370849841, |
|
"learning_rate": 3.619453340115326e-05, |
|
"loss": 0.7312, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.7977127859017623, |
|
"grad_norm": 0.03802311815601217, |
|
"learning_rate": 3.5875370181531754e-05, |
|
"loss": 0.7424, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 0.7986501687289089, |
|
"grad_norm": 0.03885312032393017, |
|
"learning_rate": 3.555742914500867e-05, |
|
"loss": 0.7447, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.7995875515560555, |
|
"grad_norm": 0.03952348857049222, |
|
"learning_rate": 3.5240713696477095e-05, |
|
"loss": 0.7275, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 0.800524934383202, |
|
"grad_norm": 0.03783212853790451, |
|
"learning_rate": 3.4925227227705085e-05, |
|
"loss": 0.7673, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.8014623172103487, |
|
"grad_norm": 0.03855091891475635, |
|
"learning_rate": 3.461097311729914e-05, |
|
"loss": 0.7321, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.8023997000374953, |
|
"grad_norm": 0.03853653138387215, |
|
"learning_rate": 3.429795473066845e-05, |
|
"loss": 0.7502, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.8033370828646419, |
|
"grad_norm": 0.037938414011982106, |
|
"learning_rate": 3.3986175419988326e-05, |
|
"loss": 0.7827, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 0.8042744656917885, |
|
"grad_norm": 0.03714608764447697, |
|
"learning_rate": 3.367563852416484e-05, |
|
"loss": 0.7504, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.8052118485189351, |
|
"grad_norm": 0.037586754731944845, |
|
"learning_rate": 3.336634736879857e-05, |
|
"loss": 0.7412, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 0.8061492313460817, |
|
"grad_norm": 0.03609469062292054, |
|
"learning_rate": 3.3058305266149335e-05, |
|
"loss": 0.7309, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8070866141732284, |
|
"grad_norm": 0.04157190953672981, |
|
"learning_rate": 3.275151551510057e-05, |
|
"loss": 0.7533, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 0.808023997000375, |
|
"grad_norm": 0.03738613014861174, |
|
"learning_rate": 3.2445981401124035e-05, |
|
"loss": 0.7344, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.8089613798275216, |
|
"grad_norm": 0.037333684702503485, |
|
"learning_rate": 3.2141706196244646e-05, |
|
"loss": 0.7313, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 0.8098987626546682, |
|
"grad_norm": 0.03849398738947332, |
|
"learning_rate": 3.183869315900537e-05, |
|
"loss": 0.7321, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.8108361454818148, |
|
"grad_norm": 0.03780735515283732, |
|
"learning_rate": 3.153694553443241e-05, |
|
"loss": 0.7604, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.8117735283089614, |
|
"grad_norm": 0.03877151747469851, |
|
"learning_rate": 3.1236466554000414e-05, |
|
"loss": 0.7123, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.8127109111361079, |
|
"grad_norm": 0.03868932391660062, |
|
"learning_rate": 3.093725943559784e-05, |
|
"loss": 0.7377, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 0.8136482939632546, |
|
"grad_norm": 0.03748111742387308, |
|
"learning_rate": 3.0639327383492544e-05, |
|
"loss": 0.7171, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.8145856767904012, |
|
"grad_norm": 0.03879381228880918, |
|
"learning_rate": 3.0342673588297473e-05, |
|
"loss": 0.7177, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 0.8155230596175478, |
|
"grad_norm": 0.03783669509160595, |
|
"learning_rate": 3.004730122693641e-05, |
|
"loss": 0.7492, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.8164604424446944, |
|
"grad_norm": 0.036123962271055896, |
|
"learning_rate": 2.9753213462610077e-05, |
|
"loss": 0.7327, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 0.817397825271841, |
|
"grad_norm": 0.037712932440491655, |
|
"learning_rate": 2.9460413444762143e-05, |
|
"loss": 0.7382, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.8183352080989876, |
|
"grad_norm": 0.03722083272698044, |
|
"learning_rate": 2.9168904309045614e-05, |
|
"loss": 0.7259, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 0.8192725909261342, |
|
"grad_norm": 0.035672510126638225, |
|
"learning_rate": 2.8878689177289005e-05, |
|
"loss": 0.7282, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.8202099737532809, |
|
"grad_norm": 0.03514557869239733, |
|
"learning_rate": 2.8589771157463394e-05, |
|
"loss": 0.741, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.8211473565804275, |
|
"grad_norm": 0.03779365346411691, |
|
"learning_rate": 2.8302153343648486e-05, |
|
"loss": 0.7367, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.8220847394075741, |
|
"grad_norm": 0.036530868365906646, |
|
"learning_rate": 2.8015838816000168e-05, |
|
"loss": 0.7395, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 0.8230221222347207, |
|
"grad_norm": 0.03774672510896896, |
|
"learning_rate": 2.773083064071685e-05, |
|
"loss": 0.7439, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.8239595050618672, |
|
"grad_norm": 0.03737530943067525, |
|
"learning_rate": 2.7447131870007268e-05, |
|
"loss": 0.7391, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 0.8248968878890138, |
|
"grad_norm": 0.03589949691489075, |
|
"learning_rate": 2.716474554205722e-05, |
|
"loss": 0.7201, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8258342707161604, |
|
"grad_norm": 0.039108253157191145, |
|
"learning_rate": 2.688367468099739e-05, |
|
"loss": 0.715, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 0.8267716535433071, |
|
"grad_norm": 0.040098561050262535, |
|
"learning_rate": 2.6603922296871043e-05, |
|
"loss": 0.7635, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.8277090363704537, |
|
"grad_norm": 0.035538429924797096, |
|
"learning_rate": 2.632549138560129e-05, |
|
"loss": 0.7304, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 0.8286464191976003, |
|
"grad_norm": 0.035183348616448194, |
|
"learning_rate": 2.6048384928959653e-05, |
|
"loss": 0.723, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.8295838020247469, |
|
"grad_norm": 0.035542818739637466, |
|
"learning_rate": 2.5772605894533533e-05, |
|
"loss": 0.7513, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.8305211848518935, |
|
"grad_norm": 0.040912090543415644, |
|
"learning_rate": 2.5498157235694888e-05, |
|
"loss": 0.7463, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.8314585676790401, |
|
"grad_norm": 0.04115878743265954, |
|
"learning_rate": 2.5225041891568366e-05, |
|
"loss": 0.7421, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 0.8323959505061868, |
|
"grad_norm": 0.03473020418203695, |
|
"learning_rate": 2.4953262786999846e-05, |
|
"loss": 0.7318, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.03945899992708612, |
|
"learning_rate": 2.468282283252524e-05, |
|
"loss": 0.7399, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 0.83427071616048, |
|
"grad_norm": 0.041181932452618496, |
|
"learning_rate": 2.4413724924339166e-05, |
|
"loss": 0.7515, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.8352080989876266, |
|
"grad_norm": 0.03757379419175963, |
|
"learning_rate": 2.4145971944264025e-05, |
|
"loss": 0.7572, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 0.8361454818147731, |
|
"grad_norm": 0.03812065383102868, |
|
"learning_rate": 2.3879566759719132e-05, |
|
"loss": 0.7111, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.8370828646419197, |
|
"grad_norm": 0.03758167524030431, |
|
"learning_rate": 2.3614512223690002e-05, |
|
"loss": 0.7408, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 0.8380202474690663, |
|
"grad_norm": 0.0357989413923993, |
|
"learning_rate": 2.335081117469777e-05, |
|
"loss": 0.7404, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.838957630296213, |
|
"grad_norm": 0.03719143556333986, |
|
"learning_rate": 2.308846643676875e-05, |
|
"loss": 0.7421, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.8398950131233596, |
|
"grad_norm": 0.038416698176695595, |
|
"learning_rate": 2.2827480819404386e-05, |
|
"loss": 0.7391, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.8408323959505062, |
|
"grad_norm": 0.038838405309790824, |
|
"learning_rate": 2.2567857117550958e-05, |
|
"loss": 0.7504, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 0.8417697787776528, |
|
"grad_norm": 0.03678430153531787, |
|
"learning_rate": 2.230959811156972e-05, |
|
"loss": 0.721, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.8427071616047994, |
|
"grad_norm": 0.037007576832510956, |
|
"learning_rate": 2.2052706567207156e-05, |
|
"loss": 0.7095, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 0.843644544431946, |
|
"grad_norm": 0.03453973181827858, |
|
"learning_rate": 2.179718523556531e-05, |
|
"loss": 0.7229, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8445819272590926, |
|
"grad_norm": 0.037382201951380103, |
|
"learning_rate": 2.1543036853072386e-05, |
|
"loss": 0.7185, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 0.8455193100862393, |
|
"grad_norm": 0.04001888522833685, |
|
"learning_rate": 2.1290264141453313e-05, |
|
"loss": 0.7342, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.8464566929133859, |
|
"grad_norm": 0.035396516604348185, |
|
"learning_rate": 2.103886980770085e-05, |
|
"loss": 0.7128, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 0.8473940757405324, |
|
"grad_norm": 0.03790943797877688, |
|
"learning_rate": 2.0788856544046216e-05, |
|
"loss": 0.7599, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.848331458567679, |
|
"grad_norm": 0.03690950991272133, |
|
"learning_rate": 2.0540227027930773e-05, |
|
"loss": 0.7094, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.8492688413948256, |
|
"grad_norm": 0.036299029934951134, |
|
"learning_rate": 2.0292983921976753e-05, |
|
"loss": 0.7197, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.8502062242219722, |
|
"grad_norm": 0.036464577070665444, |
|
"learning_rate": 2.004712987395924e-05, |
|
"loss": 0.7306, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 0.8511436070491188, |
|
"grad_norm": 0.035761152065985326, |
|
"learning_rate": 1.9802667516777565e-05, |
|
"loss": 0.7289, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.8520809898762655, |
|
"grad_norm": 0.03812262175593615, |
|
"learning_rate": 1.9559599468427183e-05, |
|
"loss": 0.7406, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 0.8530183727034121, |
|
"grad_norm": 0.03807115739595877, |
|
"learning_rate": 1.9317928331971592e-05, |
|
"loss": 0.7245, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.8539557555305587, |
|
"grad_norm": 0.03647185658765888, |
|
"learning_rate": 1.9077656695514526e-05, |
|
"loss": 0.7348, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 0.8548931383577053, |
|
"grad_norm": 0.037735068284022226, |
|
"learning_rate": 1.8838787132172184e-05, |
|
"loss": 0.7433, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.8558305211848519, |
|
"grad_norm": 0.03411010074767823, |
|
"learning_rate": 1.860132220004565e-05, |
|
"loss": 0.7292, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 0.8567679040119985, |
|
"grad_norm": 0.03512838788517134, |
|
"learning_rate": 1.8365264442193618e-05, |
|
"loss": 0.7125, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.8577052868391452, |
|
"grad_norm": 0.03797586269699929, |
|
"learning_rate": 1.8130616386604973e-05, |
|
"loss": 0.7457, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.8586426696662918, |
|
"grad_norm": 0.03846116349108381, |
|
"learning_rate": 1.789738054617193e-05, |
|
"loss": 0.7254, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.8595800524934383, |
|
"grad_norm": 0.03248052807212059, |
|
"learning_rate": 1.766555941866291e-05, |
|
"loss": 0.7287, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 0.8605174353205849, |
|
"grad_norm": 0.03749670548712012, |
|
"learning_rate": 1.743515548669598e-05, |
|
"loss": 0.7345, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.8614548181477315, |
|
"grad_norm": 0.03868597238209571, |
|
"learning_rate": 1.7206171217712135e-05, |
|
"loss": 0.7387, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 0.8623922009748781, |
|
"grad_norm": 0.03763605398447834, |
|
"learning_rate": 1.6978609063948973e-05, |
|
"loss": 0.7201, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.8633295838020247, |
|
"grad_norm": 0.04009619133432012, |
|
"learning_rate": 1.6752471462414226e-05, |
|
"loss": 0.7384, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 0.8642669666291714, |
|
"grad_norm": 0.03572102487743062, |
|
"learning_rate": 1.6527760834860056e-05, |
|
"loss": 0.7348, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.865204349456318, |
|
"grad_norm": 0.038750256044430074, |
|
"learning_rate": 1.6304479587756652e-05, |
|
"loss": 0.7065, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 0.8661417322834646, |
|
"grad_norm": 0.03675663032395201, |
|
"learning_rate": 1.6082630112266888e-05, |
|
"loss": 0.7077, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.8670791151106112, |
|
"grad_norm": 0.03352806721702409, |
|
"learning_rate": 1.5862214784220305e-05, |
|
"loss": 0.7214, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.8680164979377578, |
|
"grad_norm": 0.03651862777178404, |
|
"learning_rate": 1.5643235964088064e-05, |
|
"loss": 0.7158, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.8689538807649044, |
|
"grad_norm": 0.03455566062476393, |
|
"learning_rate": 1.5425695996957416e-05, |
|
"loss": 0.736, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 0.869891263592051, |
|
"grad_norm": 0.038034552465222866, |
|
"learning_rate": 1.520959721250653e-05, |
|
"loss": 0.735, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.8708286464191975, |
|
"grad_norm": 0.03814002964874412, |
|
"learning_rate": 1.4994941924979919e-05, |
|
"loss": 0.725, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 0.8717660292463442, |
|
"grad_norm": 0.03613963285066848, |
|
"learning_rate": 1.4781732433163129e-05, |
|
"loss": 0.7158, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.8727034120734908, |
|
"grad_norm": 0.0391963633366358, |
|
"learning_rate": 1.4569971020358656e-05, |
|
"loss": 0.7043, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 0.8736407949006374, |
|
"grad_norm": 0.03573970458333823, |
|
"learning_rate": 1.4359659954360985e-05, |
|
"loss": 0.7262, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.874578177727784, |
|
"grad_norm": 0.03402185272612653, |
|
"learning_rate": 1.4150801487432727e-05, |
|
"loss": 0.726, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 0.8755155605549306, |
|
"grad_norm": 0.03988457684610016, |
|
"learning_rate": 1.394339785628027e-05, |
|
"loss": 0.7116, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.8764529433820772, |
|
"grad_norm": 0.03749913331479699, |
|
"learning_rate": 1.373745128202986e-05, |
|
"loss": 0.7486, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.8773903262092239, |
|
"grad_norm": 0.039418943387831605, |
|
"learning_rate": 1.3532963970203848e-05, |
|
"loss": 0.7311, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.8783277090363705, |
|
"grad_norm": 0.03691545995561202, |
|
"learning_rate": 1.332993811069708e-05, |
|
"loss": 0.7464, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 0.8792650918635171, |
|
"grad_norm": 0.03634444663432465, |
|
"learning_rate": 1.3128375877753393e-05, |
|
"loss": 0.7048, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.8802024746906637, |
|
"grad_norm": 0.03883997615492241, |
|
"learning_rate": 1.2928279429942362e-05, |
|
"loss": 0.743, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 0.8811398575178103, |
|
"grad_norm": 0.03831787223481465, |
|
"learning_rate": 1.2729650910136196e-05, |
|
"loss": 0.72, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.8820772403449568, |
|
"grad_norm": 0.038782793377748884, |
|
"learning_rate": 1.2532492445486769e-05, |
|
"loss": 0.7556, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 0.8830146231721034, |
|
"grad_norm": 0.035930508758284546, |
|
"learning_rate": 1.2336806147402828e-05, |
|
"loss": 0.7359, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.8839520059992501, |
|
"grad_norm": 0.03688784164900098, |
|
"learning_rate": 1.2142594111527415e-05, |
|
"loss": 0.7322, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 0.8848893888263967, |
|
"grad_norm": 0.03418578395324309, |
|
"learning_rate": 1.1949858417715418e-05, |
|
"loss": 0.7145, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.8858267716535433, |
|
"grad_norm": 0.03839104854863887, |
|
"learning_rate": 1.1758601130011259e-05, |
|
"loss": 0.708, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.8867641544806899, |
|
"grad_norm": 0.03690073415729465, |
|
"learning_rate": 1.1568824296626866e-05, |
|
"loss": 0.7268, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.8877015373078365, |
|
"grad_norm": 0.03785506282627266, |
|
"learning_rate": 1.1380529949919593e-05, |
|
"loss": 0.7472, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 0.8886389201349831, |
|
"grad_norm": 0.03505149905088435, |
|
"learning_rate": 1.1193720106370701e-05, |
|
"loss": 0.735, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.8895763029621298, |
|
"grad_norm": 0.038177240967898714, |
|
"learning_rate": 1.100839676656346e-05, |
|
"loss": 0.7181, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 0.8905136857892764, |
|
"grad_norm": 0.03798215847626741, |
|
"learning_rate": 1.0824561915162016e-05, |
|
"loss": 0.719, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.891451068616423, |
|
"grad_norm": 0.03757317113276994, |
|
"learning_rate": 1.0642217520889873e-05, |
|
"loss": 0.7412, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 0.8923884514435696, |
|
"grad_norm": 0.03762594674374493, |
|
"learning_rate": 1.0461365536509065e-05, |
|
"loss": 0.7365, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.8933258342707162, |
|
"grad_norm": 0.03752202520507965, |
|
"learning_rate": 1.0282007898798995e-05, |
|
"loss": 0.7146, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 0.8942632170978627, |
|
"grad_norm": 0.031040074178325168, |
|
"learning_rate": 1.01041465285359e-05, |
|
"loss": 0.7121, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.8952005999250093, |
|
"grad_norm": 0.03608189060059074, |
|
"learning_rate": 9.927783330472139e-06, |
|
"loss": 0.7328, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.896137982752156, |
|
"grad_norm": 0.03589888025539565, |
|
"learning_rate": 9.752920193315865e-06, |
|
"loss": 0.6956, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.8970753655793026, |
|
"grad_norm": 0.03503081744003102, |
|
"learning_rate": 9.579558989710872e-06, |
|
"loss": 0.734, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 0.8980127484064492, |
|
"grad_norm": 0.036423527513909, |
|
"learning_rate": 9.407701576216281e-06, |
|
"loss": 0.7139, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.8989501312335958, |
|
"grad_norm": 0.03773162502672724, |
|
"learning_rate": 9.237349793286963e-06, |
|
"loss": 0.7286, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 0.8998875140607424, |
|
"grad_norm": 0.0370655727344935, |
|
"learning_rate": 9.068505465253656e-06, |
|
"loss": 0.7364, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.900824896887889, |
|
"grad_norm": 0.03793992378742871, |
|
"learning_rate": 8.901170400303443e-06, |
|
"loss": 0.7117, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 0.9017622797150356, |
|
"grad_norm": 0.03749076700190644, |
|
"learning_rate": 8.735346390460452e-06, |
|
"loss": 0.7482, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.9026996625421823, |
|
"grad_norm": 0.03478713094360769, |
|
"learning_rate": 8.571035211566606e-06, |
|
"loss": 0.7282, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 0.9036370453693289, |
|
"grad_norm": 0.03931631750725771, |
|
"learning_rate": 8.408238623262625e-06, |
|
"loss": 0.7347, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.9045744281964755, |
|
"grad_norm": 0.03832734319910059, |
|
"learning_rate": 8.246958368969164e-06, |
|
"loss": 0.7085, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.905511811023622, |
|
"grad_norm": 0.036342961917758904, |
|
"learning_rate": 8.087196175868204e-06, |
|
"loss": 0.7459, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.9064491938507686, |
|
"grad_norm": 0.03811297614516944, |
|
"learning_rate": 7.928953754884482e-06, |
|
"loss": 0.7245, |
|
"step": 4835 |
|
}, |
|
{ |
|
"epoch": 0.9073865766779152, |
|
"grad_norm": 0.03617635523056376, |
|
"learning_rate": 7.772232800667117e-06, |
|
"loss": 0.6879, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.9083239595050618, |
|
"grad_norm": 0.03686080577213026, |
|
"learning_rate": 7.617034991571747e-06, |
|
"loss": 0.7308, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 0.9092613423322085, |
|
"grad_norm": 0.03468911090610316, |
|
"learning_rate": 7.463361989642108e-06, |
|
"loss": 0.7266, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.9101987251593551, |
|
"grad_norm": 0.03837314601114124, |
|
"learning_rate": 7.311215440592649e-06, |
|
"loss": 0.7244, |
|
"step": 4855 |
|
}, |
|
{ |
|
"epoch": 0.9111361079865017, |
|
"grad_norm": 0.03515020181576783, |
|
"learning_rate": 7.160596973790678e-06, |
|
"loss": 0.7095, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.9120734908136483, |
|
"grad_norm": 0.04067375872375338, |
|
"learning_rate": 7.011508202238958e-06, |
|
"loss": 0.7565, |
|
"step": 4865 |
|
}, |
|
{ |
|
"epoch": 0.9130108736407949, |
|
"grad_norm": 0.037896195508876414, |
|
"learning_rate": 6.8639507225584755e-06, |
|
"loss": 0.7479, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.9139482564679415, |
|
"grad_norm": 0.035925556435556304, |
|
"learning_rate": 6.7179261149712335e-06, |
|
"loss": 0.7287, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.9148856392950881, |
|
"grad_norm": 0.03437946260531882, |
|
"learning_rate": 6.57343594328355e-06, |
|
"loss": 0.7154, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.9158230221222348, |
|
"grad_norm": 0.036848739384731176, |
|
"learning_rate": 6.430481754868988e-06, |
|
"loss": 0.7306, |
|
"step": 4885 |
|
}, |
|
{ |
|
"epoch": 0.9167604049493814, |
|
"grad_norm": 0.03819174514800313, |
|
"learning_rate": 6.289065080652134e-06, |
|
"loss": 0.7423, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.9176977877765279, |
|
"grad_norm": 0.04015640945147137, |
|
"learning_rate": 6.149187435091912e-06, |
|
"loss": 0.7388, |
|
"step": 4895 |
|
}, |
|
{ |
|
"epoch": 0.9186351706036745, |
|
"grad_norm": 0.0354113100153628, |
|
"learning_rate": 6.010850316165533e-06, |
|
"loss": 0.7153, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.9195725534308211, |
|
"grad_norm": 0.03619736356277513, |
|
"learning_rate": 5.8740552053524185e-06, |
|
"loss": 0.7123, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 0.9205099362579677, |
|
"grad_norm": 0.03357169683122578, |
|
"learning_rate": 5.7388035676183e-06, |
|
"loss": 0.7367, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.9214473190851143, |
|
"grad_norm": 0.035341368116608526, |
|
"learning_rate": 5.6050968513995484e-06, |
|
"loss": 0.735, |
|
"step": 4915 |
|
}, |
|
{ |
|
"epoch": 0.922384701912261, |
|
"grad_norm": 0.03463388384208746, |
|
"learning_rate": 5.472936488587687e-06, |
|
"loss": 0.7045, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.9233220847394076, |
|
"grad_norm": 0.03646756098895019, |
|
"learning_rate": 5.342323894514017e-06, |
|
"loss": 0.692, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.9242594675665542, |
|
"grad_norm": 0.0378552791061763, |
|
"learning_rate": 5.213260467934499e-06, |
|
"loss": 0.7308, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.9251968503937008, |
|
"grad_norm": 0.03651837390186468, |
|
"learning_rate": 5.085747591014716e-06, |
|
"loss": 0.7241, |
|
"step": 4935 |
|
}, |
|
{ |
|
"epoch": 0.9261342332208474, |
|
"grad_norm": 0.03496033002413129, |
|
"learning_rate": 4.959786629315166e-06, |
|
"loss": 0.7201, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.927071616047994, |
|
"grad_norm": 0.03807303842127231, |
|
"learning_rate": 4.83537893177654e-06, |
|
"loss": 0.7228, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 0.9280089988751407, |
|
"grad_norm": 0.03388630160185748, |
|
"learning_rate": 4.712525830705338e-06, |
|
"loss": 0.7255, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.9289463817022872, |
|
"grad_norm": 0.03359401994529555, |
|
"learning_rate": 4.591228641759559e-06, |
|
"loss": 0.7145, |
|
"step": 4955 |
|
}, |
|
{ |
|
"epoch": 0.9298837645294338, |
|
"grad_norm": 0.03790506150006882, |
|
"learning_rate": 4.471488663934647e-06, |
|
"loss": 0.7182, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.9308211473565804, |
|
"grad_norm": 0.03717261368444523, |
|
"learning_rate": 4.3533071795496035e-06, |
|
"loss": 0.7423, |
|
"step": 4965 |
|
}, |
|
{ |
|
"epoch": 0.931758530183727, |
|
"grad_norm": 0.03725676902347381, |
|
"learning_rate": 4.236685454233113e-06, |
|
"loss": 0.7384, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.9326959130108736, |
|
"grad_norm": 0.03790059729911343, |
|
"learning_rate": 4.12162473691024e-06, |
|
"loss": 0.7408, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.9336332958380202, |
|
"grad_norm": 0.03917020457274146, |
|
"learning_rate": 4.008126259788752e-06, |
|
"loss": 0.7203, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.9345706786651669, |
|
"grad_norm": 0.037051465451931255, |
|
"learning_rate": 3.896191238346219e-06, |
|
"loss": 0.7295, |
|
"step": 4985 |
|
}, |
|
{ |
|
"epoch": 0.9355080614923135, |
|
"grad_norm": 0.037826370662724866, |
|
"learning_rate": 3.785820871316736e-06, |
|
"loss": 0.7087, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.9364454443194601, |
|
"grad_norm": 0.03734181026651175, |
|
"learning_rate": 3.677016340678318e-06, |
|
"loss": 0.7244, |
|
"step": 4995 |
|
}, |
|
{ |
|
"epoch": 0.9373828271466067, |
|
"grad_norm": 0.03568772503256821, |
|
"learning_rate": 3.5697788116400283e-06, |
|
"loss": 0.7204, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9383202099737533, |
|
"grad_norm": 0.03537228998662857, |
|
"learning_rate": 3.4641094326296524e-06, |
|
"loss": 0.7089, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 0.9392575928008999, |
|
"grad_norm": 0.037141105148803415, |
|
"learning_rate": 3.3600093352814107e-06, |
|
"loss": 0.7329, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.9401949756280465, |
|
"grad_norm": 0.035406141374499524, |
|
"learning_rate": 3.2574796344236153e-06, |
|
"loss": 0.7033, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 0.941132358455193, |
|
"grad_norm": 0.041168173265274895, |
|
"learning_rate": 3.1565214280670825e-06, |
|
"loss": 0.7038, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.9420697412823397, |
|
"grad_norm": 0.039723554038275884, |
|
"learning_rate": 3.0571357973930234e-06, |
|
"loss": 0.7097, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.9430071241094863, |
|
"grad_norm": 0.032544537124457665, |
|
"learning_rate": 2.959323806741737e-06, |
|
"loss": 0.7272, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.9439445069366329, |
|
"grad_norm": 0.03461405648425469, |
|
"learning_rate": 2.8630865036010364e-06, |
|
"loss": 0.7141, |
|
"step": 5035 |
|
}, |
|
{ |
|
"epoch": 0.9448818897637795, |
|
"grad_norm": 0.036824105839646924, |
|
"learning_rate": 2.7684249185951415e-06, |
|
"loss": 0.7335, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.9458192725909261, |
|
"grad_norm": 0.04002092643887271, |
|
"learning_rate": 2.6753400654735524e-06, |
|
"loss": 0.7372, |
|
"step": 5045 |
|
}, |
|
{ |
|
"epoch": 0.9467566554180727, |
|
"grad_norm": 0.03570329291792829, |
|
"learning_rate": 2.5838329411002943e-06, |
|
"loss": 0.7127, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.9476940382452194, |
|
"grad_norm": 0.034274985857504735, |
|
"learning_rate": 2.493904525443141e-06, |
|
"loss": 0.7057, |
|
"step": 5055 |
|
}, |
|
{ |
|
"epoch": 0.948631421072366, |
|
"grad_norm": 0.03801311689032566, |
|
"learning_rate": 2.405555781563173e-06, |
|
"loss": 0.7282, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.9495688038995126, |
|
"grad_norm": 0.03756145522959914, |
|
"learning_rate": 2.3187876556044537e-06, |
|
"loss": 0.6886, |
|
"step": 5065 |
|
}, |
|
{ |
|
"epoch": 0.9505061867266592, |
|
"grad_norm": 0.03530134077170944, |
|
"learning_rate": 2.2336010767839194e-06, |
|
"loss": 0.7168, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.9514435695538058, |
|
"grad_norm": 0.03876854731664466, |
|
"learning_rate": 2.1499969573813724e-06, |
|
"loss": 0.7163, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 0.03235204006913281, |
|
"learning_rate": 2.06797619272977e-06, |
|
"loss": 0.7339, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.9533183352080989, |
|
"grad_norm": 0.035560439311177215, |
|
"learning_rate": 1.9875396612056005e-06, |
|
"loss": 0.7356, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 0.9542557180352456, |
|
"grad_norm": 0.03566397362898293, |
|
"learning_rate": 1.9086882242195235e-06, |
|
"loss": 0.6973, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.9551931008623922, |
|
"grad_norm": 0.037624367410972304, |
|
"learning_rate": 1.8314227262070435e-06, |
|
"loss": 0.7273, |
|
"step": 5095 |
|
}, |
|
{ |
|
"epoch": 0.9561304836895388, |
|
"grad_norm": 0.03441332414126222, |
|
"learning_rate": 1.7557439946196017e-06, |
|
"loss": 0.722, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.9570678665166854, |
|
"grad_norm": 0.03664904948143634, |
|
"learning_rate": 1.6816528399155982e-06, |
|
"loss": 0.7412, |
|
"step": 5105 |
|
}, |
|
{ |
|
"epoch": 0.958005249343832, |
|
"grad_norm": 0.03715182226165275, |
|
"learning_rate": 1.60915005555175e-06, |
|
"loss": 0.7232, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.9589426321709786, |
|
"grad_norm": 0.03639187438599698, |
|
"learning_rate": 1.5382364179746808e-06, |
|
"loss": 0.737, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 0.9598800149981253, |
|
"grad_norm": 0.03868780676867553, |
|
"learning_rate": 1.4689126866124278e-06, |
|
"loss": 0.7097, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.9608173978252719, |
|
"grad_norm": 0.036291056043944926, |
|
"learning_rate": 1.4011796038665145e-06, |
|
"loss": 0.7186, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.9617547806524185, |
|
"grad_norm": 0.06821587616968404, |
|
"learning_rate": 1.335037895103791e-06, |
|
"loss": 0.7374, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.9626921634795651, |
|
"grad_norm": 0.03887971150133188, |
|
"learning_rate": 1.2704882686488393e-06, |
|
"loss": 0.7624, |
|
"step": 5135 |
|
}, |
|
{ |
|
"epoch": 0.9636295463067117, |
|
"grad_norm": 0.036497584321372616, |
|
"learning_rate": 1.2075314157762972e-06, |
|
"loss": 0.718, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.9645669291338582, |
|
"grad_norm": 0.039035533336186354, |
|
"learning_rate": 1.1461680107034798e-06, |
|
"loss": 0.7421, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.9655043119610048, |
|
"grad_norm": 0.035839951992695206, |
|
"learning_rate": 1.0863987105831696e-06, |
|
"loss": 0.73, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.9664416947881515, |
|
"grad_norm": 0.03563901861543763, |
|
"learning_rate": 1.0282241554965375e-06, |
|
"loss": 0.7314, |
|
"step": 5155 |
|
}, |
|
{ |
|
"epoch": 0.9673790776152981, |
|
"grad_norm": 0.034948413836722134, |
|
"learning_rate": 9.716449684463502e-07, |
|
"loss": 0.7079, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.9683164604424447, |
|
"grad_norm": 0.037670482239960106, |
|
"learning_rate": 9.166617553502064e-07, |
|
"loss": 0.7209, |
|
"step": 5165 |
|
}, |
|
{ |
|
"epoch": 0.9692538432695913, |
|
"grad_norm": 0.03868203643112819, |
|
"learning_rate": 8.632751050341946e-07, |
|
"loss": 0.7267, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.9701912260967379, |
|
"grad_norm": 0.03685196650404434, |
|
"learning_rate": 8.114855892264128e-07, |
|
"loss": 0.7288, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.9711286089238845, |
|
"grad_norm": 0.03480760162856044, |
|
"learning_rate": 7.612937625509741e-07, |
|
"loss": 0.714, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.9720659917510311, |
|
"grad_norm": 0.035112246544013306, |
|
"learning_rate": 7.127001625220286e-07, |
|
"loss": 0.725, |
|
"step": 5185 |
|
}, |
|
{ |
|
"epoch": 0.9730033745781778, |
|
"grad_norm": 0.034016779690611565, |
|
"learning_rate": 6.657053095380005e-07, |
|
"loss": 0.6879, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.9739407574053244, |
|
"grad_norm": 0.03711659986007524, |
|
"learning_rate": 6.203097068759933e-07, |
|
"loss": 0.7412, |
|
"step": 5195 |
|
}, |
|
{ |
|
"epoch": 0.974878140232471, |
|
"grad_norm": 0.039330504751635645, |
|
"learning_rate": 5.765138406864434e-07, |
|
"loss": 0.7359, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.9758155230596175, |
|
"grad_norm": 0.03613689473953272, |
|
"learning_rate": 5.343181799878916e-07, |
|
"loss": 0.7249, |
|
"step": 5205 |
|
}, |
|
{ |
|
"epoch": 0.9767529058867641, |
|
"grad_norm": 0.03766304326222476, |
|
"learning_rate": 4.937231766619698e-07, |
|
"loss": 0.7141, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.9776902887139107, |
|
"grad_norm": 0.03797367873629258, |
|
"learning_rate": 4.547292654485557e-07, |
|
"loss": 0.7325, |
|
"step": 5215 |
|
}, |
|
{ |
|
"epoch": 0.9786276715410573, |
|
"grad_norm": 0.038696832388832716, |
|
"learning_rate": 4.1733686394109236e-07, |
|
"loss": 0.735, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.979565054368204, |
|
"grad_norm": 0.03372024341796256, |
|
"learning_rate": 3.815463725821755e-07, |
|
"loss": 0.7132, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.9805024371953506, |
|
"grad_norm": 0.03911193375759219, |
|
"learning_rate": 3.473581746592069e-07, |
|
"loss": 0.7413, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.9814398200224972, |
|
"grad_norm": 0.03595612615841676, |
|
"learning_rate": 3.1477263630033113e-07, |
|
"loss": 0.7263, |
|
"step": 5235 |
|
}, |
|
{ |
|
"epoch": 0.9823772028496438, |
|
"grad_norm": 0.035577055944527154, |
|
"learning_rate": 2.8379010647045506e-07, |
|
"loss": 0.724, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.9833145856767904, |
|
"grad_norm": 0.03939881083971358, |
|
"learning_rate": 2.5441091696761783e-07, |
|
"loss": 0.7345, |
|
"step": 5245 |
|
}, |
|
{ |
|
"epoch": 0.984251968503937, |
|
"grad_norm": 0.03509258626104014, |
|
"learning_rate": 2.266353824193101e-07, |
|
"loss": 0.689, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.9851893513310837, |
|
"grad_norm": 0.03642207354018445, |
|
"learning_rate": 2.0046380027921028e-07, |
|
"loss": 0.7582, |
|
"step": 5255 |
|
}, |
|
{ |
|
"epoch": 0.9861267341582303, |
|
"grad_norm": 0.03655317102724065, |
|
"learning_rate": 1.7589645082392024e-07, |
|
"loss": 0.728, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.9870641169853769, |
|
"grad_norm": 0.03681201840565274, |
|
"learning_rate": 1.529335971500345e-07, |
|
"loss": 0.7029, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 0.9880014998125234, |
|
"grad_norm": 0.034195294045265584, |
|
"learning_rate": 1.315754851712425e-07, |
|
"loss": 0.7106, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.98893888263967, |
|
"grad_norm": 0.033879744761242093, |
|
"learning_rate": 1.1182234361579722e-07, |
|
"loss": 0.732, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.9898762654668166, |
|
"grad_norm": 0.036906203150992155, |
|
"learning_rate": 9.367438402395066e-08, |
|
"loss": 0.7243, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.9908136482939632, |
|
"grad_norm": 0.03751280991426079, |
|
"learning_rate": 7.713180074577219e-08, |
|
"loss": 0.7429, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 0.9917510311211098, |
|
"grad_norm": 0.03933888372810454, |
|
"learning_rate": 6.219477093905023e-08, |
|
"loss": 0.7368, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.9926884139482565, |
|
"grad_norm": 0.03920435371912244, |
|
"learning_rate": 4.8863454567360513e-08, |
|
"loss": 0.7415, |
|
"step": 5295 |
|
}, |
|
{ |
|
"epoch": 0.9936257967754031, |
|
"grad_norm": 0.0340028517083224, |
|
"learning_rate": 3.7137994398400705e-08, |
|
"loss": 0.736, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.9945631796025497, |
|
"grad_norm": 0.03596691936417686, |
|
"learning_rate": 2.7018516002424996e-08, |
|
"loss": 0.7027, |
|
"step": 5305 |
|
}, |
|
{ |
|
"epoch": 0.9955005624296963, |
|
"grad_norm": 0.035041534347360945, |
|
"learning_rate": 1.850512775091184e-08, |
|
"loss": 0.7338, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.9964379452568429, |
|
"grad_norm": 0.036327963873331384, |
|
"learning_rate": 1.1597920815414885e-08, |
|
"loss": 0.7262, |
|
"step": 5315 |
|
}, |
|
{ |
|
"epoch": 0.9973753280839895, |
|
"grad_norm": 0.036279192100004604, |
|
"learning_rate": 6.296969166580401e-09, |
|
"loss": 0.7153, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.9983127109111362, |
|
"grad_norm": 0.035186584742647825, |
|
"learning_rate": 2.6023295733312855e-09, |
|
"loss": 0.7068, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.9992500937382827, |
|
"grad_norm": 0.036172737685700306, |
|
"learning_rate": 5.140416022841875e-10, |
|
"loss": 0.7243, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.0353564023971558, |
|
"eval_runtime": 1193.1467, |
|
"eval_samples_per_second": 196.235, |
|
"eval_steps_per_second": 6.133, |
|
"step": 5334 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 5334, |
|
"total_flos": 777591764811776.0, |
|
"train_loss": 0.7984603145035815, |
|
"train_runtime": 21845.7301, |
|
"train_samples_per_second": 31.253, |
|
"train_steps_per_second": 0.244 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 5334, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 777591764811776.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |