{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9720534629404617,
  "eval_steps": 500,
  "global_step": 1900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005116070857581378,
      "grad_norm": 5.084794521331787,
      "learning_rate": 5.115089514066497e-07,
      "loss": 2.9408,
      "step": 1
    },
    {
      "epoch": 0.0010232141715162755,
      "grad_norm": 5.157843112945557,
      "learning_rate": 1.0230179028132994e-06,
      "loss": 3.0401,
      "step": 2
    },
    {
      "epoch": 0.002046428343032551,
      "grad_norm": 5.386075973510742,
      "learning_rate": 2.0460358056265987e-06,
      "loss": 3.071,
      "step": 4
    },
    {
      "epoch": 0.0030696425145488263,
      "grad_norm": 5.142333984375,
      "learning_rate": 3.069053708439898e-06,
      "loss": 3.1039,
      "step": 6
    },
    {
      "epoch": 0.004092856686065102,
      "grad_norm": 3.101259231567383,
      "learning_rate": 4.092071611253197e-06,
      "loss": 2.8181,
      "step": 8
    },
    {
      "epoch": 0.005116070857581378,
      "grad_norm": 2.207404375076294,
      "learning_rate": 5.1150895140664966e-06,
      "loss": 2.5627,
      "step": 10
    },
    {
      "epoch": 0.006139285029097653,
      "grad_norm": 1.8845449686050415,
      "learning_rate": 6.138107416879796e-06,
      "loss": 2.6209,
      "step": 12
    },
    {
      "epoch": 0.007162499200613928,
      "grad_norm": 2.1659703254699707,
      "learning_rate": 7.161125319693095e-06,
      "loss": 2.5467,
      "step": 14
    },
    {
      "epoch": 0.008185713372130204,
      "grad_norm": 1.6377224922180176,
      "learning_rate": 8.184143222506395e-06,
      "loss": 2.5057,
      "step": 16
    },
    {
      "epoch": 0.009208927543646479,
      "grad_norm": 1.1535893678665161,
      "learning_rate": 9.207161125319694e-06,
      "loss": 2.538,
      "step": 18
    },
    {
      "epoch": 0.010232141715162756,
      "grad_norm": 1.0513185262680054,
      "learning_rate": 1.0230179028132993e-05,
      "loss": 2.5247,
      "step": 20
    },
    {
      "epoch": 0.01125535588667903,
      "grad_norm": 0.8281764388084412,
      "learning_rate": 1.1253196930946292e-05,
      "loss": 2.4595,
      "step": 22
    },
    {
      "epoch": 0.012278570058195305,
      "grad_norm": 0.5129208564758301,
      "learning_rate": 1.2276214833759591e-05,
      "loss": 2.3742,
      "step": 24
    },
    {
      "epoch": 0.013301784229711582,
      "grad_norm": 0.46613597869873047,
      "learning_rate": 1.3299232736572892e-05,
      "loss": 2.4564,
      "step": 26
    },
    {
      "epoch": 0.014324998401227857,
      "grad_norm": 0.354717493057251,
      "learning_rate": 1.432225063938619e-05,
      "loss": 2.3467,
      "step": 28
    },
    {
      "epoch": 0.015348212572744133,
      "grad_norm": 0.3325178623199463,
      "learning_rate": 1.534526854219949e-05,
      "loss": 2.3978,
      "step": 30
    },
    {
      "epoch": 0.016371426744260408,
      "grad_norm": 0.32920145988464355,
      "learning_rate": 1.636828644501279e-05,
      "loss": 2.2522,
      "step": 32
    },
    {
      "epoch": 0.017394640915776683,
      "grad_norm": 0.25466033816337585,
      "learning_rate": 1.739130434782609e-05,
      "loss": 2.243,
      "step": 34
    },
    {
      "epoch": 0.018417855087292958,
      "grad_norm": 0.35631808638572693,
      "learning_rate": 1.8414322250639388e-05,
      "loss": 2.2527,
      "step": 36
    },
    {
      "epoch": 0.019441069258809233,
      "grad_norm": 0.23582319915294647,
      "learning_rate": 1.9437340153452684e-05,
      "loss": 2.1452,
      "step": 38
    },
    {
      "epoch": 0.02046428343032551,
      "grad_norm": 0.2491885870695114,
      "learning_rate": 2.0460358056265986e-05,
      "loss": 2.1778,
      "step": 40
    },
    {
      "epoch": 0.021487497601841786,
      "grad_norm": 0.2993784546852112,
      "learning_rate": 2.1483375959079285e-05,
      "loss": 2.1006,
      "step": 42
    },
    {
      "epoch": 0.02251071177335806,
      "grad_norm": 0.21940283477306366,
      "learning_rate": 2.2506393861892585e-05,
      "loss": 2.1752,
      "step": 44
    },
    {
      "epoch": 0.023533925944874336,
      "grad_norm": 0.15252649784088135,
      "learning_rate": 2.3529411764705884e-05,
      "loss": 2.1295,
      "step": 46
    },
    {
      "epoch": 0.02455714011639061,
      "grad_norm": 0.19182737171649933,
      "learning_rate": 2.4552429667519183e-05,
      "loss": 2.1181,
      "step": 48
    },
    {
      "epoch": 0.02558035428790689,
      "grad_norm": 0.19416701793670654,
      "learning_rate": 2.5575447570332482e-05,
      "loss": 2.0953,
      "step": 50
    },
    {
      "epoch": 0.026603568459423164,
      "grad_norm": 0.12562625110149384,
      "learning_rate": 2.6598465473145784e-05,
      "loss": 2.0856,
      "step": 52
    },
    {
      "epoch": 0.02762678263093944,
      "grad_norm": 0.13417182862758636,
      "learning_rate": 2.7621483375959077e-05,
      "loss": 2.0948,
      "step": 54
    },
    {
      "epoch": 0.028649996802455713,
      "grad_norm": 0.10808593034744263,
      "learning_rate": 2.864450127877238e-05,
      "loss": 2.0541,
      "step": 56
    },
    {
      "epoch": 0.02967321097397199,
      "grad_norm": 0.14162665605545044,
      "learning_rate": 2.966751918158568e-05,
      "loss": 2.0756,
      "step": 58
    },
    {
      "epoch": 0.030696425145488267,
      "grad_norm": 0.10216689854860306,
      "learning_rate": 3.069053708439898e-05,
      "loss": 2.0502,
      "step": 60
    },
    {
      "epoch": 0.03171963931700454,
      "grad_norm": 0.0772320106625557,
      "learning_rate": 3.171355498721228e-05,
      "loss": 2.0598,
      "step": 62
    },
    {
      "epoch": 0.032742853488520816,
      "grad_norm": 0.07200902700424194,
      "learning_rate": 3.273657289002558e-05,
      "loss": 2.0416,
      "step": 64
    },
    {
      "epoch": 0.03376606766003709,
      "grad_norm": 0.07764917612075806,
      "learning_rate": 3.375959079283887e-05,
      "loss": 2.04,
      "step": 66
    },
    {
      "epoch": 0.034789281831553366,
      "grad_norm": 0.07703404128551483,
      "learning_rate": 3.478260869565218e-05,
      "loss": 2.0426,
      "step": 68
    },
    {
      "epoch": 0.03581249600306964,
      "grad_norm": 0.05096273496747017,
      "learning_rate": 3.580562659846548e-05,
      "loss": 2.0264,
      "step": 70
    },
    {
      "epoch": 0.036835710174585916,
      "grad_norm": 0.07172555476427078,
      "learning_rate": 3.6828644501278776e-05,
      "loss": 1.9799,
      "step": 72
    },
    {
      "epoch": 0.03785892434610219,
      "grad_norm": 0.05563480406999588,
      "learning_rate": 3.7851662404092075e-05,
      "loss": 1.9922,
      "step": 74
    },
    {
      "epoch": 0.038882138517618466,
      "grad_norm": 0.04726962745189667,
      "learning_rate": 3.887468030690537e-05,
      "loss": 1.9826,
      "step": 76
    },
    {
      "epoch": 0.03990535268913475,
      "grad_norm": 0.040130794048309326,
      "learning_rate": 3.989769820971867e-05,
      "loss": 1.9693,
      "step": 78
    },
    {
      "epoch": 0.04092856686065102,
      "grad_norm": 0.051317401230335236,
      "learning_rate": 4.092071611253197e-05,
      "loss": 1.9454,
      "step": 80
    },
    {
      "epoch": 0.0419517810321673,
      "grad_norm": 0.03843973949551582,
      "learning_rate": 4.194373401534527e-05,
      "loss": 1.9535,
      "step": 82
    },
    {
      "epoch": 0.04297499520368357,
      "grad_norm": 0.04338320344686508,
      "learning_rate": 4.296675191815857e-05,
      "loss": 1.9017,
      "step": 84
    },
    {
      "epoch": 0.04399820937519985,
      "grad_norm": 0.0422111339867115,
      "learning_rate": 4.398976982097187e-05,
      "loss": 1.9806,
      "step": 86
    },
    {
      "epoch": 0.04502142354671612,
      "grad_norm": 0.043594423681497574,
      "learning_rate": 4.501278772378517e-05,
      "loss": 1.9809,
      "step": 88
    },
    {
      "epoch": 0.0460446377182324,
      "grad_norm": 0.050932493060827255,
      "learning_rate": 4.603580562659847e-05,
      "loss": 2.002,
      "step": 90
    },
    {
      "epoch": 0.04706785188974867,
      "grad_norm": 0.039923008531332016,
      "learning_rate": 4.705882352941177e-05,
      "loss": 1.9898,
      "step": 92
    },
    {
      "epoch": 0.048091066061264946,
      "grad_norm": 0.04199720919132233,
      "learning_rate": 4.8081841432225067e-05,
      "loss": 1.9375,
      "step": 94
    },
    {
      "epoch": 0.04911428023278122,
      "grad_norm": 0.03885011374950409,
      "learning_rate": 4.9104859335038366e-05,
      "loss": 1.9594,
      "step": 96
    },
    {
      "epoch": 0.0501374944042975,
      "grad_norm": 0.04459952563047409,
      "learning_rate": 5.0127877237851665e-05,
      "loss": 1.9327,
      "step": 98
    },
    {
      "epoch": 0.05116070857581378,
      "grad_norm": 0.04154925048351288,
      "learning_rate": 5.1150895140664964e-05,
      "loss": 1.9385,
      "step": 100
    },
    {
      "epoch": 0.05218392274733005,
      "grad_norm": 0.04149138927459717,
      "learning_rate": 5.217391304347826e-05,
      "loss": 1.9251,
      "step": 102
    },
    {
      "epoch": 0.05320713691884633,
      "grad_norm": 0.05338102579116821,
      "learning_rate": 5.319693094629157e-05,
      "loss": 1.9211,
      "step": 104
    },
    {
      "epoch": 0.0542303510903626,
      "grad_norm": 0.04964439943432808,
      "learning_rate": 5.421994884910486e-05,
      "loss": 1.8863,
      "step": 106
    },
    {
      "epoch": 0.05525356526187888,
      "grad_norm": 0.040731314569711685,
      "learning_rate": 5.5242966751918154e-05,
      "loss": 1.9002,
      "step": 108
    },
    {
      "epoch": 0.05627677943339515,
      "grad_norm": 0.05813027173280716,
      "learning_rate": 5.626598465473146e-05,
      "loss": 1.8944,
      "step": 110
    },
    {
      "epoch": 0.05729999360491143,
      "grad_norm": 0.04966093972325325,
      "learning_rate": 5.728900255754476e-05,
      "loss": 1.898,
      "step": 112
    },
    {
      "epoch": 0.0583232077764277,
      "grad_norm": 0.050573479384183884,
      "learning_rate": 5.8312020460358065e-05,
      "loss": 1.8778,
      "step": 114
    },
    {
      "epoch": 0.05934642194794398,
      "grad_norm": 0.05025520175695419,
      "learning_rate": 5.933503836317136e-05,
      "loss": 1.9044,
      "step": 116
    },
    {
      "epoch": 0.06036963611946025,
      "grad_norm": 0.05153055489063263,
      "learning_rate": 6.035805626598465e-05,
      "loss": 1.9045,
      "step": 118
    },
    {
      "epoch": 0.06139285029097653,
      "grad_norm": 0.051311247050762177,
      "learning_rate": 6.138107416879796e-05,
      "loss": 1.9077,
      "step": 120
    },
    {
      "epoch": 0.06241606446249281,
      "grad_norm": 0.05084897577762604,
      "learning_rate": 6.240409207161125e-05,
      "loss": 1.8538,
      "step": 122
    },
    {
      "epoch": 0.06343927863400908,
      "grad_norm": 0.05961287021636963,
      "learning_rate": 6.342710997442456e-05,
      "loss": 1.8792,
      "step": 124
    },
    {
      "epoch": 0.06446249280552535,
      "grad_norm": 0.05775010585784912,
      "learning_rate": 6.445012787723786e-05,
      "loss": 1.8587,
      "step": 126
    },
    {
      "epoch": 0.06548570697704163,
      "grad_norm": 0.09344275295734406,
      "learning_rate": 6.547314578005116e-05,
      "loss": 1.8454,
      "step": 128
    },
    {
      "epoch": 0.0665089211485579,
      "grad_norm": 0.0748172476887703,
      "learning_rate": 6.649616368286446e-05,
      "loss": 1.8998,
      "step": 130
    },
    {
      "epoch": 0.06753213532007418,
      "grad_norm": 0.07188538461923599,
      "learning_rate": 6.751918158567774e-05,
      "loss": 1.8219,
      "step": 132
    },
    {
      "epoch": 0.06855534949159046,
      "grad_norm": 0.05799673870205879,
      "learning_rate": 6.854219948849106e-05,
      "loss": 1.8549,
      "step": 134
    },
    {
      "epoch": 0.06957856366310673,
      "grad_norm": 0.07886774092912674,
      "learning_rate": 6.956521739130436e-05,
      "loss": 1.8885,
      "step": 136
    },
    {
      "epoch": 0.07060177783462301,
      "grad_norm": 0.0599171444773674,
      "learning_rate": 7.058823529411765e-05,
      "loss": 1.829,
      "step": 138
    },
    {
      "epoch": 0.07162499200613928,
      "grad_norm": 0.07810111343860626,
      "learning_rate": 7.161125319693095e-05,
      "loss": 1.8878,
      "step": 140
    },
    {
      "epoch": 0.07264820617765556,
      "grad_norm": 0.062123704701662064,
      "learning_rate": 7.263427109974424e-05,
      "loss": 1.8633,
      "step": 142
    },
    {
      "epoch": 0.07367142034917183,
      "grad_norm": 0.08402098715305328,
      "learning_rate": 7.365728900255755e-05,
      "loss": 1.8377,
      "step": 144
    },
    {
      "epoch": 0.07469463452068811,
      "grad_norm": 0.06189502775669098,
      "learning_rate": 7.468030690537085e-05,
      "loss": 1.8683,
      "step": 146
    },
    {
      "epoch": 0.07571784869220438,
      "grad_norm": 0.07368986308574677,
      "learning_rate": 7.570332480818415e-05,
      "loss": 1.8636,
      "step": 148
    },
    {
      "epoch": 0.07674106286372066,
      "grad_norm": 0.06430894136428833,
      "learning_rate": 7.672634271099745e-05,
      "loss": 1.8341,
      "step": 150
    },
    {
      "epoch": 0.07776427703523693,
      "grad_norm": 0.05924483761191368,
      "learning_rate": 7.774936061381073e-05,
      "loss": 1.9151,
      "step": 152
    },
    {
      "epoch": 0.07878749120675321,
      "grad_norm": 0.06166929751634598,
      "learning_rate": 7.877237851662405e-05,
      "loss": 1.8306,
      "step": 154
    },
    {
      "epoch": 0.0798107053782695,
      "grad_norm": 0.07514499127864838,
      "learning_rate": 7.979539641943735e-05,
      "loss": 1.8572,
      "step": 156
    },
    {
      "epoch": 0.08083391954978576,
      "grad_norm": 0.06925056874752045,
      "learning_rate": 8.081841432225065e-05,
      "loss": 1.8449,
      "step": 158
    },
    {
      "epoch": 0.08185713372130204,
      "grad_norm": 0.08889607340097427,
      "learning_rate": 8.184143222506395e-05,
      "loss": 1.8217,
      "step": 160
    },
    {
      "epoch": 0.08288034789281831,
      "grad_norm": 0.11205849796533585,
      "learning_rate": 8.286445012787724e-05,
      "loss": 1.7859,
      "step": 162
    },
    {
      "epoch": 0.0839035620643346,
      "grad_norm": 0.13293609023094177,
      "learning_rate": 8.388746803069054e-05,
      "loss": 1.8245,
      "step": 164
    },
    {
      "epoch": 0.08492677623585086,
      "grad_norm": 0.14082959294319153,
      "learning_rate": 8.491048593350384e-05,
      "loss": 1.8077,
      "step": 166
    },
    {
      "epoch": 0.08594999040736714,
      "grad_norm": 0.0726478174328804,
      "learning_rate": 8.593350383631714e-05,
      "loss": 1.8081,
      "step": 168
    },
    {
      "epoch": 0.08697320457888341,
      "grad_norm": 0.21175715327262878,
      "learning_rate": 8.695652173913044e-05,
      "loss": 1.8289,
      "step": 170
    },
    {
      "epoch": 0.0879964187503997,
      "grad_norm": 0.19227363169193268,
      "learning_rate": 8.797953964194374e-05,
      "loss": 1.8092,
      "step": 172
    },
    {
      "epoch": 0.08901963292191598,
      "grad_norm": 0.13788004219532013,
      "learning_rate": 8.900255754475704e-05,
      "loss": 1.7986,
      "step": 174
    },
    {
      "epoch": 0.09004284709343224,
      "grad_norm": 0.09351494908332825,
      "learning_rate": 9.002557544757034e-05,
      "loss": 1.8077,
      "step": 176
    },
    {
      "epoch": 0.09106606126494853,
      "grad_norm": 0.09681002050638199,
      "learning_rate": 9.104859335038364e-05,
      "loss": 1.794,
      "step": 178
    },
    {
      "epoch": 0.0920892754364648,
      "grad_norm": 0.061654381453990936,
      "learning_rate": 9.207161125319694e-05,
      "loss": 1.7935,
      "step": 180
    },
    {
      "epoch": 0.09311248960798107,
      "grad_norm": 0.06282493472099304,
      "learning_rate": 9.309462915601024e-05,
      "loss": 1.7758,
      "step": 182
    },
    {
      "epoch": 0.09413570377949734,
      "grad_norm": 0.08118202537298203,
      "learning_rate": 9.411764705882353e-05,
      "loss": 1.8209,
      "step": 184
    },
    {
      "epoch": 0.09515891795101362,
      "grad_norm": 0.0755864828824997,
      "learning_rate": 9.514066496163683e-05,
      "loss": 1.7672,
      "step": 186
    },
    {
      "epoch": 0.09618213212252989,
      "grad_norm": 0.07810387760400772,
      "learning_rate": 9.616368286445013e-05,
      "loss": 1.7655,
      "step": 188
    },
    {
      "epoch": 0.09720534629404617,
      "grad_norm": 0.08016899228096008,
      "learning_rate": 9.718670076726343e-05,
      "loss": 1.7818,
      "step": 190
    },
    {
      "epoch": 0.09822856046556244,
      "grad_norm": 0.07527964562177658,
      "learning_rate": 9.820971867007673e-05,
      "loss": 1.7386,
      "step": 192
    },
    {
      "epoch": 0.09925177463707872,
      "grad_norm": 0.08135760575532913,
      "learning_rate": 9.923273657289003e-05,
      "loss": 1.7678,
      "step": 194
    },
    {
      "epoch": 0.100274988808595,
      "grad_norm": 0.06465744972229004,
      "learning_rate": 0.00010025575447570333,
      "loss": 1.8469,
      "step": 196
    },
    {
      "epoch": 0.10129820298011127,
      "grad_norm": 0.0678311362862587,
      "learning_rate": 0.00010127877237851664,
      "loss": 1.7856,
      "step": 198
    },
    {
      "epoch": 0.10232141715162756,
      "grad_norm": 0.06425610929727554,
      "learning_rate": 0.00010230179028132993,
      "loss": 1.7542,
      "step": 200
    },
    {
      "epoch": 0.10334463132314382,
      "grad_norm": 0.06820003688335419,
      "learning_rate": 0.00010332480818414323,
      "loss": 1.783,
      "step": 202
    },
    {
      "epoch": 0.1043678454946601,
      "grad_norm": 0.0690922886133194,
      "learning_rate": 0.00010434782608695653,
      "loss": 1.7612,
      "step": 204
    },
    {
      "epoch": 0.10539105966617637,
      "grad_norm": 0.06488107144832611,
      "learning_rate": 0.00010537084398976983,
      "loss": 1.7648,
      "step": 206
    },
    {
      "epoch": 0.10641427383769266,
      "grad_norm": 0.08278009295463562,
      "learning_rate": 0.00010639386189258314,
      "loss": 1.7661,
      "step": 208
    },
    {
      "epoch": 0.10743748800920892,
      "grad_norm": 0.08722035586833954,
      "learning_rate": 0.00010741687979539642,
      "loss": 1.7578,
      "step": 210
    },
    {
      "epoch": 0.1084607021807252,
      "grad_norm": 0.0737011507153511,
      "learning_rate": 0.00010843989769820972,
      "loss": 1.7381,
      "step": 212
    },
    {
      "epoch": 0.10948391635224147,
      "grad_norm": 0.08060843497514725,
      "learning_rate": 0.00010946291560102302,
      "loss": 1.7967,
      "step": 214
    },
    {
      "epoch": 0.11050713052375775,
      "grad_norm": 0.10279374569654465,
      "learning_rate": 0.00011048593350383631,
      "loss": 1.7703,
      "step": 216
    },
    {
      "epoch": 0.11153034469527404,
      "grad_norm": 0.0777791365981102,
      "learning_rate": 0.00011150895140664963,
      "loss": 1.8015,
      "step": 218
    },
    {
      "epoch": 0.1125535588667903,
      "grad_norm": 0.06883997470140457,
      "learning_rate": 0.00011253196930946292,
      "loss": 1.7731,
      "step": 220
    },
    {
      "epoch": 0.11357677303830659,
      "grad_norm": 0.06231442466378212,
      "learning_rate": 0.00011355498721227622,
      "loss": 1.8063,
      "step": 222
    },
    {
      "epoch": 0.11459998720982285,
      "grad_norm": 0.06607846170663834,
      "learning_rate": 0.00011457800511508952,
      "loss": 1.7616,
      "step": 224
    },
    {
      "epoch": 0.11562320138133914,
      "grad_norm": 0.05903138220310211,
      "learning_rate": 0.0001156010230179028,
      "loss": 1.7993,
      "step": 226
    },
    {
      "epoch": 0.1166464155528554,
      "grad_norm": 0.07282232493162155,
      "learning_rate": 0.00011662404092071613,
      "loss": 1.7374,
      "step": 228
    },
    {
      "epoch": 0.11766962972437169,
      "grad_norm": 0.06793032586574554,
      "learning_rate": 0.00011764705882352942,
      "loss": 1.7852,
      "step": 230
    },
    {
      "epoch": 0.11869284389588795,
      "grad_norm": 0.06404048949480057,
      "learning_rate": 0.00011867007672634271,
      "loss": 1.775,
      "step": 232
    },
    {
      "epoch": 0.11971605806740424,
      "grad_norm": 0.08423135429620743,
      "learning_rate": 0.00011969309462915601,
      "loss": 1.779,
      "step": 234
    },
    {
      "epoch": 0.1207392722389205,
      "grad_norm": 0.0814799889922142,
      "learning_rate": 0.0001207161125319693,
      "loss": 1.7082,
      "step": 236
    },
    {
      "epoch": 0.12176248641043678,
      "grad_norm": 0.08876215666532516,
      "learning_rate": 0.00012173913043478263,
      "loss": 1.7767,
      "step": 238
    },
    {
      "epoch": 0.12278570058195307,
      "grad_norm": 0.07051345705986023,
      "learning_rate": 0.00012276214833759592,
      "loss": 1.7181,
      "step": 240
    },
    {
      "epoch": 0.12380891475346933,
      "grad_norm": 0.07023751735687256,
      "learning_rate": 0.00012378516624040922,
      "loss": 1.7308,
      "step": 242
    },
    {
      "epoch": 0.12483212892498562,
      "grad_norm": 0.0754849910736084,
      "learning_rate": 0.0001248081841432225,
      "loss": 1.7782,
      "step": 244
    },
    {
      "epoch": 0.1258553430965019,
      "grad_norm": 0.07223635166883469,
      "learning_rate": 0.0001258312020460358,
      "loss": 1.718,
      "step": 246
    },
    {
      "epoch": 0.12687855726801817,
      "grad_norm": 0.07007969915866852,
      "learning_rate": 0.00012685421994884912,
      "loss": 1.7686,
      "step": 248
    },
    {
      "epoch": 0.12790177143953443,
      "grad_norm": 0.06361662596464157,
      "learning_rate": 0.00012787723785166242,
      "loss": 1.7217,
      "step": 250
    },
    {
      "epoch": 0.1289249856110507,
      "grad_norm": 0.08723774552345276,
      "learning_rate": 0.00012890025575447572,
      "loss": 1.7369,
      "step": 252
    },
    {
      "epoch": 0.129948199782567,
      "grad_norm": 0.06651702523231506,
      "learning_rate": 0.000129923273657289,
      "loss": 1.7163,
      "step": 254
    },
    {
      "epoch": 0.13097141395408327,
      "grad_norm": 0.07153377681970596,
      "learning_rate": 0.00013094629156010232,
      "loss": 1.7168,
      "step": 256
    },
    {
      "epoch": 0.13199462812559953,
      "grad_norm": 0.09451760351657867,
      "learning_rate": 0.00013196930946291562,
      "loss": 1.7182,
      "step": 258
    },
    {
      "epoch": 0.1330178422971158,
      "grad_norm": 0.08822207897901535,
      "learning_rate": 0.00013299232736572892,
      "loss": 1.7483,
      "step": 260
    },
    {
      "epoch": 0.1340410564686321,
      "grad_norm": 0.11073771119117737,
      "learning_rate": 0.00013401534526854221,
      "loss": 1.7087,
      "step": 262
    },
    {
      "epoch": 0.13506427064014837,
      "grad_norm": 0.07717689871788025,
      "learning_rate": 0.0001350383631713555,
      "loss": 1.6943,
      "step": 264
    },
    {
      "epoch": 0.13608748481166463,
      "grad_norm": 0.09418254345655441,
      "learning_rate": 0.0001360613810741688,
      "loss": 1.7084,
      "step": 266
    },
    {
      "epoch": 0.13711069898318093,
      "grad_norm": 0.0922132208943367,
      "learning_rate": 0.0001370843989769821,
      "loss": 1.7526,
      "step": 268
    },
    {
      "epoch": 0.1381339131546972,
      "grad_norm": 0.08973314613103867,
      "learning_rate": 0.0001381074168797954,
      "loss": 1.7049,
      "step": 270
    },
    {
      "epoch": 0.13915712732621346,
      "grad_norm": 0.0772908478975296,
      "learning_rate": 0.0001391304347826087,
      "loss": 1.7444,
      "step": 272
    },
    {
      "epoch": 0.14018034149772973,
      "grad_norm": 0.07179255038499832,
      "learning_rate": 0.00014015345268542198,
      "loss": 1.7309,
      "step": 274
    },
    {
      "epoch": 0.14120355566924603,
      "grad_norm": 0.10786614567041397,
      "learning_rate": 0.0001411764705882353,
      "loss": 1.7413,
      "step": 276
    },
    {
      "epoch": 0.1422267698407623,
      "grad_norm": 0.0815059244632721,
      "learning_rate": 0.0001421994884910486,
      "loss": 1.6895,
      "step": 278
    },
    {
      "epoch": 0.14324998401227856,
      "grad_norm": 0.12658405303955078,
      "learning_rate": 0.0001432225063938619,
      "loss": 1.7013,
      "step": 280
    },
    {
      "epoch": 0.14427319818379483,
      "grad_norm": 0.0807737335562706,
      "learning_rate": 0.0001442455242966752,
      "loss": 1.7378,
      "step": 282
    },
    {
      "epoch": 0.14529641235531113,
      "grad_norm": 0.09726593643426895,
      "learning_rate": 0.00014526854219948848,
      "loss": 1.7143,
      "step": 284
    },
    {
      "epoch": 0.1463196265268274,
      "grad_norm": 0.08326689153909683,
      "learning_rate": 0.0001462915601023018,
      "loss": 1.7395,
      "step": 286
    },
    {
      "epoch": 0.14734284069834366,
      "grad_norm": 0.08783421665430069,
      "learning_rate": 0.0001473145780051151,
      "loss": 1.7466,
      "step": 288
    },
    {
      "epoch": 0.14836605486985996,
      "grad_norm": 0.0639604702591896,
      "learning_rate": 0.0001483375959079284,
      "loss": 1.7019,
      "step": 290
    },
    {
      "epoch": 0.14938926904137623,
      "grad_norm": 0.08028368651866913,
      "learning_rate": 0.0001493606138107417,
      "loss": 1.7134,
      "step": 292
    },
    {
      "epoch": 0.1504124832128925,
      "grad_norm": 0.0739947184920311,
      "learning_rate": 0.00015038363171355497,
      "loss": 1.702,
      "step": 294
    },
    {
      "epoch": 0.15143569738440876,
      "grad_norm": 0.07335802167654037,
      "learning_rate": 0.0001514066496163683,
      "loss": 1.7321,
      "step": 296
    },
    {
      "epoch": 0.15245891155592506,
      "grad_norm": 0.07030144333839417,
      "learning_rate": 0.0001524296675191816,
      "loss": 1.6654,
      "step": 298
    },
    {
      "epoch": 0.15348212572744133,
      "grad_norm": 0.07079968601465225,
      "learning_rate": 0.0001534526854219949,
      "loss": 1.7129,
      "step": 300
    },
    {
      "epoch": 0.1545053398989576,
      "grad_norm": 0.06605160236358643,
      "learning_rate": 0.0001544757033248082,
      "loss": 1.713,
      "step": 302
    },
    {
      "epoch": 0.15552855407047386,
      "grad_norm": 0.08417898416519165,
      "learning_rate": 0.00015549872122762147,
      "loss": 1.7063,
      "step": 304
    },
    {
      "epoch": 0.15655176824199016,
      "grad_norm": 0.07255028933286667,
      "learning_rate": 0.0001565217391304348,
      "loss": 1.742,
      "step": 306
    },
    {
      "epoch": 0.15757498241350643,
      "grad_norm": 0.06561743468046188,
      "learning_rate": 0.0001575447570332481,
      "loss": 1.6912,
      "step": 308
    },
    {
      "epoch": 0.1585981965850227,
      "grad_norm": 0.07030262053012848,
      "learning_rate": 0.0001585677749360614,
      "loss": 1.7434,
      "step": 310
    },
    {
      "epoch": 0.159621410756539,
      "grad_norm": 0.076111800968647,
      "learning_rate": 0.0001595907928388747,
      "loss": 1.6783,
      "step": 312
    },
    {
      "epoch": 0.16064462492805526,
      "grad_norm": 0.06267083436250687,
      "learning_rate": 0.000160613810741688,
      "loss": 1.7193,
      "step": 314
    },
    {
      "epoch": 0.16166783909957153,
      "grad_norm": 0.07638990879058838,
      "learning_rate": 0.0001616368286445013,
      "loss": 1.7395,
      "step": 316
    },
    {
      "epoch": 0.1626910532710878,
      "grad_norm": 0.07447683811187744,
      "learning_rate": 0.0001626598465473146,
      "loss": 1.6574,
      "step": 318
    },
    {
      "epoch": 0.1637142674426041,
      "grad_norm": 0.07413692772388458,
      "learning_rate": 0.0001636828644501279,
      "loss": 1.6868,
      "step": 320
    },
    {
      "epoch": 0.16473748161412036,
      "grad_norm": 0.07566969096660614,
      "learning_rate": 0.0001647058823529412,
      "loss": 1.779,
      "step": 322
    },
    {
      "epoch": 0.16576069578563662,
      "grad_norm": 0.09093326330184937,
      "learning_rate": 0.0001657289002557545,
      "loss": 1.6807,
      "step": 324
    },
    {
      "epoch": 0.16678390995715292,
      "grad_norm": 0.0930614024400711,
      "learning_rate": 0.0001667519181585678,
      "loss": 1.7067,
      "step": 326
    },
    {
      "epoch": 0.1678071241286692,
      "grad_norm": 0.06676892936229706,
      "learning_rate": 0.0001677749360613811,
      "loss": 1.6609,
      "step": 328
    },
    {
      "epoch": 0.16883033830018546,
      "grad_norm": 0.08882534503936768,
      "learning_rate": 0.00016879795396419439,
      "loss": 1.6796,
      "step": 330
    },
    {
      "epoch": 0.16985355247170172,
      "grad_norm": 0.07226958125829697,
      "learning_rate": 0.00016982097186700768,
      "loss": 1.7163,
      "step": 332
    },
    {
      "epoch": 0.17087676664321802,
      "grad_norm": 0.07271122932434082,
      "learning_rate": 0.00017084398976982098,
      "loss": 1.7585,
      "step": 334
    },
    {
      "epoch": 0.1718999808147343,
      "grad_norm": 0.08161617070436478,
      "learning_rate": 0.00017186700767263428,
      "loss": 1.6299,
      "step": 336
    },
    {
      "epoch": 0.17292319498625056,
      "grad_norm": 0.08419859409332275,
      "learning_rate": 0.00017289002557544758,
      "loss": 1.6848,
      "step": 338
    },
    {
      "epoch": 0.17394640915776682,
      "grad_norm": 0.08996909856796265,
      "learning_rate": 0.00017391304347826088,
      "loss": 1.6582,
      "step": 340
    },
    {
      "epoch": 0.17496962332928312,
      "grad_norm": 0.09278981387615204,
      "learning_rate": 0.00017493606138107418,
      "loss": 1.7044,
      "step": 342
    },
    {
      "epoch": 0.1759928375007994,
      "grad_norm": 0.08387704193592072,
      "learning_rate": 0.00017595907928388748,
      "loss": 1.6503,
      "step": 344
    },
    {
      "epoch": 0.17701605167231566,
      "grad_norm": 0.07442387193441391,
      "learning_rate": 0.00017698209718670078,
      "loss": 1.7058,
      "step": 346
    },
    {
      "epoch": 0.17803926584383195,
      "grad_norm": 0.06898263841867447,
      "learning_rate": 0.00017800511508951408,
      "loss": 1.6708,
      "step": 348
    },
    {
      "epoch": 0.17906248001534822,
      "grad_norm": 0.07982076704502106,
      "learning_rate": 0.00017902813299232738,
      "loss": 1.6807,
      "step": 350
    },
    {
      "epoch": 0.1800856941868645,
      "grad_norm": 0.07170634716749191,
      "learning_rate": 0.00018005115089514068,
      "loss": 1.6753,
      "step": 352
    },
    {
      "epoch": 0.18110890835838075,
      "grad_norm": 0.07484789937734604,
      "learning_rate": 0.00018107416879795398,
      "loss": 1.6883,
      "step": 354
    },
    {
      "epoch": 0.18213212252989705,
      "grad_norm": 0.08390472084283829,
      "learning_rate": 0.00018209718670076727,
      "loss": 1.6783,
      "step": 356
    },
    {
      "epoch": 0.18315533670141332,
      "grad_norm": 0.0833701565861702,
      "learning_rate": 0.00018312020460358057,
      "loss": 1.6804,
      "step": 358
    },
    {
      "epoch": 0.1841785508729296,
      "grad_norm": 0.07489979267120361,
      "learning_rate": 0.00018414322250639387,
      "loss": 1.6179,
      "step": 360
    },
    {
      "epoch": 0.18520176504444585,
      "grad_norm": 0.14307746291160583,
      "learning_rate": 0.00018516624040920717,
      "loss": 1.6396,
      "step": 362
    },
    {
      "epoch": 0.18622497921596215,
      "grad_norm": 0.13637496531009674,
      "learning_rate": 0.00018618925831202047,
      "loss": 1.6425,
      "step": 364
    },
    {
      "epoch": 0.18724819338747842,
      "grad_norm": 0.13586537539958954,
      "learning_rate": 0.00018721227621483377,
      "loss": 1.6915,
      "step": 366
    },
    {
      "epoch": 0.18827140755899469,
      "grad_norm": 0.07892754673957825,
      "learning_rate": 0.00018823529411764707,
      "loss": 1.6628,
      "step": 368
    },
    {
      "epoch": 0.18929462173051098,
      "grad_norm": 0.20291955769062042,
      "learning_rate": 0.00018925831202046037,
      "loss": 1.6572,
      "step": 370
    },
    {
      "epoch": 0.19031783590202725,
      "grad_norm": 0.3548440933227539,
      "learning_rate": 0.00019028132992327367,
      "loss": 1.6963,
      "step": 372
    },
    {
      "epoch": 0.19134105007354352,
      "grad_norm": 0.19051846861839294,
      "learning_rate": 0.00019130434782608697,
      "loss": 1.6853,
      "step": 374
    },
    {
      "epoch": 0.19236426424505979,
      "grad_norm": 0.3201465308666229,
      "learning_rate": 0.00019232736572890027,
      "loss": 1.6549,
      "step": 376
    },
    {
      "epoch": 0.19338747841657608,
      "grad_norm": 0.1700785905122757,
      "learning_rate": 0.00019335038363171357,
      "loss": 1.658,
      "step": 378
    },
    {
      "epoch": 0.19441069258809235,
      "grad_norm": 0.1742287576198578,
      "learning_rate": 0.00019437340153452686,
      "loss": 1.6644,
      "step": 380
    },
    {
      "epoch": 0.19543390675960862,
      "grad_norm": 0.0945478230714798,
      "learning_rate": 0.00019539641943734016,
      "loss": 1.65,
      "step": 382
    },
    {
      "epoch": 0.19645712093112488,
      "grad_norm": 0.06995284557342529,
      "learning_rate": 0.00019641943734015346,
      "loss": 1.6608,
      "step": 384
    },
    {
      "epoch": 0.19748033510264118,
      "grad_norm": 0.07590003311634064,
      "learning_rate": 0.00019744245524296676,
      "loss": 1.6367,
      "step": 386
    },
    {
      "epoch": 0.19850354927415745,
      "grad_norm": 0.09830451011657715,
      "learning_rate": 0.00019846547314578006,
      "loss": 1.6638,
      "step": 388
    },
    {
      "epoch": 0.19952676344567372,
      "grad_norm": 0.10720949620008469,
      "learning_rate": 0.00019948849104859336,
      "loss": 1.6571,
      "step": 390
    },
    {
      "epoch": 0.20054997761719,
      "grad_norm": 0.06915664672851562,
      "learning_rate": 0.000199999910488914,
      "loss": 1.669,
      "step": 392
    },
    {
      "epoch": 0.20157319178870628,
      "grad_norm": 0.04960264638066292,
      "learning_rate": 0.00019999991944003202,
      "loss": 1.6529,
      "step": 394
    },
    {
      "epoch": 0.20259640596022255,
      "grad_norm": 0.05139967054128647,
      "learning_rate": 0.00019999977622236462,
      "loss": 1.6053,
      "step": 396
    },
    {
      "epoch": 0.20361962013173882,
      "grad_norm": 0.05288904160261154,
      "learning_rate": 0.0001999995613959917,
      "loss": 1.6905,
      "step": 398
    },
    {
      "epoch": 0.2046428343032551,
      "grad_norm": 0.056239306926727295,
      "learning_rate": 0.00019999927496106707,
      "loss": 1.6662,
      "step": 400
    },
    {
      "epoch": 0.20566604847477138,
      "grad_norm": 0.06484871357679367,
      "learning_rate": 0.0001999989169177959,
      "loss": 1.6803,
      "step": 402
    },
    {
      "epoch": 0.20668926264628765,
      "grad_norm": 0.11631152778863907,
      "learning_rate": 0.00019999848726643454,
      "loss": 1.6389,
      "step": 404
    },
    {
      "epoch": 0.20771247681780391,
      "grad_norm": 0.06311234086751938,
      "learning_rate": 0.00019999798600729064,
      "loss": 1.7017,
      "step": 406
    },
    {
      "epoch": 0.2087356909893202,
      "grad_norm": 0.06155601888895035,
      "learning_rate": 0.00019999741314072323,
      "loss": 1.7014,
      "step": 408
    },
    {
      "epoch": 0.20975890516083648,
      "grad_norm": 0.06340397894382477,
      "learning_rate": 0.00019999676866714244,
      "loss": 1.6735,
      "step": 410
    },
    {
      "epoch": 0.21078211933235275,
      "grad_norm": 0.06068040430545807,
      "learning_rate": 0.00019999605258700983,
      "loss": 1.6224,
      "step": 412
    },
    {
      "epoch": 0.21180533350386904,
      "grad_norm": 0.06651381403207779,
      "learning_rate": 0.00019999526490083817,
      "loss": 1.6279,
      "step": 414
    },
    {
      "epoch": 0.2128285476753853,
      "grad_norm": 0.06273658573627472,
      "learning_rate": 0.00019999440560919152,
      "loss": 1.6591,
      "step": 416
    },
    {
      "epoch": 0.21385176184690158,
      "grad_norm": 0.06989671289920807,
      "learning_rate": 0.00019999347471268516,
      "loss": 1.6405,
      "step": 418
    },
    {
      "epoch": 0.21487497601841785,
      "grad_norm": 0.06204582378268242,
      "learning_rate": 0.00019999247221198573,
      "loss": 1.6512,
      "step": 420
    },
    {
      "epoch": 0.21589819018993414,
      "grad_norm": 0.1728357970714569,
      "learning_rate": 0.00019999139810781112,
      "loss": 1.6332,
      "step": 422
    },
    {
      "epoch": 0.2169214043614504,
      "grad_norm": 0.0696343332529068,
      "learning_rate": 0.00019999025240093044,
      "loss": 1.6649,
      "step": 424
    },
    {
      "epoch": 0.21794461853296668,
      "grad_norm": 0.060923777520656586,
      "learning_rate": 0.00019998903509216415,
      "loss": 1.6269,
      "step": 426
    },
    {
      "epoch": 0.21896783270448295,
      "grad_norm": 0.061977677047252655,
      "learning_rate": 0.00019998774618238394,
      "loss": 1.6636,
      "step": 428
    },
    {
      "epoch": 0.21999104687599924,
      "grad_norm": 0.07241713255643845,
      "learning_rate": 0.0001999863856725128,
      "loss": 1.643,
      "step": 430
    },
    {
      "epoch": 0.2210142610475155,
      "grad_norm": 0.06513350456953049,
      "learning_rate": 0.000199984953563525,
      "loss": 1.6184,
      "step": 432
    },
    {
      "epoch": 0.22203747521903178,
      "grad_norm": 0.06109536439180374,
      "learning_rate": 0.000199983449856446,
      "loss": 1.6734,
      "step": 434
    },
    {
      "epoch": 0.22306068939054807,
      "grad_norm": 0.09125282615423203,
      "learning_rate": 0.0001999818745523526,
      "loss": 1.6617,
      "step": 436
    },
    {
      "epoch": 0.22408390356206434,
      "grad_norm": 0.05963214859366417,
      "learning_rate": 0.00019998022765237288,
      "loss": 1.648,
      "step": 438
    },
    {
      "epoch": 0.2251071177335806,
      "grad_norm": 0.18775390088558197,
      "learning_rate": 0.00019997850915768613,
      "loss": 1.6599,
      "step": 440
    },
    {
      "epoch": 0.22613033190509688,
      "grad_norm": 0.05968334153294563,
      "learning_rate": 0.00019997671906952298,
      "loss": 1.6072,
      "step": 442
    },
    {
      "epoch": 0.22715354607661317,
      "grad_norm": 0.05431201308965683,
      "learning_rate": 0.0001999748573891653,
      "loss": 1.6315,
      "step": 444
    },
    {
      "epoch": 0.22817676024812944,
      "grad_norm": 0.05960986390709877,
      "learning_rate": 0.00019997292411794618,
      "loss": 1.6565,
      "step": 446
    },
    {
      "epoch": 0.2291999744196457,
      "grad_norm": 0.07451862096786499,
      "learning_rate": 0.00019997091925725004,
      "loss": 1.6793,
      "step": 448
    },
    {
      "epoch": 0.23022318859116198,
      "grad_norm": 0.05454723909497261,
      "learning_rate": 0.0001999688428085125,
      "loss": 1.6055,
      "step": 450
    },
    {
      "epoch": 0.23124640276267827,
      "grad_norm": 0.05422728881239891,
      "learning_rate": 0.00019996669477322055,
      "loss": 1.6455,
      "step": 452
    },
    {
      "epoch": 0.23226961693419454,
      "grad_norm": 0.06064201146364212,
      "learning_rate": 0.00019996447515291233,
      "loss": 1.5895,
      "step": 454
    },
    {
      "epoch": 0.2332928311057108,
      "grad_norm": 0.04667961224913597,
      "learning_rate": 0.0001999621839491773,
      "loss": 1.652,
      "step": 456
    },
    {
      "epoch": 0.2343160452772271,
      "grad_norm": 0.06072809919714928,
      "learning_rate": 0.00019995982116365616,
      "loss": 1.6073,
      "step": 458
    },
    {
      "epoch": 0.23533925944874337,
      "grad_norm": 0.05477429926395416,
      "learning_rate": 0.00019995738679804085,
      "loss": 1.6412,
      "step": 460
    },
    {
      "epoch": 0.23636247362025964,
      "grad_norm": 0.08307594060897827,
      "learning_rate": 0.00019995488085407462,
      "loss": 1.6396,
      "step": 462
    },
    {
      "epoch": 0.2373856877917759,
      "grad_norm": 0.059893883764743805,
      "learning_rate": 0.00019995230333355192,
      "loss": 1.6426,
      "step": 464
    },
    {
      "epoch": 0.2384089019632922,
      "grad_norm": 0.06132538989186287,
      "learning_rate": 0.00019994965423831854,
      "loss": 1.6133,
      "step": 466
    },
    {
      "epoch": 0.23943211613480847,
      "grad_norm": 0.07076270133256912,
      "learning_rate": 0.00019994693357027138,
      "loss": 1.576,
      "step": 468
    },
    {
      "epoch": 0.24045533030632474,
      "grad_norm": 0.06282426416873932,
      "learning_rate": 0.00019994414133135877,
      "loss": 1.6373,
      "step": 470
    },
    {
      "epoch": 0.241478544477841,
      "grad_norm": 0.058667294681072235,
      "learning_rate": 0.00019994127752358013,
      "loss": 1.619,
      "step": 472
    },
    {
      "epoch": 0.2425017586493573,
      "grad_norm": 0.08359505236148834,
      "learning_rate": 0.00019993834214898626,
      "loss": 1.6225,
      "step": 474
    },
    {
      "epoch": 0.24352497282087357,
      "grad_norm": 0.06758000701665878,
      "learning_rate": 0.00019993533520967912,
      "loss": 1.5799,
      "step": 476
    },
    {
      "epoch": 0.24454818699238984,
      "grad_norm": 0.11436283588409424,
      "learning_rate": 0.0001999322567078119,
      "loss": 1.6385,
      "step": 478
    },
    {
      "epoch": 0.24557140116390613,
      "grad_norm": 0.05773819610476494,
      "learning_rate": 0.00019992910664558915,
      "loss": 1.6022,
      "step": 480
    },
    {
      "epoch": 0.2465946153354224,
      "grad_norm": 0.052521176636219025,
      "learning_rate": 0.00019992588502526658,
      "loss": 1.6137,
      "step": 482
    },
    {
      "epoch": 0.24761782950693867,
      "grad_norm": 0.056573059409856796,
      "learning_rate": 0.00019992259184915115,
      "loss": 1.6065,
      "step": 484
    },
    {
      "epoch": 0.24864104367845494,
      "grad_norm": 0.05170164257287979,
      "learning_rate": 0.00019991922711960102,
      "loss": 1.6325,
      "step": 486
    },
    {
      "epoch": 0.24966425784997123,
      "grad_norm": 0.05951111018657684,
      "learning_rate": 0.00019991579083902572,
      "loss": 1.6034,
      "step": 488
    },
    {
      "epoch": 0.2506874720214875,
      "grad_norm": 0.054325833916664124,
      "learning_rate": 0.00019991228300988585,
      "loss": 1.6102,
      "step": 490
    },
    {
      "epoch": 0.2517106861930038,
      "grad_norm": 0.07080011814832687,
      "learning_rate": 0.0001999087036346934,
      "loss": 1.6302,
      "step": 492
    },
    {
      "epoch": 0.25273390036452004,
      "grad_norm": 0.06116727367043495,
      "learning_rate": 0.00019990505271601144,
      "loss": 1.6243,
      "step": 494
    },
    {
      "epoch": 0.25375711453603633,
      "grad_norm": 0.0602283850312233,
      "learning_rate": 0.0001999013302564544,
      "loss": 1.6024,
      "step": 496
    },
    {
      "epoch": 0.2547803287075526,
      "grad_norm": 0.06313999742269516,
      "learning_rate": 0.0001998975362586879,
      "loss": 1.6238,
      "step": 498
    },
    {
      "epoch": 0.25580354287906887,
      "grad_norm": 0.06217190623283386,
      "learning_rate": 0.00019989367072542876,
      "loss": 1.6251,
      "step": 500
    },
    {
      "epoch": 0.25682675705058516,
      "grad_norm": 0.07256064563989639,
      "learning_rate": 0.00019988973365944507,
      "loss": 1.5929,
      "step": 502
    },
    {
      "epoch": 0.2578499712221014,
      "grad_norm": 0.062201980501413345,
      "learning_rate": 0.00019988572506355606,
      "loss": 1.5933,
      "step": 504
    },
    {
      "epoch": 0.2588731853936177,
      "grad_norm": 0.07168910652399063,
      "learning_rate": 0.00019988164494063226,
      "loss": 1.6474,
      "step": 506
    },
    {
      "epoch": 0.259896399565134,
      "grad_norm": 0.056935928761959076,
      "learning_rate": 0.00019987749329359548,
      "loss": 1.5992,
      "step": 508
    },
    {
      "epoch": 0.26091961373665024,
      "grad_norm": 0.07088612020015717,
      "learning_rate": 0.00019987327012541855,
      "loss": 1.5952,
      "step": 510
    },
    {
      "epoch": 0.26194282790816653,
      "grad_norm": 0.06023348495364189,
      "learning_rate": 0.0001998689754391257,
      "loss": 1.6064,
      "step": 512
    },
    {
      "epoch": 0.2629660420796828,
      "grad_norm": 0.05686601996421814,
      "learning_rate": 0.0001998646092377923,
      "loss": 1.5992,
      "step": 514
    },
    {
      "epoch": 0.26398925625119907,
      "grad_norm": 0.07028970122337341,
      "learning_rate": 0.00019986017152454495,
      "loss": 1.5835,
      "step": 516
    },
    {
      "epoch": 0.26501247042271536,
      "grad_norm": 0.0645250454545021,
      "learning_rate": 0.0001998556623025614,
      "loss": 1.6055,
      "step": 518
    },
    {
      "epoch": 0.2660356845942316,
      "grad_norm": 0.0723612904548645,
      "learning_rate": 0.00019985108157507067,
      "loss": 1.6248,
      "step": 520
    },
    {
      "epoch": 0.2670588987657479,
      "grad_norm": 0.06222670525312424,
      "learning_rate": 0.00019984642934535297,
      "loss": 1.6411,
      "step": 522
    },
    {
      "epoch": 0.2680821129372642,
      "grad_norm": 0.057786975055933,
      "learning_rate": 0.00019984170561673976,
      "loss": 1.6313,
      "step": 524
    },
    {
      "epoch": 0.26910532710878043,
      "grad_norm": 0.061039313673973083,
      "learning_rate": 0.00019983691039261357,
      "loss": 1.5896,
      "step": 526
    },
    {
      "epoch": 0.27012854128029673,
      "grad_norm": 0.04816308245062828,
      "learning_rate": 0.00019983204367640824,
      "loss": 1.5986,
      "step": 528
    },
    {
      "epoch": 0.271151755451813,
      "grad_norm": 0.06095914542675018,
      "learning_rate": 0.0001998271054716088,
      "loss": 1.5995,
      "step": 530
    },
    {
      "epoch": 0.27217496962332927,
      "grad_norm": 0.05422305688261986,
      "learning_rate": 0.00019982209578175137,
      "loss": 1.6047,
      "step": 532
    },
    {
      "epoch": 0.27319818379484556,
      "grad_norm": 0.05381491780281067,
      "learning_rate": 0.0001998170146104234,
      "loss": 1.5748,
      "step": 534
    },
    {
      "epoch": 0.27422139796636186,
      "grad_norm": 0.08168444782495499,
      "learning_rate": 0.0001998118619612634,
      "loss": 1.5941,
      "step": 536
    },
    {
      "epoch": 0.2752446121378781,
      "grad_norm": 0.05323650687932968,
      "learning_rate": 0.00019980663783796118,
      "loss": 1.6015,
      "step": 538
    },
    {
      "epoch": 0.2762678263093944,
      "grad_norm": 0.08093535900115967,
      "learning_rate": 0.0001998013422442577,
      "loss": 1.6325,
      "step": 540
    },
    {
      "epoch": 0.27729104048091063,
      "grad_norm": 0.05909120664000511,
      "learning_rate": 0.00019979597518394491,
      "loss": 1.6684,
      "step": 542
    },
    {
      "epoch": 0.27831425465242693,
      "grad_norm": 0.0684690847992897,
      "learning_rate": 0.00019979053666086634,
      "loss": 1.6682,
      "step": 544
    },
    {
      "epoch": 0.2793374688239432,
      "grad_norm": 0.05854607746005058,
      "learning_rate": 0.00019978502667891625,
      "loss": 1.6133,
      "step": 546
    },
    {
      "epoch": 0.28036068299545946,
      "grad_norm": 0.05019630119204521,
      "learning_rate": 0.00019977944524204037,
      "loss": 1.5968,
      "step": 548
    },
    {
      "epoch": 0.28138389716697576,
      "grad_norm": 0.0662982240319252,
      "learning_rate": 0.00019977379235423551,
      "loss": 1.589,
      "step": 550
    },
    {
      "epoch": 0.28240711133849206,
      "grad_norm": 0.049058698117733,
      "learning_rate": 0.00019976806801954964,
      "loss": 1.5979,
      "step": 552
    },
    {
      "epoch": 0.2834303255100083,
      "grad_norm": 0.058459024876356125,
      "learning_rate": 0.00019976227224208183,
      "loss": 1.5813,
      "step": 554
    },
    {
      "epoch": 0.2844535396815246,
      "grad_norm": 0.048455361276865005,
      "learning_rate": 0.00019975640502598244,
      "loss": 1.5652,
      "step": 556
    },
    {
      "epoch": 0.2854767538530409,
      "grad_norm": 0.06029395014047623,
      "learning_rate": 0.00019975046637545288,
      "loss": 1.6166,
      "step": 558
    },
    {
      "epoch": 0.28649996802455713,
      "grad_norm": 0.05902372673153877,
      "learning_rate": 0.00019974445629474574,
      "loss": 1.5955,
      "step": 560
    },
    {
      "epoch": 0.2875231821960734,
      "grad_norm": 0.04898110404610634,
      "learning_rate": 0.0001997383747881648,
      "loss": 1.5554,
      "step": 562
    },
    {
      "epoch": 0.28854639636758966,
      "grad_norm": 0.07228821516036987,
      "learning_rate": 0.00019973222186006498,
      "loss": 1.6178,
      "step": 564
    },
    {
      "epoch": 0.28956961053910596,
      "grad_norm": 0.07162781804800034,
      "learning_rate": 0.00019972599751485226,
      "loss": 1.6128,
      "step": 566
    },
    {
      "epoch": 0.29059282471062226,
      "grad_norm": 0.047708939760923386,
      "learning_rate": 0.00019971970175698385,
      "loss": 1.5776,
      "step": 568
    },
    {
      "epoch": 0.2916160388821385,
      "grad_norm": 0.05930710583925247,
      "learning_rate": 0.0001997133345909681,
      "loss": 1.6095,
      "step": 570
    },
    {
      "epoch": 0.2926392530536548,
      "grad_norm": 0.057511184364557266,
      "learning_rate": 0.00019970689602136438,
      "loss": 1.564,
      "step": 572
    },
    {
      "epoch": 0.2936624672251711,
      "grad_norm": 0.0659165233373642,
      "learning_rate": 0.00019970038605278338,
      "loss": 1.6057,
      "step": 574
    },
    {
      "epoch": 0.2946856813966873,
      "grad_norm": 0.0638163760304451,
      "learning_rate": 0.00019969380468988677,
      "loss": 1.5684,
      "step": 576
    },
    {
      "epoch": 0.2957088955682036,
      "grad_norm": 0.0477282889187336,
      "learning_rate": 0.00019968715193738738,
      "loss": 1.5596,
      "step": 578
    },
    {
      "epoch": 0.2967321097397199,
      "grad_norm": 0.055721577256917953,
      "learning_rate": 0.00019968042780004917,
      "loss": 1.5854,
      "step": 580
    },
    {
      "epoch": 0.29775532391123616,
      "grad_norm": 0.05852237716317177,
      "learning_rate": 0.00019967363228268724,
      "loss": 1.5952,
      "step": 582
    },
    {
      "epoch": 0.29877853808275245,
      "grad_norm": 0.04583214595913887,
      "learning_rate": 0.00019966676539016779,
      "loss": 1.5835,
      "step": 584
    },
    {
      "epoch": 0.2998017522542687,
      "grad_norm": 0.052682552486658096,
      "learning_rate": 0.00019965982712740808,
      "loss": 1.5932,
      "step": 586
    },
    {
      "epoch": 0.300824966425785,
      "grad_norm": 0.06101151555776596,
      "learning_rate": 0.00019965281749937655,
      "loss": 1.661,
      "step": 588
    },
    {
      "epoch": 0.3018481805973013,
      "grad_norm": 0.052221182733774185,
      "learning_rate": 0.0001996457365110927,
      "loss": 1.5834,
      "step": 590
    },
    {
      "epoch": 0.3028713947688175,
      "grad_norm": 0.05288353189826012,
      "learning_rate": 0.00019963858416762717,
      "loss": 1.561,
      "step": 592
    },
    {
      "epoch": 0.3038946089403338,
      "grad_norm": 0.05072011053562164,
      "learning_rate": 0.00019963136047410166,
      "loss": 1.5542,
      "step": 594
    },
    {
      "epoch": 0.3049178231118501,
      "grad_norm": 0.05482899025082588,
      "learning_rate": 0.00019962406543568898,
      "loss": 1.6568,
      "step": 596
    },
    {
      "epoch": 0.30594103728336636,
      "grad_norm": 0.06114513427019119,
      "learning_rate": 0.00019961669905761302,
      "loss": 1.5619,
      "step": 598
    },
    {
      "epoch": 0.30696425145488265,
      "grad_norm": 0.14878755807876587,
      "learning_rate": 0.00019960926134514873,
      "loss": 1.6222,
      "step": 600
    },
    {
      "epoch": 0.30798746562639895,
      "grad_norm": 0.05369825288653374,
      "learning_rate": 0.00019960175230362222,
      "loss": 1.574,
      "step": 602
    },
    {
      "epoch": 0.3090106797979152,
      "grad_norm": 0.04912363365292549,
      "learning_rate": 0.00019959417193841063,
      "loss": 1.5644,
      "step": 604
    },
    {
      "epoch": 0.3100338939694315,
      "grad_norm": 0.055376555770635605,
      "learning_rate": 0.00019958652025494212,
      "loss": 1.5978,
      "step": 606
    },
    {
      "epoch": 0.3110571081409477,
      "grad_norm": 0.054994821548461914,
      "learning_rate": 0.00019957879725869602,
      "loss": 1.6327,
      "step": 608
    },
    {
      "epoch": 0.312080322312464,
      "grad_norm": 0.05939999222755432,
      "learning_rate": 0.00019957100295520266,
      "loss": 1.5706,
      "step": 610
    },
    {
      "epoch": 0.3131035364839803,
      "grad_norm": 0.05616987124085426,
      "learning_rate": 0.00019956313735004346,
      "loss": 1.5932,
      "step": 612
    },
    {
      "epoch": 0.31412675065549656,
      "grad_norm": 0.10900183767080307,
      "learning_rate": 0.00019955520044885087,
      "loss": 1.5757,
      "step": 614
    },
    {
      "epoch": 0.31514996482701285,
      "grad_norm": 1.115419864654541,
      "learning_rate": 0.00019954719225730847,
      "loss": 1.666,
      "step": 616
    },
    {
      "epoch": 0.31617317899852915,
      "grad_norm": 0.13737702369689941,
      "learning_rate": 0.00019953911278115078,
      "loss": 1.6406,
      "step": 618
    },
    {
      "epoch": 0.3171963931700454,
      "grad_norm": 0.18733379244804382,
      "learning_rate": 0.00019953096202616344,
      "loss": 1.6465,
      "step": 620
    },
    {
      "epoch": 0.3182196073415617,
      "grad_norm": 0.513283371925354,
      "learning_rate": 0.0001995227399981831,
      "loss": 1.6477,
      "step": 622
    },
    {
      "epoch": 0.319242821513078,
      "grad_norm": 0.30918484926223755,
      "learning_rate": 0.0001995144467030975,
      "loss": 1.6566,
      "step": 624
    },
    {
      "epoch": 0.3202660356845942,
      "grad_norm": 0.0951157733798027,
      "learning_rate": 0.00019950608214684535,
      "loss": 1.6034,
      "step": 626
    },
    {
      "epoch": 0.3212892498561105,
      "grad_norm": 0.05696268379688263,
      "learning_rate": 0.00019949764633541643,
      "loss": 1.6518,
      "step": 628
    },
    {
      "epoch": 0.32231246402762675,
      "grad_norm": 0.06777111440896988,
      "learning_rate": 0.00019948913927485146,
      "loss": 1.6585,
      "step": 630
    },
    {
      "epoch": 0.32333567819914305,
      "grad_norm": 0.055656664073467255,
      "learning_rate": 0.00019948056097124234,
      "loss": 1.5623,
      "step": 632
    },
    {
      "epoch": 0.32435889237065935,
      "grad_norm": 0.05220302939414978,
      "learning_rate": 0.00019947191143073186,
      "loss": 1.6067,
      "step": 634
    },
    {
      "epoch": 0.3253821065421756,
      "grad_norm": 0.05276400223374367,
      "learning_rate": 0.00019946319065951382,
      "loss": 1.5997,
      "step": 636
    },
    {
      "epoch": 0.3264053207136919,
      "grad_norm": 0.06689111888408661,
      "learning_rate": 0.00019945439866383312,
      "loss": 1.5621,
      "step": 638
    },
    {
      "epoch": 0.3274285348852082,
      "grad_norm": 0.07574088871479034,
      "learning_rate": 0.00019944553544998562,
      "loss": 1.5873,
      "step": 640
    },
    {
      "epoch": 0.3284517490567244,
      "grad_norm": 0.1480696201324463,
      "learning_rate": 0.0001994366010243181,
      "loss": 1.6142,
      "step": 642
    },
    {
      "epoch": 0.3294749632282407,
      "grad_norm": 0.2425205558538437,
      "learning_rate": 0.00019942759539322844,
      "loss": 1.6513,
      "step": 644
    },
    {
      "epoch": 0.330498177399757,
      "grad_norm": 0.10395582765340805,
      "learning_rate": 0.00019941851856316548,
      "loss": 1.6186,
      "step": 646
    },
    {
      "epoch": 0.33152139157127325,
      "grad_norm": 0.07959388941526413,
      "learning_rate": 0.000199409370540629,
      "loss": 1.5954,
      "step": 648
    },
    {
      "epoch": 0.33254460574278955,
      "grad_norm": 0.08391022682189941,
      "learning_rate": 0.00019940015133216985,
      "loss": 1.6359,
      "step": 650
    },
    {
      "epoch": 0.33356781991430584,
      "grad_norm": 0.10863954573869705,
      "learning_rate": 0.00019939086094438975,
      "loss": 1.5591,
      "step": 652
    },
    {
      "epoch": 0.3345910340858221,
      "grad_norm": 0.0719527155160904,
      "learning_rate": 0.00019938149938394145,
      "loss": 1.5536,
      "step": 654
    },
    {
      "epoch": 0.3356142482573384,
      "grad_norm": 0.054009951651096344,
      "learning_rate": 0.0001993720666575287,
      "loss": 1.5925,
      "step": 656
    },
    {
      "epoch": 0.3366374624288546,
      "grad_norm": 0.06805548816919327,
      "learning_rate": 0.00019936256277190608,
      "loss": 1.6079,
      "step": 658
    },
    {
      "epoch": 0.3376606766003709,
      "grad_norm": 0.057809535413980484,
      "learning_rate": 0.0001993529877338793,
      "loss": 1.5569,
      "step": 660
    },
    {
      "epoch": 0.3386838907718872,
      "grad_norm": 0.05796423181891441,
      "learning_rate": 0.0001993433415503049,
      "loss": 1.6148,
      "step": 662
    },
    {
      "epoch": 0.33970710494340345,
      "grad_norm": 0.0450466088950634,
      "learning_rate": 0.0001993336242280904,
      "loss": 1.6024,
      "step": 664
    },
    {
      "epoch": 0.34073031911491974,
      "grad_norm": 0.05356905981898308,
      "learning_rate": 0.00019932383577419432,
      "loss": 1.5696,
      "step": 666
    },
    {
      "epoch": 0.34175353328643604,
      "grad_norm": 0.04915151000022888,
      "learning_rate": 0.00019931397619562597,
      "loss": 1.601,
      "step": 668
    },
    {
      "epoch": 0.3427767474579523,
      "grad_norm": 0.2238396257162094,
      "learning_rate": 0.00019930404549944574,
      "loss": 1.6144,
      "step": 670
    },
    {
      "epoch": 0.3437999616294686,
      "grad_norm": 0.07003773748874664,
      "learning_rate": 0.00019929404369276488,
      "loss": 1.6132,
      "step": 672
    },
    {
      "epoch": 0.34482317580098487,
      "grad_norm": 0.07609610259532928,
      "learning_rate": 0.00019928397078274555,
      "loss": 1.5351,
      "step": 674
    },
    {
      "epoch": 0.3458463899725011,
      "grad_norm": 0.057023849338293076,
      "learning_rate": 0.00019927382677660088,
      "loss": 1.5643,
      "step": 676
    },
    {
      "epoch": 0.3468696041440174,
      "grad_norm": 0.0493864081799984,
      "learning_rate": 0.0001992636116815948,
      "loss": 1.5837,
      "step": 678
    },
    {
      "epoch": 0.34789281831553365,
      "grad_norm": 0.05028039962053299,
      "learning_rate": 0.00019925332550504234,
      "loss": 1.6003,
      "step": 680
    },
    {
      "epoch": 0.34891603248704994,
      "grad_norm": 0.050032299011945724,
      "learning_rate": 0.00019924296825430925,
      "loss": 1.5583,
      "step": 682
    },
    {
      "epoch": 0.34993924665856624,
      "grad_norm": 0.04059847444295883,
      "learning_rate": 0.00019923253993681225,
      "loss": 1.6101,
      "step": 684
    },
    {
      "epoch": 0.3509624608300825,
      "grad_norm": 0.045728132128715515,
      "learning_rate": 0.00019922204056001895,
      "loss": 1.5973,
      "step": 686
    },
    {
      "epoch": 0.3519856750015988,
      "grad_norm": 0.04674302786588669,
      "learning_rate": 0.0001992114701314478,
      "loss": 1.5785,
      "step": 688
    },
    {
      "epoch": 0.35300888917311507,
      "grad_norm": 0.04860880225896835,
      "learning_rate": 0.00019920082865866818,
      "loss": 1.5761,
      "step": 690
    },
    {
      "epoch": 0.3540321033446313,
      "grad_norm": 0.04689641669392586,
      "learning_rate": 0.00019919011614930035,
      "loss": 1.6015,
      "step": 692
    },
    {
      "epoch": 0.3550553175161476,
      "grad_norm": 0.04507840797305107,
      "learning_rate": 0.0001991793326110154,
      "loss": 1.5762,
      "step": 694
    },
    {
      "epoch": 0.3560785316876639,
      "grad_norm": 0.04468555748462677,
      "learning_rate": 0.00019916847805153526,
      "loss": 1.5615,
      "step": 696
    },
    {
      "epoch": 0.35710174585918014,
      "grad_norm": 0.07028740644454956,
      "learning_rate": 0.00019915755247863285,
      "loss": 1.6001,
      "step": 698
    },
    {
      "epoch": 0.35812496003069644,
      "grad_norm": 0.03917892277240753,
      "learning_rate": 0.00019914655590013176,
      "loss": 1.6153,
      "step": 700
    },
    {
      "epoch": 0.3591481742022127,
      "grad_norm": 0.06443695724010468,
      "learning_rate": 0.0001991354883239066,
      "loss": 1.5588,
      "step": 702
    },
    {
      "epoch": 0.360171388373729,
      "grad_norm": 0.04684121161699295,
      "learning_rate": 0.00019912434975788264,
      "loss": 1.5726,
      "step": 704
    },
    {
      "epoch": 0.36119460254524527,
      "grad_norm": 0.04538768157362938,
      "learning_rate": 0.00019911314021003613,
      "loss": 1.592,
      "step": 706
    },
    {
      "epoch": 0.3622178167167615,
      "grad_norm": 0.040085602551698685,
      "learning_rate": 0.0001991018596883941,
      "loss": 1.577,
      "step": 708
    },
    {
      "epoch": 0.3632410308882778,
      "grad_norm": 0.04734279587864876,
      "learning_rate": 0.00019909050820103442,
      "loss": 1.6194,
      "step": 710
    },
    {
      "epoch": 0.3642642450597941,
      "grad_norm": 0.051557011902332306,
      "learning_rate": 0.00019907908575608573,
      "loss": 1.5776,
      "step": 712
    },
    {
      "epoch": 0.36528745923131034,
      "grad_norm": 0.042105671018362045,
      "learning_rate": 0.00019906759236172752,
      "loss": 1.562,
      "step": 714
    },
    {
      "epoch": 0.36631067340282664,
      "grad_norm": 0.04763809219002724,
      "learning_rate": 0.00019905602802619007,
      "loss": 1.5727,
      "step": 716
    },
    {
      "epoch": 0.36733388757434293,
      "grad_norm": 0.05205756798386574,
      "learning_rate": 0.00019904439275775452,
      "loss": 1.5595,
      "step": 718
    },
    {
      "epoch": 0.3683571017458592,
      "grad_norm": 0.04210933670401573,
      "learning_rate": 0.0001990326865647527,
      "loss": 1.5812,
      "step": 720
    },
    {
      "epoch": 0.36938031591737547,
      "grad_norm": 0.04100721701979637,
      "learning_rate": 0.00019902090945556728,
      "loss": 1.5492,
      "step": 722
    },
    {
      "epoch": 0.3704035300888917,
      "grad_norm": 0.04252148047089577,
      "learning_rate": 0.0001990090614386318,
      "loss": 1.5397,
      "step": 724
    },
    {
      "epoch": 0.371426744260408,
      "grad_norm": 0.040999703109264374,
      "learning_rate": 0.00019899714252243035,
      "loss": 1.533,
      "step": 726
    },
    {
      "epoch": 0.3724499584319243,
      "grad_norm": 0.03823763504624367,
      "learning_rate": 0.00019898515271549804,
      "loss": 1.5385,
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.37347317260344054, |
|
"grad_norm": 0.041486915200948715, |
|
"learning_rate": 0.0001989730920264206, |
|
"loss": 1.5975, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.37449638677495684, |
|
"grad_norm": 0.042897533625364304, |
|
"learning_rate": 0.00019896096046383456, |
|
"loss": 1.574, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.37551960094647313, |
|
"grad_norm": 0.05677172914147377, |
|
"learning_rate": 0.00019894875803642715, |
|
"loss": 1.5564, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.37654281511798937, |
|
"grad_norm": 0.0416000559926033, |
|
"learning_rate": 0.00019893648475293648, |
|
"loss": 1.5982, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.37756602928950567, |
|
"grad_norm": 0.04389720410108566, |
|
"learning_rate": 0.00019892414062215122, |
|
"loss": 1.5661, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.37858924346102196, |
|
"grad_norm": 0.048660341650247574, |
|
"learning_rate": 0.0001989117256529109, |
|
"loss": 1.5554, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3796124576325382, |
|
"grad_norm": 0.04659014940261841, |
|
"learning_rate": 0.00019889923985410576, |
|
"loss": 1.5932, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.3806356718040545, |
|
"grad_norm": 0.04693235456943512, |
|
"learning_rate": 0.00019888668323467669, |
|
"loss": 1.5985, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.38165888597557074, |
|
"grad_norm": 0.05906931310892105, |
|
"learning_rate": 0.00019887405580361537, |
|
"loss": 1.592, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.38268210014708703, |
|
"grad_norm": 0.0707060918211937, |
|
"learning_rate": 0.0001988613575699642, |
|
"loss": 1.5491, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.38370531431860333, |
|
"grad_norm": 0.0510844886302948, |
|
"learning_rate": 0.00019884858854281613, |
|
"loss": 1.5433, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.38472852849011957, |
|
"grad_norm": 0.058799102902412415, |
|
"learning_rate": 0.00019883574873131503, |
|
"loss": 1.5467, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.38575174266163587, |
|
"grad_norm": 0.04918012022972107, |
|
"learning_rate": 0.0001988228381446553, |
|
"loss": 1.5685, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.38677495683315216, |
|
"grad_norm": 0.044637810438871384, |
|
"learning_rate": 0.00019880985679208207, |
|
"loss": 1.5767, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.3877981710046684, |
|
"grad_norm": 0.052684806287288666, |
|
"learning_rate": 0.0001987968046828911, |
|
"loss": 1.5457, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.3888213851761847, |
|
"grad_norm": 0.045015860348939896, |
|
"learning_rate": 0.0001987836818264289, |
|
"loss": 1.5136, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.389844599347701, |
|
"grad_norm": 0.0538019984960556, |
|
"learning_rate": 0.0001987704882320926, |
|
"loss": 1.5673, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.39086781351921723, |
|
"grad_norm": 0.04201149195432663, |
|
"learning_rate": 0.00019875722390932997, |
|
"loss": 1.5559, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.39189102769073353, |
|
"grad_norm": 0.04188109561800957, |
|
"learning_rate": 0.00019874388886763944, |
|
"loss": 1.4982, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.39291424186224977, |
|
"grad_norm": 0.0503980815410614, |
|
"learning_rate": 0.00019873048311657007, |
|
"loss": 1.5018, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.39393745603376606, |
|
"grad_norm": 0.04854050651192665, |
|
"learning_rate": 0.0001987170066657216, |
|
"loss": 1.5331, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.39496067020528236, |
|
"grad_norm": 0.04634295031428337, |
|
"learning_rate": 0.00019870345952474437, |
|
"loss": 1.5304, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.3959838843767986, |
|
"grad_norm": 0.04464833438396454, |
|
"learning_rate": 0.0001986898417033393, |
|
"loss": 1.5518, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.3970070985483149, |
|
"grad_norm": 0.04434438794851303, |
|
"learning_rate": 0.00019867615321125795, |
|
"loss": 1.5372, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.3980303127198312, |
|
"grad_norm": 0.04564082249999046, |
|
"learning_rate": 0.00019866239405830248, |
|
"loss": 1.5373, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.39905352689134743, |
|
"grad_norm": 0.042439211159944534, |
|
"learning_rate": 0.00019864856425432574, |
|
"loss": 1.5682, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4000767410628637, |
|
"grad_norm": 0.051853910088539124, |
|
"learning_rate": 0.00019863466380923105, |
|
"loss": 1.5408, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.40109995523438, |
|
"grad_norm": 0.04109041020274162, |
|
"learning_rate": 0.00019862069273297232, |
|
"loss": 1.5557, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.40212316940589626, |
|
"grad_norm": 0.04249493032693863, |
|
"learning_rate": 0.00019860665103555415, |
|
"loss": 1.5723, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.40314638357741256, |
|
"grad_norm": 0.041393015533685684, |
|
"learning_rate": 0.0001985925387270316, |
|
"loss": 1.6034, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.4041695977489288, |
|
"grad_norm": 0.03967997431755066, |
|
"learning_rate": 0.00019857835581751037, |
|
"loss": 1.5252, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4051928119204451, |
|
"grad_norm": 0.0383961945772171, |
|
"learning_rate": 0.00019856410231714662, |
|
"loss": 1.5718, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.4062160260919614, |
|
"grad_norm": 0.04732939228415489, |
|
"learning_rate": 0.00019854977823614717, |
|
"loss": 1.5473, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.40723924026347763, |
|
"grad_norm": 0.04425951838493347, |
|
"learning_rate": 0.00019853538358476932, |
|
"loss": 1.5976, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.4082624544349939, |
|
"grad_norm": 0.041833970695734024, |
|
"learning_rate": 0.0001985209183733209, |
|
"loss": 1.6024, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.4092856686065102, |
|
"grad_norm": 0.04387862607836723, |
|
"learning_rate": 0.0001985063826121603, |
|
"loss": 1.5384, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.41030888277802646, |
|
"grad_norm": 0.04852529242634773, |
|
"learning_rate": 0.00019849177631169643, |
|
"loss": 1.5485, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.41133209694954276, |
|
"grad_norm": 0.04267437756061554, |
|
"learning_rate": 0.00019847709948238865, |
|
"loss": 1.5186, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.41235531112105905, |
|
"grad_norm": 0.04403737559914589, |
|
"learning_rate": 0.00019846235213474692, |
|
"loss": 1.5374, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.4133785252925753, |
|
"grad_norm": 0.04668973386287689, |
|
"learning_rate": 0.00019844753427933164, |
|
"loss": 1.5209, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.4144017394640916, |
|
"grad_norm": 0.045447513461112976, |
|
"learning_rate": 0.00019843264592675367, |
|
"loss": 1.5888, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.41542495363560783, |
|
"grad_norm": 0.04239337146282196, |
|
"learning_rate": 0.00019841768708767438, |
|
"loss": 1.5866, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.4164481678071241, |
|
"grad_norm": 0.04571668431162834, |
|
"learning_rate": 0.0001984026577728057, |
|
"loss": 1.5134, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.4174713819786404, |
|
"grad_norm": 0.041478246450424194, |
|
"learning_rate": 0.00019838755799290994, |
|
"loss": 1.5555, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.41849459615015666, |
|
"grad_norm": 0.04084784537553787, |
|
"learning_rate": 0.00019837238775879983, |
|
"loss": 1.5847, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.41951781032167296, |
|
"grad_norm": 0.0393175333738327, |
|
"learning_rate": 0.00019835714708133862, |
|
"loss": 1.5377, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.42054102449318925, |
|
"grad_norm": 0.03987790644168854, |
|
"learning_rate": 0.00019834183597143996, |
|
"loss": 1.5604, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.4215642386647055, |
|
"grad_norm": 0.04945560172200203, |
|
"learning_rate": 0.00019832645444006804, |
|
"loss": 1.5239, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.4225874528362218, |
|
"grad_norm": 0.042219970375299454, |
|
"learning_rate": 0.00019831100249823733, |
|
"loss": 1.5435, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.4236106670077381, |
|
"grad_norm": 0.06793594360351562, |
|
"learning_rate": 0.00019829548015701283, |
|
"loss": 1.5204, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.4246338811792543, |
|
"grad_norm": 0.04633813723921776, |
|
"learning_rate": 0.00019827988742750988, |
|
"loss": 1.5494, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4256570953507706, |
|
"grad_norm": 0.041469499468803406, |
|
"learning_rate": 0.0001982642243208943, |
|
"loss": 1.5549, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.42668030952228686, |
|
"grad_norm": 0.039512719959020615, |
|
"learning_rate": 0.0001982484908483822, |
|
"loss": 1.5614, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.42770352369380316, |
|
"grad_norm": 0.04240869730710983, |
|
"learning_rate": 0.0001982326870212402, |
|
"loss": 1.5597, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.42872673786531945, |
|
"grad_norm": 0.04469761997461319, |
|
"learning_rate": 0.00019821681285078522, |
|
"loss": 1.575, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.4297499520368357, |
|
"grad_norm": 0.05203311890363693, |
|
"learning_rate": 0.00019820086834838456, |
|
"loss": 1.5144, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.430773166208352, |
|
"grad_norm": 0.046044569462537766, |
|
"learning_rate": 0.00019818485352545592, |
|
"loss": 1.5328, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.4317963803798683, |
|
"grad_norm": 0.05522793158888817, |
|
"learning_rate": 0.00019816876839346735, |
|
"loss": 1.5266, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.4328195945513845, |
|
"grad_norm": 0.04644525796175003, |
|
"learning_rate": 0.00019815261296393715, |
|
"loss": 1.5682, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.4338428087229008, |
|
"grad_norm": 0.06290300190448761, |
|
"learning_rate": 0.00019813638724843413, |
|
"loss": 1.5643, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.4348660228944171, |
|
"grad_norm": 0.050486985594034195, |
|
"learning_rate": 0.00019812009125857728, |
|
"loss": 1.5491, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.43588923706593335, |
|
"grad_norm": 0.05234065279364586, |
|
"learning_rate": 0.000198103725006036, |
|
"loss": 1.5718, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.43691245123744965, |
|
"grad_norm": 0.05265431106090546, |
|
"learning_rate": 0.00019808728850253, |
|
"loss": 1.56, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.4379356654089659, |
|
"grad_norm": 0.04220706969499588, |
|
"learning_rate": 0.00019807078175982924, |
|
"loss": 1.551, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.4389588795804822, |
|
"grad_norm": 0.042153794318437576, |
|
"learning_rate": 0.00019805420478975403, |
|
"loss": 1.5793, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.4399820937519985, |
|
"grad_norm": 0.04063679277896881, |
|
"learning_rate": 0.00019803755760417494, |
|
"loss": 1.5404, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4410053079235147, |
|
"grad_norm": 0.04740441218018532, |
|
"learning_rate": 0.0001980208402150128, |
|
"loss": 1.526, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.442028522095031, |
|
"grad_norm": 0.04050862789154053, |
|
"learning_rate": 0.0001980040526342388, |
|
"loss": 1.5357, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.4430517362665473, |
|
"grad_norm": 0.050952885299921036, |
|
"learning_rate": 0.00019798719487387428, |
|
"loss": 1.5102, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.44407495043806355, |
|
"grad_norm": 0.048501502722501755, |
|
"learning_rate": 0.00019797026694599098, |
|
"loss": 1.5637, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.44509816460957985, |
|
"grad_norm": 0.03910909220576286, |
|
"learning_rate": 0.0001979532688627107, |
|
"loss": 1.5367, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.44612137878109615, |
|
"grad_norm": 0.05638305842876434, |
|
"learning_rate": 0.0001979362006362056, |
|
"loss": 1.5282, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.4471445929526124, |
|
"grad_norm": 0.05307792127132416, |
|
"learning_rate": 0.00019791906227869808, |
|
"loss": 1.5467, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.4481678071241287, |
|
"grad_norm": 0.04324028640985489, |
|
"learning_rate": 0.0001979018538024607, |
|
"loss": 1.5711, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.4491910212956449, |
|
"grad_norm": 0.03858278691768646, |
|
"learning_rate": 0.00019788457521981623, |
|
"loss": 1.5561, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.4502142354671612, |
|
"grad_norm": 0.043761543929576874, |
|
"learning_rate": 0.00019786722654313772, |
|
"loss": 1.5187, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.4512374496386775, |
|
"grad_norm": 0.08969100564718246, |
|
"learning_rate": 0.00019784980778484834, |
|
"loss": 1.5486, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.45226066381019375, |
|
"grad_norm": 0.04808567091822624, |
|
"learning_rate": 0.00019783231895742143, |
|
"loss": 1.5164, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.45328387798171005, |
|
"grad_norm": 0.04110665246844292, |
|
"learning_rate": 0.00019781476007338058, |
|
"loss": 1.5177, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.45430709215322634, |
|
"grad_norm": 0.050568196922540665, |
|
"learning_rate": 0.00019779713114529947, |
|
"loss": 1.5265, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.4553303063247426, |
|
"grad_norm": 0.04753986746072769, |
|
"learning_rate": 0.00019777943218580207, |
|
"loss": 1.5304, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.4563535204962589, |
|
"grad_norm": 0.05155970901250839, |
|
"learning_rate": 0.00019776166320756227, |
|
"loss": 1.566, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.4573767346677752, |
|
"grad_norm": 0.048765815794467926, |
|
"learning_rate": 0.00019774382422330433, |
|
"loss": 1.5276, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.4583999488392914, |
|
"grad_norm": 0.16882531344890594, |
|
"learning_rate": 0.0001977259152458025, |
|
"loss": 1.5074, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.4594231630108077, |
|
"grad_norm": 0.04014374688267708, |
|
"learning_rate": 0.00019770793628788122, |
|
"loss": 1.5262, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.46044637718232395, |
|
"grad_norm": 0.04874645173549652, |
|
"learning_rate": 0.000197689887362415, |
|
"loss": 1.5158, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.46146959135384025, |
|
"grad_norm": 0.049459170550107956, |
|
"learning_rate": 0.00019767176848232846, |
|
"loss": 1.5449, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.46249280552535654, |
|
"grad_norm": 0.04516777768731117, |
|
"learning_rate": 0.00019765357966059638, |
|
"loss": 1.5722, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.4635160196968728, |
|
"grad_norm": 0.04243026673793793, |
|
"learning_rate": 0.00019763532091024352, |
|
"loss": 1.5562, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.4645392338683891, |
|
"grad_norm": 0.04713771492242813, |
|
"learning_rate": 0.00019761699224434475, |
|
"loss": 1.5425, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.4655624480399054, |
|
"grad_norm": 0.0495879128575325, |
|
"learning_rate": 0.0001975985936760251, |
|
"loss": 1.5517, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.4665856622114216, |
|
"grad_norm": 0.037338342517614365, |
|
"learning_rate": 0.00019758012521845948, |
|
"loss": 1.5923, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.4676088763829379, |
|
"grad_norm": 0.044082753360271454, |
|
"learning_rate": 0.000197561586884873, |
|
"loss": 1.5582, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.4686320905544542, |
|
"grad_norm": 0.045763563364744186, |
|
"learning_rate": 0.00019754297868854073, |
|
"loss": 1.5435, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.46965530472597045, |
|
"grad_norm": 0.04221731796860695, |
|
"learning_rate": 0.00019752430064278777, |
|
"loss": 1.5365, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.47067851889748674, |
|
"grad_norm": 0.04800180345773697, |
|
"learning_rate": 0.0001975055527609893, |
|
"loss": 1.5534, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.471701733069003, |
|
"grad_norm": 0.05618242546916008, |
|
"learning_rate": 0.00019748673505657046, |
|
"loss": 1.5568, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.4727249472405193, |
|
"grad_norm": 0.04696999117732048, |
|
"learning_rate": 0.00019746784754300637, |
|
"loss": 1.5249, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.4737481614120356, |
|
"grad_norm": 0.041852448135614395, |
|
"learning_rate": 0.00019744889023382215, |
|
"loss": 1.5415, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.4747713755835518, |
|
"grad_norm": 0.04743418097496033, |
|
"learning_rate": 0.00019742986314259299, |
|
"loss": 1.5633, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.4757945897550681, |
|
"grad_norm": 0.04543265700340271, |
|
"learning_rate": 0.00019741076628294386, |
|
"loss": 1.5261, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.4768178039265844, |
|
"grad_norm": 0.04992993175983429, |
|
"learning_rate": 0.00019739159966854992, |
|
"loss": 1.5175, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.47784101809810064, |
|
"grad_norm": 0.05793948844075203, |
|
"learning_rate": 0.00019737236331313608, |
|
"loss": 1.59, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.47886423226961694, |
|
"grad_norm": 0.051816169172525406, |
|
"learning_rate": 0.00019735305723047732, |
|
"loss": 1.5008, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.47988744644113324, |
|
"grad_norm": 0.04754515737295151, |
|
"learning_rate": 0.0001973336814343985, |
|
"loss": 1.4773, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.4809106606126495, |
|
"grad_norm": 0.0393076054751873, |
|
"learning_rate": 0.0001973142359387744, |
|
"loss": 1.5568, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.48193387478416577, |
|
"grad_norm": 0.04164562746882439, |
|
"learning_rate": 0.00019729472075752974, |
|
"loss": 1.5319, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.482957088955682, |
|
"grad_norm": 0.04371575266122818, |
|
"learning_rate": 0.00019727513590463906, |
|
"loss": 1.5571, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.4839803031271983, |
|
"grad_norm": 0.0573207251727581, |
|
"learning_rate": 0.00019725548139412692, |
|
"loss": 1.5372, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.4850035172987146, |
|
"grad_norm": 0.04900820180773735, |
|
"learning_rate": 0.00019723575724006767, |
|
"loss": 1.5327, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.48602673147023084, |
|
"grad_norm": 0.039241593331098557, |
|
"learning_rate": 0.00019721596345658552, |
|
"loss": 1.5438, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.48704994564174714, |
|
"grad_norm": 0.043952930718660355, |
|
"learning_rate": 0.00019719610005785465, |
|
"loss": 1.5577, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.48807315981326344, |
|
"grad_norm": 0.038709525018930435, |
|
"learning_rate": 0.0001971761670580989, |
|
"loss": 1.5527, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.4890963739847797, |
|
"grad_norm": 0.03867029398679733, |
|
"learning_rate": 0.0001971561644715922, |
|
"loss": 1.5329, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.49011958815629597, |
|
"grad_norm": 0.0413273349404335, |
|
"learning_rate": 0.00019713609231265805, |
|
"loss": 1.5415, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.49114280232781227, |
|
"grad_norm": 0.03651106357574463, |
|
"learning_rate": 0.00019711595059566998, |
|
"loss": 1.5596, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.4921660164993285, |
|
"grad_norm": 0.03891696035861969, |
|
"learning_rate": 0.0001970957393350512, |
|
"loss": 1.5452, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.4931892306708448, |
|
"grad_norm": 0.03818392753601074, |
|
"learning_rate": 0.0001970754585452748, |
|
"loss": 1.5821, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.49421244484236104, |
|
"grad_norm": 0.03790618106722832, |
|
"learning_rate": 0.0001970551082408636, |
|
"loss": 1.5456, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.49523565901387734, |
|
"grad_norm": 0.043467581272125244, |
|
"learning_rate": 0.00019703468843639024, |
|
"loss": 1.4916, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.49625887318539363, |
|
"grad_norm": 0.03895978257060051, |
|
"learning_rate": 0.0001970141991464771, |
|
"loss": 1.5529, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.4972820873569099, |
|
"grad_norm": 0.03736645728349686, |
|
"learning_rate": 0.0001969936403857963, |
|
"loss": 1.5243, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.49830530152842617, |
|
"grad_norm": 0.03589653596282005, |
|
"learning_rate": 0.0001969730121690698, |
|
"loss": 1.5418, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.49932851569994247, |
|
"grad_norm": 0.03768768534064293, |
|
"learning_rate": 0.00019695231451106912, |
|
"loss": 1.5114, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.5003517298714587, |
|
"grad_norm": 0.04931550845503807, |
|
"learning_rate": 0.00019693154742661575, |
|
"loss": 1.564, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.501374944042975, |
|
"grad_norm": 0.04325348883867264, |
|
"learning_rate": 0.0001969107109305807, |
|
"loss": 1.5092, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5023981582144913, |
|
"grad_norm": 0.03987947851419449, |
|
"learning_rate": 0.00019688980503788475, |
|
"loss": 1.5222, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.5034213723860076, |
|
"grad_norm": 0.04482003673911095, |
|
"learning_rate": 0.00019686882976349836, |
|
"loss": 1.517, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.5044445865575238, |
|
"grad_norm": 0.04025088995695114, |
|
"learning_rate": 0.00019684778512244172, |
|
"loss": 1.5188, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.5054678007290401, |
|
"grad_norm": 0.04705490544438362, |
|
"learning_rate": 0.00019682667112978463, |
|
"loss": 1.5266, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.5064910149005564, |
|
"grad_norm": 0.0493633933365345, |
|
"learning_rate": 0.0001968054878006466, |
|
"loss": 1.5079, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5075142290720727, |
|
"grad_norm": 0.04063592851161957, |
|
"learning_rate": 0.00019678423515019674, |
|
"loss": 1.5169, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.508537443243589, |
|
"grad_norm": 0.04962534457445145, |
|
"learning_rate": 0.00019676291319365387, |
|
"loss": 1.5219, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.5095606574151051, |
|
"grad_norm": 0.03995488956570625, |
|
"learning_rate": 0.00019674152194628638, |
|
"loss": 1.5397, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.5105838715866214, |
|
"grad_norm": 0.04593009501695633, |
|
"learning_rate": 0.00019672006142341234, |
|
"loss": 1.5616, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.5116070857581377, |
|
"grad_norm": 0.04215447977185249, |
|
"learning_rate": 0.00019669853164039933, |
|
"loss": 1.5425, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.512630299929654, |
|
"grad_norm": 0.043728407472372055, |
|
"learning_rate": 0.0001966769326126646, |
|
"loss": 1.5044, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.5136535141011703, |
|
"grad_norm": 0.04384353384375572, |
|
"learning_rate": 0.00019665526435567497, |
|
"loss": 1.5734, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.5146767282726866, |
|
"grad_norm": 0.04542085528373718, |
|
"learning_rate": 0.00019663352688494684, |
|
"loss": 1.5023, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.5156999424442028, |
|
"grad_norm": 0.05727483332157135, |
|
"learning_rate": 0.0001966117202160462, |
|
"loss": 1.5668, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.5167231566157191, |
|
"grad_norm": 0.055995501577854156, |
|
"learning_rate": 0.0001965898443645885, |
|
"loss": 1.5533, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5177463707872354, |
|
"grad_norm": 0.04521145299077034, |
|
"learning_rate": 0.00019656789934623881, |
|
"loss": 1.5196, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.5187695849587517, |
|
"grad_norm": 0.040051352232694626, |
|
"learning_rate": 0.0001965458851767117, |
|
"loss": 1.5293, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.519792799130268, |
|
"grad_norm": 0.04483609274029732, |
|
"learning_rate": 0.00019652380187177126, |
|
"loss": 1.5028, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.5208160133017842, |
|
"grad_norm": 0.04116397351026535, |
|
"learning_rate": 0.00019650164944723115, |
|
"loss": 1.5272, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.5218392274733005, |
|
"grad_norm": 0.04803440347313881, |
|
"learning_rate": 0.00019647942791895445, |
|
"loss": 1.525, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5228624416448168, |
|
"grad_norm": 0.05390439182519913, |
|
"learning_rate": 0.00019645713730285366, |
|
"loss": 1.5446, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.5238856558163331, |
|
"grad_norm": 0.04475432634353638, |
|
"learning_rate": 0.00019643477761489096, |
|
"loss": 1.5213, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.5249088699878494, |
|
"grad_norm": 0.04424989968538284, |
|
"learning_rate": 0.00019641234887107778, |
|
"loss": 1.4888, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.5259320841593657, |
|
"grad_norm": 0.049827560782432556, |
|
"learning_rate": 0.00019638985108747515, |
|
"loss": 1.5555, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.5269552983308818, |
|
"grad_norm": 0.04092090204358101, |
|
"learning_rate": 0.0001963672842801934, |
|
"loss": 1.4815, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5279785125023981, |
|
"grad_norm": 0.052185434848070145, |
|
"learning_rate": 0.00019634464846539246, |
|
"loss": 1.5657, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.5290017266739144, |
|
"grad_norm": 0.04300570487976074, |
|
"learning_rate": 0.00019632194365928153, |
|
"loss": 1.5259, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.5300249408454307, |
|
"grad_norm": 0.04205292835831642, |
|
"learning_rate": 0.00019629916987811926, |
|
"loss": 1.527, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.531048155016947, |
|
"grad_norm": 0.06136661395430565, |
|
"learning_rate": 0.00019627632713821368, |
|
"loss": 1.5541, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.5320713691884632, |
|
"grad_norm": 0.03824898600578308, |
|
"learning_rate": 0.00019625341545592226, |
|
"loss": 1.5496, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5330945833599795, |
|
"grad_norm": 0.041780851781368256, |
|
"learning_rate": 0.0001962304348476518, |
|
"loss": 1.5283, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.5341177975314958, |
|
"grad_norm": 0.04486005753278732, |
|
"learning_rate": 0.0001962073853298584, |
|
"loss": 1.5312, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.5351410117030121, |
|
"grad_norm": 0.041384853422641754, |
|
"learning_rate": 0.00019618426691904762, |
|
"loss": 1.5011, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.5361642258745284, |
|
"grad_norm": 0.0440378412604332, |
|
"learning_rate": 0.00019616107963177425, |
|
"loss": 1.4855, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.5371874400460447, |
|
"grad_norm": 0.052033115178346634, |
|
"learning_rate": 0.00019613782348464244, |
|
"loss": 1.4811, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5382106542175609, |
|
"grad_norm": 0.04121650755405426, |
|
"learning_rate": 0.00019611449849430565, |
|
"loss": 1.5653, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.5392338683890772, |
|
"grad_norm": 0.04445752128958702, |
|
"learning_rate": 0.00019609110467746666, |
|
"loss": 1.5098, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.5402570825605935, |
|
"grad_norm": 0.06591064482927322, |
|
"learning_rate": 0.00019606764205087757, |
|
"loss": 1.5304, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.5412802967321098, |
|
"grad_norm": 0.05301080271601677, |
|
"learning_rate": 0.0001960441106313396, |
|
"loss": 1.4871, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.542303510903626, |
|
"grad_norm": 0.040986523032188416, |
|
"learning_rate": 0.0001960205104357034, |
|
"loss": 1.5195, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5433267250751422, |
|
"grad_norm": 0.03562408685684204, |
|
"learning_rate": 0.00019599684148086878, |
|
"loss": 1.5384, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.5443499392466585, |
|
"grad_norm": 0.04383963719010353, |
|
"learning_rate": 0.00019597310378378476, |
|
"loss": 1.4988, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.5453731534181748, |
|
"grad_norm": 0.06702277064323425, |
|
"learning_rate": 0.00019594929736144976, |
|
"loss": 1.4897, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.5463963675896911, |
|
"grad_norm": 0.0414276085793972, |
|
"learning_rate": 0.00019592542223091118, |
|
"loss": 1.5049, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.5474195817612074, |
|
"grad_norm": 0.0432027168571949, |
|
"learning_rate": 0.00019590147840926577, |
|
"loss": 1.4686, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5484427959327237, |
|
"grad_norm": 0.044036637991666794, |
|
"learning_rate": 0.00019587746591365941, |
|
"loss": 1.5082, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.5494660101042399, |
|
"grad_norm": 0.04510560259222984, |
|
"learning_rate": 0.0001958533847612872, |
|
"loss": 1.5213, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.5504892242757562, |
|
"grad_norm": 0.04027169942855835, |
|
"learning_rate": 0.00019582923496939337, |
|
"loss": 1.4952, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.5515124384472725, |
|
"grad_norm": 0.08312036097049713, |
|
"learning_rate": 0.00019580501655527133, |
|
"loss": 1.512, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.5525356526187888, |
|
"grad_norm": 0.04634568840265274, |
|
"learning_rate": 0.00019578072953626357, |
|
"loss": 1.5248, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5535588667903051, |
|
"grad_norm": 0.044149454683065414, |
|
"learning_rate": 0.00019575637392976178, |
|
"loss": 1.4911, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.5545820809618213, |
|
"grad_norm": 0.04358943551778793, |
|
"learning_rate": 0.00019573194975320673, |
|
"loss": 1.5427, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.5556052951333376, |
|
"grad_norm": 0.038042690604925156, |
|
"learning_rate": 0.0001957074570240883, |
|
"loss": 1.5032, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.5566285093048539, |
|
"grad_norm": 0.04171706736087799, |
|
"learning_rate": 0.00019568289575994544, |
|
"loss": 1.493, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.5576517234763702, |
|
"grad_norm": 0.04037075862288475, |
|
"learning_rate": 0.0001956582659783662, |
|
"loss": 1.5334, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5586749376478864, |
|
"grad_norm": 0.036902882158756256, |
|
"learning_rate": 0.0001956335676969877, |
|
"loss": 1.5093, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.5596981518194027, |
|
"grad_norm": 0.04198329523205757, |
|
"learning_rate": 0.00019560880093349607, |
|
"loss": 1.5069, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.5607213659909189, |
|
"grad_norm": 0.034086357802152634, |
|
"learning_rate": 0.0001955839657056265, |
|
"loss": 1.5101, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.5617445801624352, |
|
"grad_norm": 0.03502487763762474, |
|
"learning_rate": 0.0001955590620311633, |
|
"loss": 1.5305, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.5627677943339515, |
|
"grad_norm": 0.03580254316329956, |
|
"learning_rate": 0.00019553408992793964, |
|
"loss": 1.4984, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5637910085054678, |
|
"grad_norm": 0.0441250242292881, |
|
"learning_rate": 0.00019550904941383773, |
|
"loss": 1.4956, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.5648142226769841, |
|
"grad_norm": 0.039550572633743286, |
|
"learning_rate": 0.00019548394050678883, |
|
"loss": 1.5041, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.5658374368485003, |
|
"grad_norm": 0.03674033284187317, |
|
"learning_rate": 0.0001954587632247732, |
|
"loss": 1.4694, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.5668606510200166, |
|
"grad_norm": 0.03579515963792801, |
|
"learning_rate": 0.00019543351758581994, |
|
"loss": 1.4789, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.5678838651915329, |
|
"grad_norm": 0.04077816754579544, |
|
"learning_rate": 0.0001954082036080072, |
|
"loss": 1.5221, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5689070793630492, |
|
"grad_norm": 0.03694437816739082, |
|
"learning_rate": 0.00019538282130946198, |
|
"loss": 1.5273, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.5699302935345655, |
|
"grad_norm": 0.03998146578669548, |
|
"learning_rate": 0.00019535737070836028, |
|
"loss": 1.5426, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.5709535077060818, |
|
"grad_norm": 0.03823567554354668, |
|
"learning_rate": 0.00019533185182292703, |
|
"loss": 1.5264, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.571976721877598, |
|
"grad_norm": 0.03891613706946373, |
|
"learning_rate": 0.000195306264671436, |
|
"loss": 1.5194, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.5729999360491143, |
|
"grad_norm": 0.035352472215890884, |
|
"learning_rate": 0.0001952806092722098, |
|
"loss": 1.5049, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5740231502206306, |
|
"grad_norm": 0.03947431594133377, |
|
"learning_rate": 0.00019525488564362003, |
|
"loss": 1.5562, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.5750463643921468, |
|
"grad_norm": 0.0398818701505661, |
|
"learning_rate": 0.00019522909380408705, |
|
"loss": 1.5216, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.5760695785636631, |
|
"grad_norm": 0.03842191398143768, |
|
"learning_rate": 0.00019520323377208017, |
|
"loss": 1.5461, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.5770927927351793, |
|
"grad_norm": 0.03299557417631149, |
|
"learning_rate": 0.00019517730556611738, |
|
"loss": 1.4988, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.5781160069066956, |
|
"grad_norm": 0.032452985644340515, |
|
"learning_rate": 0.00019515130920476562, |
|
"loss": 1.4837, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5791392210782119, |
|
"grad_norm": 0.03567085042595863, |
|
"learning_rate": 0.00019512524470664057, |
|
"loss": 1.5081, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.5801624352497282, |
|
"grad_norm": 0.04303791746497154, |
|
"learning_rate": 0.00019509911209040676, |
|
"loss": 1.517, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.5811856494212445, |
|
"grad_norm": 0.040586575865745544, |
|
"learning_rate": 0.00019507291137477742, |
|
"loss": 1.5494, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.5822088635927608, |
|
"grad_norm": 0.038383904844522476, |
|
"learning_rate": 0.0001950466425785146, |
|
"loss": 1.4641, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.583232077764277, |
|
"grad_norm": 0.0484977550804615, |
|
"learning_rate": 0.0001950203057204291, |
|
"loss": 1.4838, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5842552919357933, |
|
"grad_norm": 0.03300706669688225, |
|
"learning_rate": 0.00019499390081938046, |
|
"loss": 1.4935, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.5852785061073096, |
|
"grad_norm": 0.041923582553863525, |
|
"learning_rate": 0.00019496742789427683, |
|
"loss": 1.484, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.5863017202788259, |
|
"grad_norm": 0.04476374387741089, |
|
"learning_rate": 0.00019494088696407532, |
|
"loss": 1.5222, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.5873249344503422, |
|
"grad_norm": 0.039443958550691605, |
|
"learning_rate": 0.00019491427804778147, |
|
"loss": 1.4899, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.5883481486218584, |
|
"grad_norm": 0.0458071269094944, |
|
"learning_rate": 0.00019488760116444966, |
|
"loss": 1.5006, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5893713627933747, |
|
"grad_norm": 0.04912669211626053, |
|
"learning_rate": 0.00019486085633318293, |
|
"loss": 1.5193, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.590394576964891, |
|
"grad_norm": 0.05331273376941681, |
|
"learning_rate": 0.00019483404357313293, |
|
"loss": 1.5115, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.5914177911364072, |
|
"grad_norm": 0.04301870986819267, |
|
"learning_rate": 0.00019480716290349995, |
|
"loss": 1.4997, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.5924410053079235, |
|
"grad_norm": 0.042690206319093704, |
|
"learning_rate": 0.00019478021434353297, |
|
"loss": 1.5014, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.5934642194794398, |
|
"grad_norm": 0.045416899025440216, |
|
"learning_rate": 0.00019475319791252956, |
|
"loss": 1.5287, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.594487433650956, |
|
"grad_norm": 0.04627612978219986, |
|
"learning_rate": 0.0001947261136298358, |
|
"loss": 1.5238, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.5955106478224723, |
|
"grad_norm": 0.0443304218351841, |
|
"learning_rate": 0.00019469896151484654, |
|
"loss": 1.4956, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.5965338619939886, |
|
"grad_norm": 0.042293716222047806, |
|
"learning_rate": 0.00019467174158700504, |
|
"loss": 1.4962, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.5975570761655049, |
|
"grad_norm": 0.035955190658569336, |
|
"learning_rate": 0.0001946444538658032, |
|
"loss": 1.4799, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.5985802903370212, |
|
"grad_norm": 0.04025396704673767, |
|
"learning_rate": 0.00019461709837078145, |
|
"loss": 1.489, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5996035045085374, |
|
"grad_norm": 0.057371869683265686, |
|
"learning_rate": 0.0001945896751215287, |
|
"loss": 1.4872, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.6006267186800537, |
|
"grad_norm": 0.05806579813361168, |
|
"learning_rate": 0.0001945621841376825, |
|
"loss": 1.5153, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.60164993285157, |
|
"grad_norm": 0.03980225697159767, |
|
"learning_rate": 0.00019453462543892882, |
|
"loss": 1.5093, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.6026731470230863, |
|
"grad_norm": 0.041456956416368484, |
|
"learning_rate": 0.0001945069990450021, |
|
"loss": 1.5115, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.6036963611946026, |
|
"grad_norm": 0.03392681106925011, |
|
"learning_rate": 0.00019447930497568528, |
|
"loss": 1.4863, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6047195753661189, |
|
"grad_norm": 0.03312285616993904, |
|
"learning_rate": 0.0001944515432508098, |
|
"loss": 1.5321, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.605742789537635, |
|
"grad_norm": 0.03741718456149101, |
|
"learning_rate": 0.00019442371389025552, |
|
"loss": 1.4874, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.6067660037091513, |
|
"grad_norm": 0.03954221308231354, |
|
"learning_rate": 0.00019439581691395067, |
|
"loss": 1.5014, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.6077892178806676, |
|
"grad_norm": 0.03756248950958252, |
|
"learning_rate": 0.00019436785234187205, |
|
"loss": 1.522, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.6088124320521839, |
|
"grad_norm": 0.03895876556634903, |
|
"learning_rate": 0.00019433982019404473, |
|
"loss": 1.5546, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6098356462237002, |
|
"grad_norm": 0.038288913667201996, |
|
"learning_rate": 0.0001943117204905422, |
|
"loss": 1.4859, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.6108588603952164, |
|
"grad_norm": 0.034622881561517715, |
|
"learning_rate": 0.00019428355325148633, |
|
"loss": 1.5246, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.6118820745667327, |
|
"grad_norm": 0.04585454985499382, |
|
"learning_rate": 0.0001942553184970474, |
|
"loss": 1.5001, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.612905288738249, |
|
"grad_norm": 0.03685140982270241, |
|
"learning_rate": 0.00019422701624744395, |
|
"loss": 1.5114, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.6139285029097653, |
|
"grad_norm": 0.033848248422145844, |
|
"learning_rate": 0.00019419864652294296, |
|
"loss": 1.5047, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6149517170812816, |
|
"grad_norm": 0.03485368937253952, |
|
"learning_rate": 0.00019417020934385962, |
|
"loss": 1.5412, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.6159749312527979, |
|
"grad_norm": 0.03737105429172516, |
|
"learning_rate": 0.00019414170473055746, |
|
"loss": 1.5014, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.6169981454243141, |
|
"grad_norm": 0.0417652502655983, |
|
"learning_rate": 0.00019411313270344837, |
|
"loss": 1.4963, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.6180213595958304, |
|
"grad_norm": 0.037758734077215195, |
|
"learning_rate": 0.0001940844932829924, |
|
"loss": 1.4935, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.6190445737673467, |
|
"grad_norm": 0.03808191418647766, |
|
"learning_rate": 0.00019405578648969796, |
|
"loss": 1.5181, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.620067787938863, |
|
"grad_norm": 0.03454340249300003, |
|
"learning_rate": 0.00019402701234412162, |
|
"loss": 1.493, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.6210910021103793, |
|
"grad_norm": 0.03708413615822792, |
|
"learning_rate": 0.00019399817086686826, |
|
"loss": 1.4987, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.6221142162818954, |
|
"grad_norm": 0.046957071870565414, |
|
"learning_rate": 0.00019396926207859084, |
|
"loss": 1.473, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.6231374304534117, |
|
"grad_norm": 0.03893362358212471, |
|
"learning_rate": 0.00019394028599999073, |
|
"loss": 1.4915, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.624160644624928, |
|
"grad_norm": 0.04247049614787102, |
|
"learning_rate": 0.0001939112426518173, |
|
"loss": 1.5384, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6251838587964443, |
|
"grad_norm": 0.036440882831811905, |
|
"learning_rate": 0.00019388213205486822, |
|
"loss": 1.5124, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.6262070729679606, |
|
"grad_norm": 0.037374429404735565, |
|
"learning_rate": 0.00019385295422998921, |
|
"loss": 1.5244, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.6272302871394769, |
|
"grad_norm": 0.0383899062871933, |
|
"learning_rate": 0.00019382370919807419, |
|
"loss": 1.5078, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.6282535013109931, |
|
"grad_norm": 0.03726350888609886, |
|
"learning_rate": 0.0001937943969800652, |
|
"loss": 1.4968, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.6292767154825094, |
|
"grad_norm": 0.037606336176395416, |
|
"learning_rate": 0.0001937650175969524, |
|
"loss": 1.4735, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6302999296540257, |
|
"grad_norm": 0.03583415970206261, |
|
"learning_rate": 0.000193735571069774, |
|
"loss": 1.4872, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.631323143825542, |
|
"grad_norm": 0.029802750796079636, |
|
"learning_rate": 0.00019370605741961635, |
|
"loss": 1.5037, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.6323463579970583, |
|
"grad_norm": 0.037094760686159134, |
|
"learning_rate": 0.00019367647666761385, |
|
"loss": 1.518, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.6333695721685745, |
|
"grad_norm": 0.03802032023668289, |
|
"learning_rate": 0.00019364682883494893, |
|
"loss": 1.4997, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.6343927863400908, |
|
"grad_norm": 0.03934174031019211, |
|
"learning_rate": 0.00019361711394285202, |
|
"loss": 1.5033, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6354160005116071, |
|
"grad_norm": 0.03484318405389786, |
|
"learning_rate": 0.00019358733201260169, |
|
"loss": 1.5068, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.6364392146831234, |
|
"grad_norm": 0.03633354604244232, |
|
"learning_rate": 0.00019355748306552442, |
|
"loss": 1.5462, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.6374624288546397, |
|
"grad_norm": 0.05548425391316414, |
|
"learning_rate": 0.00019352756712299468, |
|
"loss": 1.5036, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.638485643026156, |
|
"grad_norm": 0.032225679606199265, |
|
"learning_rate": 0.00019349758420643493, |
|
"loss": 1.5026, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.6395088571976721, |
|
"grad_norm": 0.03236972540616989, |
|
"learning_rate": 0.00019346753433731564, |
|
"loss": 1.5199, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6405320713691884, |
|
"grad_norm": 0.03576046973466873, |
|
"learning_rate": 0.00019343741753715516, |
|
"loss": 1.5146, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.6415552855407047, |
|
"grad_norm": 0.04308708757162094, |
|
"learning_rate": 0.00019340723382751978, |
|
"loss": 1.5, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.642578499712221, |
|
"grad_norm": 0.035895735025405884, |
|
"learning_rate": 0.0001933769832300237, |
|
"loss": 1.5043, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.6436017138837373, |
|
"grad_norm": 0.03789574280381203, |
|
"learning_rate": 0.00019334666576632906, |
|
"loss": 1.4935, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.6446249280552535, |
|
"grad_norm": 0.03609545901417732, |
|
"learning_rate": 0.00019331628145814587, |
|
"loss": 1.5296, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6456481422267698, |
|
"grad_norm": 0.0432671383023262, |
|
"learning_rate": 0.00019328583032723193, |
|
"loss": 1.5045, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.6466713563982861, |
|
"grad_norm": 0.038937125355005264, |
|
"learning_rate": 0.000193255312395393, |
|
"loss": 1.4801, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.6476945705698024, |
|
"grad_norm": 0.03925538435578346, |
|
"learning_rate": 0.00019322472768448258, |
|
"loss": 1.4903, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.6487177847413187, |
|
"grad_norm": 0.03581652417778969, |
|
"learning_rate": 0.00019319407621640208, |
|
"loss": 1.471, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.649740998912835, |
|
"grad_norm": 0.03643723577260971, |
|
"learning_rate": 0.00019316335801310063, |
|
"loss": 1.5019, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6507642130843512, |
|
"grad_norm": 0.03839946910738945, |
|
"learning_rate": 0.0001931325730965752, |
|
"loss": 1.5148, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.6517874272558675, |
|
"grad_norm": 0.04306597262620926, |
|
"learning_rate": 0.00019310172148887054, |
|
"loss": 1.472, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.6528106414273838, |
|
"grad_norm": 0.069839708507061, |
|
"learning_rate": 0.00019307080321207912, |
|
"loss": 1.521, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.6538338555989001, |
|
"grad_norm": 0.05618079751729965, |
|
"learning_rate": 0.00019303981828834113, |
|
"loss": 1.5019, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.6548570697704164, |
|
"grad_norm": 0.04359296336770058, |
|
"learning_rate": 0.00019300876673984462, |
|
"loss": 1.4676, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6558802839419325, |
|
"grad_norm": 0.038589805364608765, |
|
"learning_rate": 0.00019297764858882514, |
|
"loss": 1.4791, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.6569034981134488, |
|
"grad_norm": 0.0316338986158371, |
|
"learning_rate": 0.00019294646385756612, |
|
"loss": 1.4824, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.6579267122849651, |
|
"grad_norm": 0.03457920625805855, |
|
"learning_rate": 0.00019291521256839858, |
|
"loss": 1.4946, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.6589499264564814, |
|
"grad_norm": 0.04637923464179039, |
|
"learning_rate": 0.00019288389474370117, |
|
"loss": 1.5049, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.6599731406279977, |
|
"grad_norm": 0.05314064025878906, |
|
"learning_rate": 0.0001928525104059003, |
|
"loss": 1.5021, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.660996354799514, |
|
"grad_norm": 0.041335079818964005, |
|
"learning_rate": 0.00019282105957746986, |
|
"loss": 1.4869, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.6620195689710302, |
|
"grad_norm": 0.040912263095378876, |
|
"learning_rate": 0.00019278954228093146, |
|
"loss": 1.5168, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.6630427831425465, |
|
"grad_norm": 0.037110935896635056, |
|
"learning_rate": 0.00019275795853885433, |
|
"loss": 1.4973, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.6640659973140628, |
|
"grad_norm": 0.035204846411943436, |
|
"learning_rate": 0.00019272630837385518, |
|
"loss": 1.5062, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.6650892114855791, |
|
"grad_norm": 0.0464470274746418, |
|
"learning_rate": 0.0001926945918085983, |
|
"loss": 1.5412, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6661124256570954, |
|
"grad_norm": 0.033444374799728394, |
|
"learning_rate": 0.00019266280886579565, |
|
"loss": 1.4799, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.6671356398286117, |
|
"grad_norm": 0.036789704114198685, |
|
"learning_rate": 0.0001926309595682066, |
|
"loss": 1.5604, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.6681588540001279, |
|
"grad_norm": 0.03726235032081604, |
|
"learning_rate": 0.00019259904393863802, |
|
"loss": 1.5054, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.6691820681716442, |
|
"grad_norm": 0.03499661013484001, |
|
"learning_rate": 0.00019256706199994442, |
|
"loss": 1.5039, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.6702052823431605, |
|
"grad_norm": 0.037414226680994034, |
|
"learning_rate": 0.00019253501377502764, |
|
"loss": 1.4952, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6712284965146768, |
|
"grad_norm": 0.041186489164829254, |
|
"learning_rate": 0.00019250289928683705, |
|
"loss": 1.519, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.672251710686193, |
|
"grad_norm": 0.050159044563770294, |
|
"learning_rate": 0.0001924707185583695, |
|
"loss": 1.5112, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.6732749248577092, |
|
"grad_norm": 0.05124843865633011, |
|
"learning_rate": 0.0001924384716126692, |
|
"loss": 1.4897, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.6742981390292255, |
|
"grad_norm": 0.03580416738986969, |
|
"learning_rate": 0.00019240615847282788, |
|
"loss": 1.4739, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.6753213532007418, |
|
"grad_norm": 0.03572642430663109, |
|
"learning_rate": 0.00019237377916198458, |
|
"loss": 1.4735, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6763445673722581, |
|
"grad_norm": 0.04381095990538597, |
|
"learning_rate": 0.00019234133370332578, |
|
"loss": 1.4817, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.6773677815437744, |
|
"grad_norm": 0.03948042169213295, |
|
"learning_rate": 0.00019230882212008528, |
|
"loss": 1.5288, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.6783909957152907, |
|
"grad_norm": 0.04092205688357353, |
|
"learning_rate": 0.00019227624443554425, |
|
"loss": 1.503, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.6794142098868069, |
|
"grad_norm": 0.0372740812599659, |
|
"learning_rate": 0.0001922436006730312, |
|
"loss": 1.5186, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.6804374240583232, |
|
"grad_norm": 0.03410439193248749, |
|
"learning_rate": 0.00019221089085592202, |
|
"loss": 1.5104, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.6814606382298395, |
|
"grad_norm": 0.04406609386205673, |
|
"learning_rate": 0.00019217811500763977, |
|
"loss": 1.497, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.6824838524013558, |
|
"grad_norm": 0.04020300507545471, |
|
"learning_rate": 0.00019214527315165487, |
|
"loss": 1.4589, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.6835070665728721, |
|
"grad_norm": 0.03552987799048424, |
|
"learning_rate": 0.000192112365311485, |
|
"loss": 1.4938, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.6845302807443883, |
|
"grad_norm": 0.035595186054706573, |
|
"learning_rate": 0.00019207939151069515, |
|
"loss": 1.4664, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.6855534949159046, |
|
"grad_norm": 0.030798960477113724, |
|
"learning_rate": 0.00019204635177289743, |
|
"loss": 1.4786, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.6865767090874209, |
|
"grad_norm": 0.03413120657205582, |
|
"learning_rate": 0.00019201324612175123, |
|
"loss": 1.5409, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.6875999232589372, |
|
"grad_norm": 0.03786253184080124, |
|
"learning_rate": 0.0001919800745809631, |
|
"loss": 1.4725, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.6886231374304534, |
|
"grad_norm": 0.0414445661008358, |
|
"learning_rate": 0.00019194683717428687, |
|
"loss": 1.4993, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.6896463516019697, |
|
"grad_norm": 0.0378003790974617, |
|
"learning_rate": 0.00019191353392552344, |
|
"loss": 1.5225, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.6906695657734859, |
|
"grad_norm": 0.0343095101416111, |
|
"learning_rate": 0.0001918801648585209, |
|
"loss": 1.4671, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.6916927799450022, |
|
"grad_norm": 0.03458075597882271, |
|
"learning_rate": 0.0001918467299971744, |
|
"loss": 1.4843, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.6927159941165185, |
|
"grad_norm": 0.03243357688188553, |
|
"learning_rate": 0.00019181322936542635, |
|
"loss": 1.494, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 0.6937392082880348, |
|
"grad_norm": 0.03002413548529148, |
|
"learning_rate": 0.00019177966298726613, |
|
"loss": 1.5046, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.6947624224595511, |
|
"grad_norm": 0.031211066991090775, |
|
"learning_rate": 0.00019174603088673026, |
|
"loss": 1.4664, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 0.6957856366310673, |
|
"grad_norm": 0.03740109130740166, |
|
"learning_rate": 0.00019171233308790225, |
|
"loss": 1.4394, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6968088508025836, |
|
"grad_norm": 0.03566642478108406, |
|
"learning_rate": 0.0001916785696149128, |
|
"loss": 1.4935, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 0.6978320649740999, |
|
"grad_norm": 0.033135462552309036, |
|
"learning_rate": 0.00019164474049193948, |
|
"loss": 1.5171, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.6988552791456162, |
|
"grad_norm": 0.03240213543176651, |
|
"learning_rate": 0.00019161084574320696, |
|
"loss": 1.4644, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 0.6998784933171325, |
|
"grad_norm": 0.0337255634367466, |
|
"learning_rate": 0.0001915768853929869, |
|
"loss": 1.4739, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.7009017074886488, |
|
"grad_norm": 0.033216070383787155, |
|
"learning_rate": 0.00019154285946559792, |
|
"loss": 1.4691, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.701924921660165, |
|
"grad_norm": 0.03151748329401016, |
|
"learning_rate": 0.0001915087679854056, |
|
"loss": 1.4882, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.7029481358316813, |
|
"grad_norm": 0.03065643645823002, |
|
"learning_rate": 0.00019147461097682246, |
|
"loss": 1.4608, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 0.7039713500031975, |
|
"grad_norm": 0.0341670848429203, |
|
"learning_rate": 0.0001914403884643079, |
|
"loss": 1.4714, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.7049945641747138, |
|
"grad_norm": 0.035825930535793304, |
|
"learning_rate": 0.00019140610047236833, |
|
"loss": 1.4752, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 0.7060177783462301, |
|
"grad_norm": 0.042743559926748276, |
|
"learning_rate": 0.00019137174702555697, |
|
"loss": 1.5077, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7070409925177463, |
|
"grad_norm": 0.03980020061135292, |
|
"learning_rate": 0.00019133732814847397, |
|
"loss": 1.4813, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 0.7080642066892626, |
|
"grad_norm": 0.03854946047067642, |
|
"learning_rate": 0.00019130284386576624, |
|
"loss": 1.4623, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.7090874208607789, |
|
"grad_norm": 0.037254948168992996, |
|
"learning_rate": 0.00019126829420212764, |
|
"loss": 1.5247, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 0.7101106350322952, |
|
"grad_norm": 0.047802574932575226, |
|
"learning_rate": 0.00019123367918229874, |
|
"loss": 1.4989, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.7111338492038115, |
|
"grad_norm": 0.039889827370643616, |
|
"learning_rate": 0.000191198998831067, |
|
"loss": 1.4727, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7121570633753278, |
|
"grad_norm": 0.03746683895587921, |
|
"learning_rate": 0.0001911642531732666, |
|
"loss": 1.4929, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 0.713180277546844, |
|
"grad_norm": 0.04323015734553337, |
|
"learning_rate": 0.00019112944223377855, |
|
"loss": 1.4989, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 0.7142034917183603, |
|
"grad_norm": 0.04086681455373764, |
|
"learning_rate": 0.0001910945660375305, |
|
"loss": 1.4884, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 0.7152267058898766, |
|
"grad_norm": 0.03528650477528572, |
|
"learning_rate": 0.00019105962460949698, |
|
"loss": 1.4932, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 0.7162499200613929, |
|
"grad_norm": 0.041061852127313614, |
|
"learning_rate": 0.00019102461797469912, |
|
"loss": 1.5063, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7172731342329092, |
|
"grad_norm": 0.033481474965810776, |
|
"learning_rate": 0.00019098954615820476, |
|
"loss": 1.4825, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 0.7182963484044254, |
|
"grad_norm": 0.03925000876188278, |
|
"learning_rate": 0.00019095440918512842, |
|
"loss": 1.513, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 0.7193195625759417, |
|
"grad_norm": 0.03856325149536133, |
|
"learning_rate": 0.0001909192070806313, |
|
"loss": 1.4907, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 0.720342776747458, |
|
"grad_norm": 0.03494630753993988, |
|
"learning_rate": 0.00019088393986992124, |
|
"loss": 1.4604, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.7213659909189742, |
|
"grad_norm": 0.03931909799575806, |
|
"learning_rate": 0.00019084860757825268, |
|
"loss": 1.4905, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7223892050904905, |
|
"grad_norm": 0.03644140437245369, |
|
"learning_rate": 0.00019081321023092668, |
|
"loss": 1.49, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 0.7234124192620068, |
|
"grad_norm": 0.03480161353945732, |
|
"learning_rate": 0.00019077774785329087, |
|
"loss": 1.5301, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 0.724435633433523, |
|
"grad_norm": 0.03516329079866409, |
|
"learning_rate": 0.00019074222047073947, |
|
"loss": 1.4801, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 0.7254588476050393, |
|
"grad_norm": 0.03371971845626831, |
|
"learning_rate": 0.00019070662810871322, |
|
"loss": 1.4724, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 0.7264820617765556, |
|
"grad_norm": 0.034337956458330154, |
|
"learning_rate": 0.00019067097079269942, |
|
"loss": 1.4726, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7275052759480719, |
|
"grad_norm": 0.0360429473221302, |
|
"learning_rate": 0.00019063524854823186, |
|
"loss": 1.4856, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 0.7285284901195882, |
|
"grad_norm": 0.03850055858492851, |
|
"learning_rate": 0.0001905994614008908, |
|
"loss": 1.5022, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 0.7295517042911044, |
|
"grad_norm": 0.03869333118200302, |
|
"learning_rate": 0.0001905636093763031, |
|
"loss": 1.4949, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 0.7305749184626207, |
|
"grad_norm": 0.03506360575556755, |
|
"learning_rate": 0.0001905276925001419, |
|
"loss": 1.4617, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 0.731598132634137, |
|
"grad_norm": 0.033819831907749176, |
|
"learning_rate": 0.00019049171079812692, |
|
"loss": 1.4698, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7326213468056533, |
|
"grad_norm": 0.03606401011347771, |
|
"learning_rate": 0.00019045566429602424, |
|
"loss": 1.5038, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 0.7336445609771696, |
|
"grad_norm": 0.04196172207593918, |
|
"learning_rate": 0.00019041955301964632, |
|
"loss": 1.5142, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 0.7346677751486859, |
|
"grad_norm": 0.03859662637114525, |
|
"learning_rate": 0.00019038337699485208, |
|
"loss": 1.5072, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 0.735690989320202, |
|
"grad_norm": 0.036224085837602615, |
|
"learning_rate": 0.00019034713624754672, |
|
"loss": 1.5033, |
|
"step": 1438 |
|
}, |
|
{ |
|
"epoch": 0.7367142034917183, |
|
"grad_norm": 0.04655170813202858, |
|
"learning_rate": 0.00019031083080368183, |
|
"loss": 1.5255, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7377374176632346, |
|
"grad_norm": 0.040406614542007446, |
|
"learning_rate": 0.0001902744606892554, |
|
"loss": 1.5199, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 0.7387606318347509, |
|
"grad_norm": 0.03488042950630188, |
|
"learning_rate": 0.00019023802593031154, |
|
"loss": 1.5127, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 0.7397838460062672, |
|
"grad_norm": 0.031517501920461655, |
|
"learning_rate": 0.00019020152655294085, |
|
"loss": 1.4726, |
|
"step": 1446 |
|
}, |
|
{ |
|
"epoch": 0.7408070601777834, |
|
"grad_norm": 0.0331415981054306, |
|
"learning_rate": 0.0001901649625832801, |
|
"loss": 1.473, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 0.7418302743492997, |
|
"grad_norm": 0.03110121190547943, |
|
"learning_rate": 0.00019012833404751235, |
|
"loss": 1.4693, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.742853488520816, |
|
"grad_norm": 0.03500855341553688, |
|
"learning_rate": 0.00019009164097186684, |
|
"loss": 1.4962, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 0.7438767026923323, |
|
"grad_norm": 0.03449893742799759, |
|
"learning_rate": 0.0001900548833826191, |
|
"loss": 1.4938, |
|
"step": 1454 |
|
}, |
|
{ |
|
"epoch": 0.7448999168638486, |
|
"grad_norm": 0.03199852257966995, |
|
"learning_rate": 0.0001900180613060908, |
|
"loss": 1.4905, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 0.7459231310353649, |
|
"grad_norm": 0.03547672927379608, |
|
"learning_rate": 0.00018998117476864984, |
|
"loss": 1.4495, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 0.7469463452068811, |
|
"grad_norm": 0.03338061273097992, |
|
"learning_rate": 0.00018994422379671016, |
|
"loss": 1.4895, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7479695593783974, |
|
"grad_norm": 0.036238085478544235, |
|
"learning_rate": 0.00018990720841673207, |
|
"loss": 1.5382, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 0.7489927735499137, |
|
"grad_norm": 0.03941986709833145, |
|
"learning_rate": 0.0001898701286552218, |
|
"loss": 1.4917, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 0.75001598772143, |
|
"grad_norm": 0.03612781688570976, |
|
"learning_rate": 0.0001898329845387317, |
|
"loss": 1.4856, |
|
"step": 1466 |
|
}, |
|
{ |
|
"epoch": 0.7510392018929463, |
|
"grad_norm": 0.035338182002305984, |
|
"learning_rate": 0.00018979577609386033, |
|
"loss": 1.4787, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 0.7520624160644624, |
|
"grad_norm": 0.035387344658374786, |
|
"learning_rate": 0.0001897585033472522, |
|
"loss": 1.489, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7530856302359787, |
|
"grad_norm": 0.033865489065647125, |
|
"learning_rate": 0.00018972116632559786, |
|
"loss": 1.4958, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.754108844407495, |
|
"grad_norm": 0.03240435943007469, |
|
"learning_rate": 0.000189683765055634, |
|
"loss": 1.48, |
|
"step": 1474 |
|
}, |
|
{ |
|
"epoch": 0.7551320585790113, |
|
"grad_norm": 0.0325872041285038, |
|
"learning_rate": 0.0001896462995641432, |
|
"loss": 1.4685, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 0.7561552727505276, |
|
"grad_norm": 0.030261578038334846, |
|
"learning_rate": 0.00018960876987795413, |
|
"loss": 1.4985, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 0.7571784869220439, |
|
"grad_norm": 0.034684158861637115, |
|
"learning_rate": 0.0001895711760239413, |
|
"loss": 1.4869, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7582017010935601, |
|
"grad_norm": 0.03360000252723694, |
|
"learning_rate": 0.00018953351802902525, |
|
"loss": 1.5089, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 0.7592249152650764, |
|
"grad_norm": 0.03356654942035675, |
|
"learning_rate": 0.0001894957959201725, |
|
"loss": 1.5119, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 0.7602481294365927, |
|
"grad_norm": 0.035596925765275955, |
|
"learning_rate": 0.00018945800972439538, |
|
"loss": 1.5242, |
|
"step": 1486 |
|
}, |
|
{ |
|
"epoch": 0.761271343608109, |
|
"grad_norm": 0.03309349715709686, |
|
"learning_rate": 0.00018942015946875215, |
|
"loss": 1.519, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.7622945577796253, |
|
"grad_norm": 0.03727027401328087, |
|
"learning_rate": 0.00018938224518034698, |
|
"loss": 1.4651, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7633177719511415, |
|
"grad_norm": 0.03802427276968956, |
|
"learning_rate": 0.00018934426688632986, |
|
"loss": 1.4584, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 0.7643409861226578, |
|
"grad_norm": 0.03257981687784195, |
|
"learning_rate": 0.00018930622461389655, |
|
"loss": 1.4622, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 0.7653642002941741, |
|
"grad_norm": 0.03339976444840431, |
|
"learning_rate": 0.00018926811839028876, |
|
"loss": 1.4486, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.7663874144656904, |
|
"grad_norm": 0.03176839277148247, |
|
"learning_rate": 0.00018922994824279395, |
|
"loss": 1.478, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 0.7674106286372067, |
|
"grad_norm": 0.03458357974886894, |
|
"learning_rate": 0.00018919171419874524, |
|
"loss": 1.5167, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.768433842808723, |
|
"grad_norm": 0.037736013531684875, |
|
"learning_rate": 0.00018915341628552166, |
|
"loss": 1.5323, |
|
"step": 1502 |
|
}, |
|
{ |
|
"epoch": 0.7694570569802391, |
|
"grad_norm": 0.03360259160399437, |
|
"learning_rate": 0.00018911505453054786, |
|
"loss": 1.469, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.7704802711517554, |
|
"grad_norm": 0.03466862440109253, |
|
"learning_rate": 0.00018907662896129433, |
|
"loss": 1.5173, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 0.7715034853232717, |
|
"grad_norm": 0.036147862672805786, |
|
"learning_rate": 0.00018903813960527714, |
|
"loss": 1.4801, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 0.772526699494788, |
|
"grad_norm": 0.03919236734509468, |
|
"learning_rate": 0.0001889995864900581, |
|
"loss": 1.479, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7735499136663043, |
|
"grad_norm": 0.03543972223997116, |
|
"learning_rate": 0.0001889609696432446, |
|
"loss": 1.4771, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 0.7745731278378205, |
|
"grad_norm": 0.04238108918070793, |
|
"learning_rate": 0.00018892228909248978, |
|
"loss": 1.4936, |
|
"step": 1514 |
|
}, |
|
{ |
|
"epoch": 0.7755963420093368, |
|
"grad_norm": 0.035696953535079956, |
|
"learning_rate": 0.00018888354486549237, |
|
"loss": 1.49, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 0.7766195561808531, |
|
"grad_norm": 0.04000556096434593, |
|
"learning_rate": 0.00018884473698999661, |
|
"loss": 1.5206, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 0.7776427703523694, |
|
"grad_norm": 0.06562638282775879, |
|
"learning_rate": 0.0001888058654937924, |
|
"loss": 1.4672, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.7786659845238857, |
|
"grad_norm": 0.03467231243848801, |
|
"learning_rate": 0.00018876693040471517, |
|
"loss": 1.5033, |
|
"step": 1522 |
|
}, |
|
{ |
|
"epoch": 0.779689198695402, |
|
"grad_norm": 0.03708554431796074, |
|
"learning_rate": 0.00018872793175064593, |
|
"loss": 1.4606, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 0.7807124128669182, |
|
"grad_norm": 0.039738163352012634, |
|
"learning_rate": 0.00018868886955951115, |
|
"loss": 1.4506, |
|
"step": 1526 |
|
}, |
|
{ |
|
"epoch": 0.7817356270384345, |
|
"grad_norm": 0.036794066429138184, |
|
"learning_rate": 0.00018864974385928283, |
|
"loss": 1.516, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 0.7827588412099508, |
|
"grad_norm": 0.037196848541498184, |
|
"learning_rate": 0.0001886105546779784, |
|
"loss": 1.5051, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.7837820553814671, |
|
"grad_norm": 0.03867275267839432, |
|
"learning_rate": 0.00018857130204366084, |
|
"loss": 1.5015, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 0.7848052695529834, |
|
"grad_norm": 0.03784462809562683, |
|
"learning_rate": 0.00018853198598443852, |
|
"loss": 1.4713, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 0.7858284837244995, |
|
"grad_norm": 0.04151632636785507, |
|
"learning_rate": 0.00018849260652846519, |
|
"loss": 1.4671, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.7868516978960158, |
|
"grad_norm": 0.04655742272734642, |
|
"learning_rate": 0.00018845316370394005, |
|
"loss": 1.4751, |
|
"step": 1538 |
|
}, |
|
{ |
|
"epoch": 0.7878749120675321, |
|
"grad_norm": 0.037444863468408585, |
|
"learning_rate": 0.00018841365753910765, |
|
"loss": 1.5155, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.7888981262390484, |
|
"grad_norm": 0.04184754192829132, |
|
"learning_rate": 0.0001883740880622579, |
|
"loss": 1.4717, |
|
"step": 1542 |
|
}, |
|
{ |
|
"epoch": 0.7899213404105647, |
|
"grad_norm": 0.042664580047130585, |
|
"learning_rate": 0.00018833445530172605, |
|
"loss": 1.5221, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 0.790944554582081, |
|
"grad_norm": 0.05149197578430176, |
|
"learning_rate": 0.00018829475928589271, |
|
"loss": 1.4861, |
|
"step": 1546 |
|
}, |
|
{ |
|
"epoch": 0.7919677687535972, |
|
"grad_norm": 0.04174793139100075, |
|
"learning_rate": 0.0001882550000431837, |
|
"loss": 1.4887, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.7929909829251135, |
|
"grad_norm": 0.03560099005699158, |
|
"learning_rate": 0.0001882151776020702, |
|
"loss": 1.5099, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.7940141970966298, |
|
"grad_norm": 0.049874622374773026, |
|
"learning_rate": 0.0001881752919910686, |
|
"loss": 1.4835, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 0.7950374112681461, |
|
"grad_norm": 0.04354040324687958, |
|
"learning_rate": 0.0001881353432387405, |
|
"loss": 1.4778, |
|
"step": 1554 |
|
}, |
|
{ |
|
"epoch": 0.7960606254396624, |
|
"grad_norm": 0.04164579510688782, |
|
"learning_rate": 0.0001880953313736928, |
|
"loss": 1.4968, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 0.7970838396111786, |
|
"grad_norm": 0.034870538860559464, |
|
"learning_rate": 0.0001880552564245775, |
|
"loss": 1.4628, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 0.7981070537826949, |
|
"grad_norm": 0.034135766327381134, |
|
"learning_rate": 0.00018801511842009183, |
|
"loss": 1.4836, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.7991302679542112, |
|
"grad_norm": 0.03587375581264496, |
|
"learning_rate": 0.00018797491738897816, |
|
"loss": 1.4636, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 0.8001534821257275, |
|
"grad_norm": 0.03559894114732742, |
|
"learning_rate": 0.000187934653360024, |
|
"loss": 1.4874, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 0.8011766962972438, |
|
"grad_norm": 0.05410682037472725, |
|
"learning_rate": 0.00018789432636206197, |
|
"loss": 1.4701, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 0.80219991046876, |
|
"grad_norm": 0.046682942658662796, |
|
"learning_rate": 0.00018785393642396976, |
|
"loss": 1.4993, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.8032231246402762, |
|
"grad_norm": 0.03647172451019287, |
|
"learning_rate": 0.00018781348357467013, |
|
"loss": 1.5053, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8042463388117925, |
|
"grad_norm": 0.035208649933338165, |
|
"learning_rate": 0.00018777296784313095, |
|
"loss": 1.5099, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 0.8052695529833088, |
|
"grad_norm": 0.03541814163327217, |
|
"learning_rate": 0.00018773238925836507, |
|
"loss": 1.5027, |
|
"step": 1574 |
|
}, |
|
{ |
|
"epoch": 0.8062927671548251, |
|
"grad_norm": 0.04706384614109993, |
|
"learning_rate": 0.0001876917478494303, |
|
"loss": 1.5111, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 0.8073159813263414, |
|
"grad_norm": 0.042128194123506546, |
|
"learning_rate": 0.00018765104364542955, |
|
"loss": 1.4832, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 0.8083391954978576, |
|
"grad_norm": 0.033496059477329254, |
|
"learning_rate": 0.00018761027667551063, |
|
"loss": 1.49, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8093624096693739, |
|
"grad_norm": 0.036655962467193604, |
|
"learning_rate": 0.0001875694469688663, |
|
"loss": 1.4835, |
|
"step": 1582 |
|
}, |
|
{ |
|
"epoch": 0.8103856238408902, |
|
"grad_norm": 0.036248572170734406, |
|
"learning_rate": 0.0001875285545547342, |
|
"loss": 1.5025, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 0.8114088380124065, |
|
"grad_norm": 0.040282152593135834, |
|
"learning_rate": 0.000187487599462397, |
|
"loss": 1.4776, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 0.8124320521839228, |
|
"grad_norm": 0.03675289452075958, |
|
"learning_rate": 0.00018744658172118215, |
|
"loss": 1.5036, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 0.8134552663554391, |
|
"grad_norm": 0.03431113436818123, |
|
"learning_rate": 0.00018740550136046196, |
|
"loss": 1.4701, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8144784805269553, |
|
"grad_norm": 0.03184695914387703, |
|
"learning_rate": 0.00018736435840965366, |
|
"loss": 1.473, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 0.8155016946984716, |
|
"grad_norm": 0.031748853623867035, |
|
"learning_rate": 0.00018732315289821921, |
|
"loss": 1.5039, |
|
"step": 1594 |
|
}, |
|
{ |
|
"epoch": 0.8165249088699879, |
|
"grad_norm": 0.034614481031894684, |
|
"learning_rate": 0.00018728188485566544, |
|
"loss": 1.4664, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 0.8175481230415041, |
|
"grad_norm": 0.0308011993765831, |
|
"learning_rate": 0.0001872405543115439, |
|
"loss": 1.4719, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 0.8185713372130204, |
|
"grad_norm": 0.031010661274194717, |
|
"learning_rate": 0.00018719916129545093, |
|
"loss": 1.4841, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8195945513845366, |
|
"grad_norm": 0.03110615722835064, |
|
"learning_rate": 0.0001871577058370276, |
|
"loss": 1.4878, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 0.8206177655560529, |
|
"grad_norm": 0.030799025669693947, |
|
"learning_rate": 0.00018711618796595972, |
|
"loss": 1.4391, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 0.8216409797275692, |
|
"grad_norm": 0.029373083263635635, |
|
"learning_rate": 0.00018707460771197774, |
|
"loss": 1.5265, |
|
"step": 1606 |
|
}, |
|
{ |
|
"epoch": 0.8226641938990855, |
|
"grad_norm": 0.03043638914823532, |
|
"learning_rate": 0.0001870329651048568, |
|
"loss": 1.5027, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 0.8236874080706018, |
|
"grad_norm": 0.0337023101747036, |
|
"learning_rate": 0.00018699126017441672, |
|
"loss": 1.4793, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8247106222421181, |
|
"grad_norm": 0.03439760580658913, |
|
"learning_rate": 0.0001869494929505219, |
|
"loss": 1.4764, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 0.8257338364136343, |
|
"grad_norm": 0.03283720836043358, |
|
"learning_rate": 0.00018690766346308145, |
|
"loss": 1.4829, |
|
"step": 1614 |
|
}, |
|
{ |
|
"epoch": 0.8267570505851506, |
|
"grad_norm": 0.030338643118739128, |
|
"learning_rate": 0.00018686577174204885, |
|
"loss": 1.4587, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 0.8277802647566669, |
|
"grad_norm": 0.03556302934885025, |
|
"learning_rate": 0.00018682381781742245, |
|
"loss": 1.4924, |
|
"step": 1618 |
|
}, |
|
{ |
|
"epoch": 0.8288034789281832, |
|
"grad_norm": 0.032113250344991684, |
|
"learning_rate": 0.00018678180171924485, |
|
"loss": 1.4875, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8298266930996995, |
|
"grad_norm": 0.1559678167104721, |
|
"learning_rate": 0.00018673972347760338, |
|
"loss": 1.5009, |
|
"step": 1622 |
|
}, |
|
{ |
|
"epoch": 0.8308499072712157, |
|
"grad_norm": 0.06492070108652115, |
|
"learning_rate": 0.00018669758312262976, |
|
"loss": 1.4632, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 0.831873121442732, |
|
"grad_norm": 0.05882725864648819, |
|
"learning_rate": 0.00018665538068450023, |
|
"loss": 1.472, |
|
"step": 1626 |
|
}, |
|
{ |
|
"epoch": 0.8328963356142483, |
|
"grad_norm": 0.03860605135560036, |
|
"learning_rate": 0.00018661311619343546, |
|
"loss": 1.4662, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 0.8339195497857645, |
|
"grad_norm": 0.04597290977835655, |
|
"learning_rate": 0.00018657078967970062, |
|
"loss": 1.4706, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8349427639572808, |
|
"grad_norm": 0.04754943400621414, |
|
"learning_rate": 0.00018652840117360517, |
|
"loss": 1.475, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.8359659781287971, |
|
"grad_norm": 0.03354303911328316, |
|
"learning_rate": 0.0001864859507055031, |
|
"loss": 1.5133, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 0.8369891923003133, |
|
"grad_norm": 0.042201388627290726, |
|
"learning_rate": 0.0001864434383057927, |
|
"loss": 1.5125, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 0.8380124064718296, |
|
"grad_norm": 0.0343627855181694, |
|
"learning_rate": 0.00018640086400491658, |
|
"loss": 1.4811, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 0.8390356206433459, |
|
"grad_norm": 0.03558426350355148, |
|
"learning_rate": 0.00018635822783336174, |
|
"loss": 1.5171, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8400588348148622, |
|
"grad_norm": 0.03267373517155647, |
|
"learning_rate": 0.00018631552982165944, |
|
"loss": 1.4758, |
|
"step": 1642 |
|
}, |
|
{ |
|
"epoch": 0.8410820489863785, |
|
"grad_norm": 0.03015967085957527, |
|
"learning_rate": 0.00018627277000038533, |
|
"loss": 1.4501, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 0.03152506798505783, |
|
"learning_rate": 0.0001862299484001591, |
|
"loss": 1.4625, |
|
"step": 1646 |
|
}, |
|
{ |
|
"epoch": 0.843128477329411, |
|
"grad_norm": 0.03820090368390083, |
|
"learning_rate": 0.0001861870650516449, |
|
"loss": 1.5065, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 0.8441516915009273, |
|
"grad_norm": 0.030817920342087746, |
|
"learning_rate": 0.000186144119985551, |
|
"loss": 1.4814, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8451749056724436, |
|
"grad_norm": 0.03546105697751045, |
|
"learning_rate": 0.00018610111323262986, |
|
"loss": 1.4554, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 0.8461981198439599, |
|
"grad_norm": 0.033546384423971176, |
|
"learning_rate": 0.00018605804482367807, |
|
"loss": 1.4379, |
|
"step": 1654 |
|
}, |
|
{ |
|
"epoch": 0.8472213340154762, |
|
"grad_norm": 0.035938508808612823, |
|
"learning_rate": 0.00018601491478953657, |
|
"loss": 1.4931, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 0.8482445481869924, |
|
"grad_norm": 0.03531987965106964, |
|
"learning_rate": 0.00018597172316109015, |
|
"loss": 1.4483, |
|
"step": 1658 |
|
}, |
|
{ |
|
"epoch": 0.8492677623585086, |
|
"grad_norm": 0.03041314333677292, |
|
"learning_rate": 0.00018592846996926793, |
|
"loss": 1.4541, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.850290976530025, |
|
"grad_norm": 0.03549192473292351, |
|
"learning_rate": 0.00018588515524504295, |
|
"loss": 1.4615, |
|
"step": 1662 |
|
}, |
|
{ |
|
"epoch": 0.8513141907015412, |
|
"grad_norm": 0.03376925736665726, |
|
"learning_rate": 0.0001858417790194325, |
|
"loss": 1.4722, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.8523374048730575, |
|
"grad_norm": 0.03313841298222542, |
|
"learning_rate": 0.00018579834132349772, |
|
"loss": 1.4791, |
|
"step": 1666 |
|
}, |
|
{ |
|
"epoch": 0.8533606190445737, |
|
"grad_norm": 0.033985435962677, |
|
"learning_rate": 0.00018575484218834388, |
|
"loss": 1.4443, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 0.85438383321609, |
|
"grad_norm": 0.032460469752550125, |
|
"learning_rate": 0.00018571128164512023, |
|
"loss": 1.4988, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8554070473876063, |
|
"grad_norm": 0.03272455185651779, |
|
"learning_rate": 0.00018566765972501993, |
|
"loss": 1.4659, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 0.8564302615591226, |
|
"grad_norm": 0.031708747148513794, |
|
"learning_rate": 0.0001856239764592802, |
|
"loss": 1.5007, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 0.8574534757306389, |
|
"grad_norm": 0.034189220517873764, |
|
"learning_rate": 0.0001855802318791821, |
|
"loss": 1.4423, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 0.8584766899021552, |
|
"grad_norm": 0.03221631050109863, |
|
"learning_rate": 0.00018553642601605068, |
|
"loss": 1.4701, |
|
"step": 1678 |
|
}, |
|
{ |
|
"epoch": 0.8594999040736714, |
|
"grad_norm": 0.029117561876773834, |
|
"learning_rate": 0.00018549255890125475, |
|
"loss": 1.4769, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8605231182451877, |
|
"grad_norm": 0.029596133157610893, |
|
"learning_rate": 0.00018544863056620708, |
|
"loss": 1.4635, |
|
"step": 1682 |
|
}, |
|
{ |
|
"epoch": 0.861546332416704, |
|
"grad_norm": 0.030032752081751823, |
|
"learning_rate": 0.00018540464104236425, |
|
"loss": 1.4991, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 0.8625695465882203, |
|
"grad_norm": 0.03227202966809273, |
|
"learning_rate": 0.00018536059036122667, |
|
"loss": 1.4608, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 0.8635927607597366, |
|
"grad_norm": 0.03331397473812103, |
|
"learning_rate": 0.0001853164785543385, |
|
"loss": 1.4958, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 0.8646159749312528, |
|
"grad_norm": 0.033648762851953506, |
|
"learning_rate": 0.00018527230565328778, |
|
"loss": 1.4949, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.865639189102769, |
|
"grad_norm": 0.03504339978098869, |
|
"learning_rate": 0.00018522807168970616, |
|
"loss": 1.439, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 0.8666624032742853, |
|
"grad_norm": 0.034829430282115936, |
|
"learning_rate": 0.0001851837766952691, |
|
"loss": 1.5001, |
|
"step": 1694 |
|
}, |
|
{ |
|
"epoch": 0.8676856174458016, |
|
"grad_norm": 0.03803844377398491, |
|
"learning_rate": 0.0001851394207016957, |
|
"loss": 1.4905, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.8687088316173179, |
|
"grad_norm": 0.0394139364361763, |
|
"learning_rate": 0.00018509500374074884, |
|
"loss": 1.4537, |
|
"step": 1698 |
|
}, |
|
{ |
|
"epoch": 0.8697320457888342, |
|
"grad_norm": 0.039348065853118896, |
|
"learning_rate": 0.000185050525844235, |
|
"loss": 1.4865, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8707552599603504, |
|
"grad_norm": 0.03650161996483803, |
|
"learning_rate": 0.00018500598704400428, |
|
"loss": 1.4658, |
|
"step": 1702 |
|
}, |
|
{ |
|
"epoch": 0.8717784741318667, |
|
"grad_norm": 0.03312232345342636, |
|
"learning_rate": 0.00018496138737195036, |
|
"loss": 1.477, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 0.872801688303383, |
|
"grad_norm": 0.031243184581398964, |
|
"learning_rate": 0.00018491672686001066, |
|
"loss": 1.4983, |
|
"step": 1706 |
|
}, |
|
{ |
|
"epoch": 0.8738249024748993, |
|
"grad_norm": 0.03666044771671295, |
|
"learning_rate": 0.00018487200554016602, |
|
"loss": 1.4606, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 0.8748481166464156, |
|
"grad_norm": 0.035856928676366806, |
|
"learning_rate": 0.00018482722344444086, |
|
"loss": 1.4808, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.8758713308179318, |
|
"grad_norm": 0.03538081422448158, |
|
"learning_rate": 0.00018478238060490312, |
|
"loss": 1.4734, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 0.8768945449894481, |
|
"grad_norm": 0.02917349338531494, |
|
"learning_rate": 0.00018473747705366426, |
|
"loss": 1.4947, |
|
"step": 1714 |
|
}, |
|
{ |
|
"epoch": 0.8779177591609644, |
|
"grad_norm": 0.035214658826589584, |
|
"learning_rate": 0.0001846925128228792, |
|
"loss": 1.4773, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 0.8789409733324807, |
|
"grad_norm": 0.03703998774290085, |
|
"learning_rate": 0.00018464748794474634, |
|
"loss": 1.4704, |
|
"step": 1718 |
|
}, |
|
{ |
|
"epoch": 0.879964187503997, |
|
"grad_norm": 0.03480003774166107, |
|
"learning_rate": 0.0001846024024515075, |
|
"loss": 1.4723, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.8809874016755133, |
|
"grad_norm": 0.04090346395969391, |
|
"learning_rate": 0.00018455725637544785, |
|
"loss": 1.4525, |
|
"step": 1722 |
|
}, |
|
{ |
|
"epoch": 0.8820106158470294, |
|
"grad_norm": 0.042412955313920975, |
|
"learning_rate": 0.00018451204974889596, |
|
"loss": 1.4418, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 0.8830338300185457, |
|
"grad_norm": 0.03738129511475563, |
|
"learning_rate": 0.00018446678260422385, |
|
"loss": 1.4747, |
|
"step": 1726 |
|
}, |
|
{ |
|
"epoch": 0.884057044190062, |
|
"grad_norm": 0.03728758171200752, |
|
"learning_rate": 0.00018442145497384673, |
|
"loss": 1.5007, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.8850802583615783, |
|
"grad_norm": 0.038157109171152115, |
|
"learning_rate": 0.0001843760668902233, |
|
"loss": 1.4937, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.8861034725330946, |
|
"grad_norm": 0.03238663077354431, |
|
"learning_rate": 0.00018433061838585534, |
|
"loss": 1.4631, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 0.8871266867046108, |
|
"grad_norm": 0.03741516172885895, |
|
"learning_rate": 0.0001842851094932881, |
|
"loss": 1.4887, |
|
"step": 1734 |
|
}, |
|
{ |
|
"epoch": 0.8881499008761271, |
|
"grad_norm": 0.03934532031416893, |
|
"learning_rate": 0.00018423954024510996, |
|
"loss": 1.4208, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 0.8891731150476434, |
|
"grad_norm": 0.03238905593752861, |
|
"learning_rate": 0.00018419391067395248, |
|
"loss": 1.4587, |
|
"step": 1738 |
|
}, |
|
{ |
|
"epoch": 0.8901963292191597, |
|
"grad_norm": 0.039086490869522095, |
|
"learning_rate": 0.00018414822081249058, |
|
"loss": 1.4545, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.891219543390676, |
|
"grad_norm": 0.0370473712682724, |
|
"learning_rate": 0.00018410247069344218, |
|
"loss": 1.4473, |
|
"step": 1742 |
|
}, |
|
{ |
|
"epoch": 0.8922427575621923, |
|
"grad_norm": 0.034061599522829056, |
|
"learning_rate": 0.00018405666034956844, |
|
"loss": 1.4831, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 0.8932659717337085, |
|
"grad_norm": 0.0363328754901886, |
|
"learning_rate": 0.00018401078981367363, |
|
"loss": 1.4729, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 0.8942891859052248, |
|
"grad_norm": 0.035310424864292145, |
|
"learning_rate": 0.00018396485911860512, |
|
"loss": 1.518, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 0.8953124000767411, |
|
"grad_norm": 0.03476149961352348, |
|
"learning_rate": 0.00018391886829725334, |
|
"loss": 1.4611, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.8963356142482574, |
|
"grad_norm": 0.03310383856296539, |
|
"learning_rate": 0.00018387281738255185, |
|
"loss": 1.4746, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 0.8973588284197737, |
|
"grad_norm": 0.0307275652885437, |
|
"learning_rate": 0.00018382670640747714, |
|
"loss": 1.4697, |
|
"step": 1754 |
|
}, |
|
{ |
|
"epoch": 0.8983820425912898, |
|
"grad_norm": 0.028024040162563324, |
|
"learning_rate": 0.00018378053540504873, |
|
"loss": 1.4608, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 0.8994052567628061, |
|
"grad_norm": 0.029499476775527, |
|
"learning_rate": 0.00018373430440832923, |
|
"loss": 1.4614, |
|
"step": 1758 |
|
}, |
|
{ |
|
"epoch": 0.9004284709343224, |
|
"grad_norm": 0.033067066222429276, |
|
"learning_rate": 0.0001836880134504241, |
|
"loss": 1.479, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9014516851058387, |
|
"grad_norm": 0.03787175565958023, |
|
"learning_rate": 0.00018364166256448173, |
|
"loss": 1.4712, |
|
"step": 1762 |
|
}, |
|
{ |
|
"epoch": 0.902474899277355, |
|
"grad_norm": 0.02690064162015915, |
|
"learning_rate": 0.0001835952517836935, |
|
"loss": 1.4673, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 0.9034981134488713, |
|
"grad_norm": 0.026671042665839195, |
|
"learning_rate": 0.00018354878114129367, |
|
"loss": 1.4561, |
|
"step": 1766 |
|
}, |
|
{ |
|
"epoch": 0.9045213276203875, |
|
"grad_norm": 0.03277120366692543, |
|
"learning_rate": 0.00018350225067055925, |
|
"loss": 1.4879, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 0.9055445417919038, |
|
"grad_norm": 0.03682045266032219, |
|
"learning_rate": 0.00018345566040481028, |
|
"loss": 1.467, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9065677559634201, |
|
"grad_norm": 0.027602965012192726, |
|
"learning_rate": 0.0001834090103774095, |
|
"loss": 1.4514, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 0.9075909701349364, |
|
"grad_norm": 0.03043595515191555, |
|
"learning_rate": 0.00018336230062176244, |
|
"loss": 1.4835, |
|
"step": 1774 |
|
}, |
|
{ |
|
"epoch": 0.9086141843064527, |
|
"grad_norm": 0.030672984197735786, |
|
"learning_rate": 0.0001833155311713174, |
|
"loss": 1.492, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 0.9096373984779689, |
|
"grad_norm": 0.032694920897483826, |
|
"learning_rate": 0.00018326870205956553, |
|
"loss": 1.475, |
|
"step": 1778 |
|
}, |
|
{ |
|
"epoch": 0.9106606126494852, |
|
"grad_norm": 0.031511466950178146, |
|
"learning_rate": 0.00018322181332004056, |
|
"loss": 1.4457, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9116838268210015, |
|
"grad_norm": 0.03155050054192543, |
|
"learning_rate": 0.00018317486498631899, |
|
"loss": 1.5165, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 0.9127070409925178, |
|
"grad_norm": 0.03132548928260803, |
|
"learning_rate": 0.00018312785709202002, |
|
"loss": 1.5171, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 0.913730255164034, |
|
"grad_norm": 0.036277156323194504, |
|
"learning_rate": 0.00018308078967080546, |
|
"loss": 1.4726, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 0.9147534693355504, |
|
"grad_norm": 0.029615385457873344, |
|
"learning_rate": 0.00018303366275637976, |
|
"loss": 1.448, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 0.9157766835070665, |
|
"grad_norm": 0.029571905732154846, |
|
"learning_rate": 0.00018298647638248996, |
|
"loss": 1.4629, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9167998976785828, |
|
"grad_norm": 0.028433986008167267, |
|
"learning_rate": 0.0001829392305829257, |
|
"loss": 1.474, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.9178231118500991, |
|
"grad_norm": 0.034186169505119324, |
|
"learning_rate": 0.0001828919253915191, |
|
"loss": 1.4828, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 0.9188463260216154, |
|
"grad_norm": 0.03323967382311821, |
|
"learning_rate": 0.00018284456084214496, |
|
"loss": 1.4883, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 0.9198695401931317, |
|
"grad_norm": 0.03627438098192215, |
|
"learning_rate": 0.00018279713696872047, |
|
"loss": 1.4505, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 0.9208927543646479, |
|
"grad_norm": 0.037414826452732086, |
|
"learning_rate": 0.0001827496538052053, |
|
"loss": 1.5153, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9219159685361642, |
|
"grad_norm": 0.036538898944854736, |
|
"learning_rate": 0.00018270211138560162, |
|
"loss": 1.4565, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 0.9229391827076805, |
|
"grad_norm": 0.034286949783563614, |
|
"learning_rate": 0.00018265450974395403, |
|
"loss": 1.4596, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 0.9239623968791968, |
|
"grad_norm": 0.03332148864865303, |
|
"learning_rate": 0.0001826068489143495, |
|
"loss": 1.4452, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 0.9249856110507131, |
|
"grad_norm": 0.030349107459187508, |
|
"learning_rate": 0.00018255912893091743, |
|
"loss": 1.4937, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 0.9260088252222294, |
|
"grad_norm": 0.030373625457286835, |
|
"learning_rate": 0.00018251134982782952, |
|
"loss": 1.4774, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9270320393937456, |
|
"grad_norm": 0.03661259636282921, |
|
"learning_rate": 0.00018246351163929991, |
|
"loss": 1.4694, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 0.9280552535652619, |
|
"grad_norm": 0.036550264805555344, |
|
"learning_rate": 0.00018241561439958495, |
|
"loss": 1.4944, |
|
"step": 1814 |
|
}, |
|
{ |
|
"epoch": 0.9290784677367782, |
|
"grad_norm": 0.03492378070950508, |
|
"learning_rate": 0.0001823676581429833, |
|
"loss": 1.445, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 0.9301016819082945, |
|
"grad_norm": 0.03306609019637108, |
|
"learning_rate": 0.0001823196429038359, |
|
"loss": 1.4222, |
|
"step": 1818 |
|
}, |
|
{ |
|
"epoch": 0.9311248960798107, |
|
"grad_norm": 0.03200085088610649, |
|
"learning_rate": 0.0001822715687165259, |
|
"loss": 1.467, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9321481102513269, |
|
"grad_norm": 0.036335378885269165, |
|
"learning_rate": 0.00018222343561547874, |
|
"loss": 1.4693, |
|
"step": 1822 |
|
}, |
|
{ |
|
"epoch": 0.9331713244228432, |
|
"grad_norm": 0.039753127843141556, |
|
"learning_rate": 0.00018217524363516193, |
|
"loss": 1.4594, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 0.9341945385943595, |
|
"grad_norm": 0.03748109191656113, |
|
"learning_rate": 0.0001821269928100852, |
|
"loss": 1.5014, |
|
"step": 1826 |
|
}, |
|
{ |
|
"epoch": 0.9352177527658758, |
|
"grad_norm": 0.04106932878494263, |
|
"learning_rate": 0.00018207868317480046, |
|
"loss": 1.4823, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 0.9362409669373921, |
|
"grad_norm": 0.032248884439468384, |
|
"learning_rate": 0.00018203031476390167, |
|
"loss": 1.4697, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9372641811089084, |
|
"grad_norm": 0.047158315777778625, |
|
"learning_rate": 0.00018198188761202487, |
|
"loss": 1.5449, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 0.9382873952804246, |
|
"grad_norm": 0.03881628066301346, |
|
"learning_rate": 0.00018193340175384824, |
|
"loss": 1.5129, |
|
"step": 1834 |
|
}, |
|
{ |
|
"epoch": 0.9393106094519409, |
|
"grad_norm": 0.038932789117097855, |
|
"learning_rate": 0.00018188485722409197, |
|
"loss": 1.4508, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 0.9403338236234572, |
|
"grad_norm": 0.042171675711870193, |
|
"learning_rate": 0.00018183625405751816, |
|
"loss": 1.4976, |
|
"step": 1838 |
|
}, |
|
{ |
|
"epoch": 0.9413570377949735, |
|
"grad_norm": 0.03824607655405998, |
|
"learning_rate": 0.00018178759228893108, |
|
"loss": 1.4759, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9423802519664898, |
|
"grad_norm": 0.0380014143884182, |
|
"learning_rate": 0.0001817388719531768, |
|
"loss": 1.4765, |
|
"step": 1842 |
|
}, |
|
{ |
|
"epoch": 0.943403466138006, |
|
"grad_norm": 0.03372355177998543, |
|
"learning_rate": 0.00018169009308514344, |
|
"loss": 1.4724, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 0.9444266803095223, |
|
"grad_norm": 0.03503812104463577, |
|
"learning_rate": 0.00018164125571976098, |
|
"loss": 1.4537, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 0.9454498944810386, |
|
"grad_norm": 0.03842812776565552, |
|
"learning_rate": 0.00018159235989200132, |
|
"loss": 1.4747, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 0.9464731086525549, |
|
"grad_norm": 0.03686497360467911, |
|
"learning_rate": 0.0001815434056368782, |
|
"loss": 1.4433, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9474963228240711, |
|
"grad_norm": 0.03216801956295967, |
|
"learning_rate": 0.00018149439298944717, |
|
"loss": 1.4628, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 0.9485195369955874, |
|
"grad_norm": 0.04245101660490036, |
|
"learning_rate": 0.0001814453219848057, |
|
"loss": 1.5411, |
|
"step": 1854 |
|
}, |
|
{ |
|
"epoch": 0.9495427511671036, |
|
"grad_norm": 0.041708942502737045, |
|
"learning_rate": 0.0001813961926580929, |
|
"loss": 1.4828, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.9505659653386199, |
|
"grad_norm": 0.038249559700489044, |
|
"learning_rate": 0.0001813470050444898, |
|
"loss": 1.4633, |
|
"step": 1858 |
|
}, |
|
{ |
|
"epoch": 0.9515891795101362, |
|
"grad_norm": 0.03623546287417412, |
|
"learning_rate": 0.00018129775917921905, |
|
"loss": 1.4644, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9526123936816525, |
|
"grad_norm": 0.03886585682630539, |
|
"learning_rate": 0.00018124845509754505, |
|
"loss": 1.4642, |
|
"step": 1862 |
|
}, |
|
{ |
|
"epoch": 0.9536356078531688, |
|
"grad_norm": 0.03367486968636513, |
|
"learning_rate": 0.00018119909283477394, |
|
"loss": 1.4577, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 0.954658822024685, |
|
"grad_norm": 0.034619078040122986, |
|
"learning_rate": 0.00018114967242625343, |
|
"loss": 1.4424, |
|
"step": 1866 |
|
}, |
|
{ |
|
"epoch": 0.9556820361962013, |
|
"grad_norm": 0.036260370165109634, |
|
"learning_rate": 0.00018110019390737292, |
|
"loss": 1.4749, |
|
"step": 1868 |
|
}, |
|
{ |
|
"epoch": 0.9567052503677176, |
|
"grad_norm": 0.037158943712711334, |
|
"learning_rate": 0.00018105065731356343, |
|
"loss": 1.4185, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9577284645392339, |
|
"grad_norm": 0.03858686238527298, |
|
"learning_rate": 0.00018100106268029755, |
|
"loss": 1.5027, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 0.9587516787107502, |
|
"grad_norm": 0.03699406236410141, |
|
"learning_rate": 0.00018095141004308943, |
|
"loss": 1.4283, |
|
"step": 1874 |
|
}, |
|
{ |
|
"epoch": 0.9597748928822665, |
|
"grad_norm": 0.030941152945160866, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 1.4729, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 0.9607981070537827, |
|
"grad_norm": 0.03944398835301399, |
|
"learning_rate": 0.00018085193089911075, |
|
"loss": 1.4636, |
|
"step": 1878 |
|
}, |
|
{ |
|
"epoch": 0.961821321225299, |
|
"grad_norm": 0.03944871574640274, |
|
"learning_rate": 0.00018080210446357606, |
|
"loss": 1.4458, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9628445353968152, |
|
"grad_norm": 0.042511675506830215, |
|
"learning_rate": 0.00018075222016657088, |
|
"loss": 1.4868, |
|
"step": 1882 |
|
}, |
|
{ |
|
"epoch": 0.9638677495683315, |
|
"grad_norm": 0.036067429929971695, |
|
"learning_rate": 0.00018070227804381674, |
|
"loss": 1.4681, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 0.9648909637398478, |
|
"grad_norm": 0.030013304203748703, |
|
"learning_rate": 0.00018065227813107666, |
|
"loss": 1.5088, |
|
"step": 1886 |
|
}, |
|
{ |
|
"epoch": 0.965914177911364, |
|
"grad_norm": 0.030714694410562515, |
|
"learning_rate": 0.000180602220464155, |
|
"loss": 1.4443, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 0.9669373920828803, |
|
"grad_norm": 0.03553122654557228, |
|
"learning_rate": 0.0001805521050788975, |
|
"loss": 1.4667, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9679606062543966, |
|
"grad_norm": 0.032518330961465836, |
|
"learning_rate": 0.0001805019320111912, |
|
"loss": 1.4756, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.9689838204259129, |
|
"grad_norm": 0.032445941120386124, |
|
"learning_rate": 0.0001804517012969644, |
|
"loss": 1.474, |
|
"step": 1894 |
|
}, |
|
{ |
|
"epoch": 0.9700070345974292, |
|
"grad_norm": 0.03390254080295563, |
|
"learning_rate": 0.00018040141297218695, |
|
"loss": 1.4477, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 0.9710302487689455, |
|
"grad_norm": 0.02915276773273945, |
|
"learning_rate": 0.00018035106707286954, |
|
"loss": 1.4784, |
|
"step": 1898 |
|
}, |
|
{ |
|
"epoch": 0.9720534629404617, |
|
"grad_norm": 0.028000080958008766, |
|
"learning_rate": 0.00018030066363506437, |
|
"loss": 1.45, |
|
"step": 1900 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 7816, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.236992921365381e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|