|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 23940, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04177109440267335, |
|
"grad_norm": 3.0113365650177, |
|
"learning_rate": 1.9959896399030832e-05, |
|
"loss": 3.8241, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0835421888053467, |
|
"grad_norm": 83.7524642944336, |
|
"learning_rate": 1.9918121814687946e-05, |
|
"loss": 2.3334, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12531328320802004, |
|
"grad_norm": 3.9804179668426514, |
|
"learning_rate": 1.987634723034506e-05, |
|
"loss": 1.8564, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1670843776106934, |
|
"grad_norm": 4.718357563018799, |
|
"learning_rate": 1.9834572646002174e-05, |
|
"loss": 1.7395, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.20885547201336674, |
|
"grad_norm": 7.3187336921691895, |
|
"learning_rate": 1.9792798061659288e-05, |
|
"loss": 1.7964, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2506265664160401, |
|
"grad_norm": 4.755648136138916, |
|
"learning_rate": 1.9751023477316402e-05, |
|
"loss": 1.8215, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29239766081871343, |
|
"grad_norm": 103.38441467285156, |
|
"learning_rate": 1.9709248892973516e-05, |
|
"loss": 1.8225, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3341687552213868, |
|
"grad_norm": 2.0915794372558594, |
|
"learning_rate": 1.966747430863063e-05, |
|
"loss": 1.8623, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.37593984962406013, |
|
"grad_norm": 5.180814266204834, |
|
"learning_rate": 1.9625699724287744e-05, |
|
"loss": 1.7722, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4177109440267335, |
|
"grad_norm": 2.494590997695923, |
|
"learning_rate": 1.9583925139944858e-05, |
|
"loss": 1.8884, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4594820384294068, |
|
"grad_norm": 2.6546897888183594, |
|
"learning_rate": 1.9542150555601972e-05, |
|
"loss": 1.8798, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5012531328320802, |
|
"grad_norm": 3.439070224761963, |
|
"learning_rate": 1.9500375971259086e-05, |
|
"loss": 1.7612, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5430242272347535, |
|
"grad_norm": 2.7611918449401855, |
|
"learning_rate": 1.9458601386916203e-05, |
|
"loss": 1.808, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5847953216374269, |
|
"grad_norm": 3.4334182739257812, |
|
"learning_rate": 1.9416826802573317e-05, |
|
"loss": 1.6839, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6265664160401002, |
|
"grad_norm": 4.846071720123291, |
|
"learning_rate": 1.937505221823043e-05, |
|
"loss": 1.7979, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6683375104427736, |
|
"grad_norm": 1.9863933324813843, |
|
"learning_rate": 1.9333277633887545e-05, |
|
"loss": 1.8056, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7101086048454469, |
|
"grad_norm": 3.9223458766937256, |
|
"learning_rate": 1.9291503049544656e-05, |
|
"loss": 1.6927, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7518796992481203, |
|
"grad_norm": 2.9789743423461914, |
|
"learning_rate": 1.9249728465201773e-05, |
|
"loss": 1.8234, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"grad_norm": 2.060720443725586, |
|
"learning_rate": 1.9207953880858887e-05, |
|
"loss": 1.6206, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.835421888053467, |
|
"grad_norm": 2.2652785778045654, |
|
"learning_rate": 1.9166179296516e-05, |
|
"loss": 1.6918, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8771929824561403, |
|
"grad_norm": 2.8497183322906494, |
|
"learning_rate": 1.9124404712173115e-05, |
|
"loss": 1.6869, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9189640768588136, |
|
"grad_norm": 4.50927209854126, |
|
"learning_rate": 1.908263012783023e-05, |
|
"loss": 1.8064, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.960735171261487, |
|
"grad_norm": 3.4589695930480957, |
|
"learning_rate": 1.9040855543487343e-05, |
|
"loss": 1.6045, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.0025062656641603, |
|
"grad_norm": 2.7117397785186768, |
|
"learning_rate": 1.8999080959144457e-05, |
|
"loss": 1.6438, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.0442773600668338, |
|
"grad_norm": 2.4935483932495117, |
|
"learning_rate": 1.895730637480157e-05, |
|
"loss": 1.6739, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.086048454469507, |
|
"grad_norm": 2.9701192378997803, |
|
"learning_rate": 1.8915531790458685e-05, |
|
"loss": 1.733, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.1278195488721805, |
|
"grad_norm": 3.8167076110839844, |
|
"learning_rate": 1.8873757206115803e-05, |
|
"loss": 1.6363, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.1695906432748537, |
|
"grad_norm": 2.2734756469726562, |
|
"learning_rate": 1.8831982621772917e-05, |
|
"loss": 1.6808, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.2113617376775272, |
|
"grad_norm": 3.4276649951934814, |
|
"learning_rate": 1.879020803743003e-05, |
|
"loss": 1.7535, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.2531328320802004, |
|
"grad_norm": 4.668817043304443, |
|
"learning_rate": 1.874843345308714e-05, |
|
"loss": 1.6547, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.294903926482874, |
|
"grad_norm": 6.21998405456543, |
|
"learning_rate": 1.870665886874426e-05, |
|
"loss": 1.5982, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.3366750208855471, |
|
"grad_norm": 8.45278549194336, |
|
"learning_rate": 1.8664884284401372e-05, |
|
"loss": 1.6862, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.3784461152882206, |
|
"grad_norm": 3.1017799377441406, |
|
"learning_rate": 1.8623109700058486e-05, |
|
"loss": 1.5701, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.4202172096908938, |
|
"grad_norm": 5.954344272613525, |
|
"learning_rate": 1.85813351157156e-05, |
|
"loss": 1.5799, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.4619883040935673, |
|
"grad_norm": 4.534444808959961, |
|
"learning_rate": 1.8539560531372714e-05, |
|
"loss": 1.6148, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.5037593984962405, |
|
"grad_norm": 6.303493976593018, |
|
"learning_rate": 1.849778594702983e-05, |
|
"loss": 1.527, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.545530492898914, |
|
"grad_norm": 3.002648115158081, |
|
"learning_rate": 1.8456011362686942e-05, |
|
"loss": 1.6922, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.5873015873015874, |
|
"grad_norm": 10.854593276977539, |
|
"learning_rate": 1.8414236778344056e-05, |
|
"loss": 1.5546, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.6290726817042607, |
|
"grad_norm": 4.601009368896484, |
|
"learning_rate": 1.837246219400117e-05, |
|
"loss": 1.5693, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.670843776106934, |
|
"grad_norm": 3.6931846141815186, |
|
"learning_rate": 1.8330687609658288e-05, |
|
"loss": 1.6022, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.7126148705096074, |
|
"grad_norm": 11.248991012573242, |
|
"learning_rate": 1.82889130253154e-05, |
|
"loss": 1.4866, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 2.7923028469085693, |
|
"learning_rate": 1.8247138440972512e-05, |
|
"loss": 1.5897, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.796157059314954, |
|
"grad_norm": 4.407562732696533, |
|
"learning_rate": 1.8205363856629626e-05, |
|
"loss": 1.4225, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.8379281537176273, |
|
"grad_norm": 7.397324562072754, |
|
"learning_rate": 1.816358927228674e-05, |
|
"loss": 1.5255, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.8796992481203008, |
|
"grad_norm": 12.785990715026855, |
|
"learning_rate": 1.8121814687943858e-05, |
|
"loss": 1.5248, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.9214703425229742, |
|
"grad_norm": 6.455284595489502, |
|
"learning_rate": 1.808004010360097e-05, |
|
"loss": 1.5326, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.9632414369256475, |
|
"grad_norm": 3.2796213626861572, |
|
"learning_rate": 1.8038265519258086e-05, |
|
"loss": 1.5007, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.0050125313283207, |
|
"grad_norm": 7.908225059509277, |
|
"learning_rate": 1.79964909349152e-05, |
|
"loss": 1.4949, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.046783625730994, |
|
"grad_norm": 12.498689651489258, |
|
"learning_rate": 1.7954716350572314e-05, |
|
"loss": 1.3854, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.0885547201336676, |
|
"grad_norm": 13.456962585449219, |
|
"learning_rate": 1.7912941766229427e-05, |
|
"loss": 1.3631, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.1303258145363406, |
|
"grad_norm": 12.199966430664062, |
|
"learning_rate": 1.787116718188654e-05, |
|
"loss": 1.3836, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.172096908939014, |
|
"grad_norm": 3.8431236743927, |
|
"learning_rate": 1.7829392597543655e-05, |
|
"loss": 1.34, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.2138680033416875, |
|
"grad_norm": 16.643613815307617, |
|
"learning_rate": 1.778761801320077e-05, |
|
"loss": 1.4281, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.255639097744361, |
|
"grad_norm": 14.22512435913086, |
|
"learning_rate": 1.7745843428857887e-05, |
|
"loss": 1.3201, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.2974101921470345, |
|
"grad_norm": 12.082077026367188, |
|
"learning_rate": 1.7704068844514997e-05, |
|
"loss": 1.4239, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.3391812865497075, |
|
"grad_norm": 11.998297691345215, |
|
"learning_rate": 1.766229426017211e-05, |
|
"loss": 1.4133, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.380952380952381, |
|
"grad_norm": 20.736862182617188, |
|
"learning_rate": 1.7620519675829225e-05, |
|
"loss": 1.2625, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.4227234753550544, |
|
"grad_norm": 8.49312973022461, |
|
"learning_rate": 1.7578745091486343e-05, |
|
"loss": 1.3471, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.4644945697577274, |
|
"grad_norm": 8.388376235961914, |
|
"learning_rate": 1.7536970507143457e-05, |
|
"loss": 1.2723, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.506265664160401, |
|
"grad_norm": 11.559739112854004, |
|
"learning_rate": 1.749519592280057e-05, |
|
"loss": 1.306, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.5480367585630743, |
|
"grad_norm": 10.767087936401367, |
|
"learning_rate": 1.7453421338457685e-05, |
|
"loss": 1.3359, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.589807852965748, |
|
"grad_norm": 14.302842140197754, |
|
"learning_rate": 1.74116467541148e-05, |
|
"loss": 1.3609, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"grad_norm": 26.425922393798828, |
|
"learning_rate": 1.7369872169771913e-05, |
|
"loss": 1.3165, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.6733500417710943, |
|
"grad_norm": 21.576194763183594, |
|
"learning_rate": 1.7328097585429027e-05, |
|
"loss": 1.4171, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.7151211361737677, |
|
"grad_norm": 13.986672401428223, |
|
"learning_rate": 1.728632300108614e-05, |
|
"loss": 1.2982, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.756892230576441, |
|
"grad_norm": 20.48072052001953, |
|
"learning_rate": 1.7244548416743255e-05, |
|
"loss": 1.3041, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.798663324979114, |
|
"grad_norm": 0.8754037618637085, |
|
"learning_rate": 1.720277383240037e-05, |
|
"loss": 1.2004, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.8404344193817876, |
|
"grad_norm": 32.664878845214844, |
|
"learning_rate": 1.7160999248057483e-05, |
|
"loss": 1.2166, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.882205513784461, |
|
"grad_norm": 13.420926094055176, |
|
"learning_rate": 1.7119224663714597e-05, |
|
"loss": 1.2593, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.9239766081871346, |
|
"grad_norm": 16.320348739624023, |
|
"learning_rate": 1.707745007937171e-05, |
|
"loss": 1.2575, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.965747702589808, |
|
"grad_norm": 51.816585540771484, |
|
"learning_rate": 1.7035675495028824e-05, |
|
"loss": 1.3123, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 3.007518796992481, |
|
"grad_norm": 7.434196472167969, |
|
"learning_rate": 1.6993900910685942e-05, |
|
"loss": 1.1879, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.0492898913951545, |
|
"grad_norm": 19.095539093017578, |
|
"learning_rate": 1.6952126326343056e-05, |
|
"loss": 1.0774, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 3.091060985797828, |
|
"grad_norm": 11.363150596618652, |
|
"learning_rate": 1.691035174200017e-05, |
|
"loss": 1.1259, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 3.1328320802005014, |
|
"grad_norm": 40.07558822631836, |
|
"learning_rate": 1.6868577157657284e-05, |
|
"loss": 1.058, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.1746031746031744, |
|
"grad_norm": 23.62975311279297, |
|
"learning_rate": 1.6826802573314394e-05, |
|
"loss": 1.0941, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 3.216374269005848, |
|
"grad_norm": 23.654922485351562, |
|
"learning_rate": 1.6785027988971512e-05, |
|
"loss": 0.9878, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 3.2581453634085213, |
|
"grad_norm": 15.095159530639648, |
|
"learning_rate": 1.6743253404628626e-05, |
|
"loss": 1.0991, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.299916457811195, |
|
"grad_norm": 24.225435256958008, |
|
"learning_rate": 1.670147882028574e-05, |
|
"loss": 1.0553, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 3.341687552213868, |
|
"grad_norm": 41.239261627197266, |
|
"learning_rate": 1.6659704235942854e-05, |
|
"loss": 1.015, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.3834586466165413, |
|
"grad_norm": 21.136133193969727, |
|
"learning_rate": 1.6617929651599968e-05, |
|
"loss": 1.0451, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 3.4252297410192147, |
|
"grad_norm": 14.230918884277344, |
|
"learning_rate": 1.657615506725708e-05, |
|
"loss": 1.0563, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 3.467000835421888, |
|
"grad_norm": 16.35744285583496, |
|
"learning_rate": 1.6534380482914196e-05, |
|
"loss": 0.9853, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"grad_norm": 19.833560943603516, |
|
"learning_rate": 1.649260589857131e-05, |
|
"loss": 1.0809, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.5505430242272347, |
|
"grad_norm": 19.3465518951416, |
|
"learning_rate": 1.6450831314228424e-05, |
|
"loss": 1.06, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.592314118629908, |
|
"grad_norm": 47.185245513916016, |
|
"learning_rate": 1.640905672988554e-05, |
|
"loss": 0.9934, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 3.6340852130325816, |
|
"grad_norm": 19.286062240600586, |
|
"learning_rate": 1.6367282145542655e-05, |
|
"loss": 0.9328, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 3.6758563074352546, |
|
"grad_norm": 27.635419845581055, |
|
"learning_rate": 1.6325507561199766e-05, |
|
"loss": 0.9286, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.717627401837928, |
|
"grad_norm": 28.93077850341797, |
|
"learning_rate": 1.628373297685688e-05, |
|
"loss": 0.8911, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 3.7593984962406015, |
|
"grad_norm": 6.602542877197266, |
|
"learning_rate": 1.6241958392513997e-05, |
|
"loss": 0.9376, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.801169590643275, |
|
"grad_norm": 13.683168411254883, |
|
"learning_rate": 1.620018380817111e-05, |
|
"loss": 0.9827, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 3.8429406850459484, |
|
"grad_norm": 8.653618812561035, |
|
"learning_rate": 1.6158409223828225e-05, |
|
"loss": 0.9686, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.8847117794486214, |
|
"grad_norm": 29.841218948364258, |
|
"learning_rate": 1.611663463948534e-05, |
|
"loss": 0.9835, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 3.926482873851295, |
|
"grad_norm": 16.030054092407227, |
|
"learning_rate": 1.6074860055142453e-05, |
|
"loss": 0.9497, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.9682539682539684, |
|
"grad_norm": 41.21371078491211, |
|
"learning_rate": 1.6033085470799567e-05, |
|
"loss": 0.9437, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 4.010025062656641, |
|
"grad_norm": 31.541624069213867, |
|
"learning_rate": 1.599131088645668e-05, |
|
"loss": 0.8401, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 4.051796157059315, |
|
"grad_norm": 28.929073333740234, |
|
"learning_rate": 1.5949536302113795e-05, |
|
"loss": 0.8333, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 4.093567251461988, |
|
"grad_norm": 12.41563606262207, |
|
"learning_rate": 1.590776171777091e-05, |
|
"loss": 0.7276, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 4.135338345864661, |
|
"grad_norm": 33.126747131347656, |
|
"learning_rate": 1.5865987133428026e-05, |
|
"loss": 0.7591, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 4.177109440267335, |
|
"grad_norm": 18.336153030395508, |
|
"learning_rate": 1.5824212549085137e-05, |
|
"loss": 0.8042, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.218880534670008, |
|
"grad_norm": 13.671285629272461, |
|
"learning_rate": 1.578243796474225e-05, |
|
"loss": 0.798, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 4.260651629072681, |
|
"grad_norm": 17.29936408996582, |
|
"learning_rate": 1.5740663380399365e-05, |
|
"loss": 0.7078, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 4.302422723475355, |
|
"grad_norm": 14.447928428649902, |
|
"learning_rate": 1.569888879605648e-05, |
|
"loss": 0.7174, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 4.344193817878028, |
|
"grad_norm": 57.8358268737793, |
|
"learning_rate": 1.5657114211713596e-05, |
|
"loss": 0.7757, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 4.385964912280702, |
|
"grad_norm": 25.56312370300293, |
|
"learning_rate": 1.561533962737071e-05, |
|
"loss": 0.8152, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 4.427736006683375, |
|
"grad_norm": 21.909448623657227, |
|
"learning_rate": 1.5573565043027824e-05, |
|
"loss": 0.6839, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 4.469507101086048, |
|
"grad_norm": 34.13754653930664, |
|
"learning_rate": 1.5531790458684938e-05, |
|
"loss": 0.7502, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 4.511278195488722, |
|
"grad_norm": 32.14202117919922, |
|
"learning_rate": 1.5490015874342052e-05, |
|
"loss": 0.786, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 4.553049289891395, |
|
"grad_norm": 22.156665802001953, |
|
"learning_rate": 1.5448241289999166e-05, |
|
"loss": 0.7236, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 4.594820384294069, |
|
"grad_norm": 25.08436393737793, |
|
"learning_rate": 1.540646670565628e-05, |
|
"loss": 0.7738, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.636591478696742, |
|
"grad_norm": 32.960784912109375, |
|
"learning_rate": 1.5364692121313394e-05, |
|
"loss": 0.7778, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 4.678362573099415, |
|
"grad_norm": 16.070016860961914, |
|
"learning_rate": 1.5322917536970508e-05, |
|
"loss": 0.792, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 4.720133667502089, |
|
"grad_norm": 12.606107711791992, |
|
"learning_rate": 1.5281142952627622e-05, |
|
"loss": 0.7382, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 4.761904761904762, |
|
"grad_norm": 22.041399002075195, |
|
"learning_rate": 1.5239368368284738e-05, |
|
"loss": 0.7351, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 4.803675856307435, |
|
"grad_norm": 22.424896240234375, |
|
"learning_rate": 1.5197593783941851e-05, |
|
"loss": 0.6698, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 4.845446950710109, |
|
"grad_norm": 18.415851593017578, |
|
"learning_rate": 1.5155819199598964e-05, |
|
"loss": 0.7545, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 4.887218045112782, |
|
"grad_norm": 52.402984619140625, |
|
"learning_rate": 1.5114044615256081e-05, |
|
"loss": 0.6534, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 4.928989139515455, |
|
"grad_norm": 28.06797218322754, |
|
"learning_rate": 1.5072270030913193e-05, |
|
"loss": 0.5699, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 4.970760233918129, |
|
"grad_norm": 11.340112686157227, |
|
"learning_rate": 1.5030495446570307e-05, |
|
"loss": 0.6786, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 5.012531328320802, |
|
"grad_norm": 18.82775115966797, |
|
"learning_rate": 1.4988720862227421e-05, |
|
"loss": 0.6637, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.054302422723476, |
|
"grad_norm": 66.8216781616211, |
|
"learning_rate": 1.4946946277884535e-05, |
|
"loss": 0.5432, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 5.096073517126149, |
|
"grad_norm": 8.892007827758789, |
|
"learning_rate": 1.4905171693541651e-05, |
|
"loss": 0.695, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 5.137844611528822, |
|
"grad_norm": 16.294639587402344, |
|
"learning_rate": 1.4863397109198765e-05, |
|
"loss": 0.5784, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 5.179615705931496, |
|
"grad_norm": 19.78766441345215, |
|
"learning_rate": 1.4821622524855879e-05, |
|
"loss": 0.503, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 5.221386800334169, |
|
"grad_norm": 13.428692817687988, |
|
"learning_rate": 1.4779847940512993e-05, |
|
"loss": 0.6292, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"grad_norm": 29.248811721801758, |
|
"learning_rate": 1.4738073356170109e-05, |
|
"loss": 0.5902, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 5.3049289891395155, |
|
"grad_norm": 7.9793877601623535, |
|
"learning_rate": 1.4696298771827223e-05, |
|
"loss": 0.5765, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 5.3467000835421885, |
|
"grad_norm": 10.095882415771484, |
|
"learning_rate": 1.4654524187484337e-05, |
|
"loss": 0.4703, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 5.388471177944862, |
|
"grad_norm": 25.082889556884766, |
|
"learning_rate": 1.4612749603141449e-05, |
|
"loss": 0.4934, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 5.430242272347535, |
|
"grad_norm": 31.18419075012207, |
|
"learning_rate": 1.4570975018798563e-05, |
|
"loss": 0.5474, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.472013366750208, |
|
"grad_norm": 18.715656280517578, |
|
"learning_rate": 1.4529200434455679e-05, |
|
"loss": 0.63, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 5.513784461152882, |
|
"grad_norm": 14.067596435546875, |
|
"learning_rate": 1.4487425850112793e-05, |
|
"loss": 0.621, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 15.184345245361328, |
|
"learning_rate": 1.4445651265769907e-05, |
|
"loss": 0.5204, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 5.597326649958229, |
|
"grad_norm": 15.532989501953125, |
|
"learning_rate": 1.440387668142702e-05, |
|
"loss": 0.5897, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 5.639097744360902, |
|
"grad_norm": 54.043617248535156, |
|
"learning_rate": 1.4362102097084136e-05, |
|
"loss": 0.4629, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 5.680868838763575, |
|
"grad_norm": 39.661746978759766, |
|
"learning_rate": 1.432032751274125e-05, |
|
"loss": 0.4543, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 5.722639933166249, |
|
"grad_norm": 20.885473251342773, |
|
"learning_rate": 1.4278552928398364e-05, |
|
"loss": 0.5593, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 5.764411027568922, |
|
"grad_norm": 17.765832901000977, |
|
"learning_rate": 1.4236778344055478e-05, |
|
"loss": 0.5543, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 5.806182121971595, |
|
"grad_norm": 32.63746643066406, |
|
"learning_rate": 1.419500375971259e-05, |
|
"loss": 0.3481, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 5.847953216374269, |
|
"grad_norm": 16.857807159423828, |
|
"learning_rate": 1.4153229175369708e-05, |
|
"loss": 0.4595, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.889724310776942, |
|
"grad_norm": 10.114936828613281, |
|
"learning_rate": 1.411145459102682e-05, |
|
"loss": 0.4995, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 5.931495405179616, |
|
"grad_norm": 16.946565628051758, |
|
"learning_rate": 1.4069680006683934e-05, |
|
"loss": 0.4598, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 5.973266499582289, |
|
"grad_norm": 15.710683822631836, |
|
"learning_rate": 1.4027905422341048e-05, |
|
"loss": 0.4452, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 6.015037593984962, |
|
"grad_norm": 25.516210556030273, |
|
"learning_rate": 1.3986130837998162e-05, |
|
"loss": 0.5015, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 6.056808688387636, |
|
"grad_norm": 14.8129243850708, |
|
"learning_rate": 1.3944356253655278e-05, |
|
"loss": 0.4292, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 6.098579782790309, |
|
"grad_norm": 3.4943878650665283, |
|
"learning_rate": 1.3902581669312392e-05, |
|
"loss": 0.3825, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 6.140350877192983, |
|
"grad_norm": 34.67987060546875, |
|
"learning_rate": 1.3860807084969506e-05, |
|
"loss": 0.368, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 6.182121971595656, |
|
"grad_norm": 13.267960548400879, |
|
"learning_rate": 1.381903250062662e-05, |
|
"loss": 0.3394, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 6.223893065998329, |
|
"grad_norm": 25.280872344970703, |
|
"learning_rate": 1.3777257916283735e-05, |
|
"loss": 0.4007, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 6.265664160401003, |
|
"grad_norm": 28.221872329711914, |
|
"learning_rate": 1.373548333194085e-05, |
|
"loss": 0.386, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.307435254803676, |
|
"grad_norm": 23.17299461364746, |
|
"learning_rate": 1.3693708747597963e-05, |
|
"loss": 0.445, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 6.349206349206349, |
|
"grad_norm": 4.611133575439453, |
|
"learning_rate": 1.3651934163255076e-05, |
|
"loss": 0.3661, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 6.390977443609023, |
|
"grad_norm": 35.275779724121094, |
|
"learning_rate": 1.361015957891219e-05, |
|
"loss": 0.4559, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 6.432748538011696, |
|
"grad_norm": 27.652978897094727, |
|
"learning_rate": 1.3568384994569305e-05, |
|
"loss": 0.3594, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 6.474519632414369, |
|
"grad_norm": 12.323234558105469, |
|
"learning_rate": 1.352661041022642e-05, |
|
"loss": 0.4166, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 6.516290726817043, |
|
"grad_norm": 19.783565521240234, |
|
"learning_rate": 1.3484835825883533e-05, |
|
"loss": 0.4232, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 6.558061821219716, |
|
"grad_norm": 14.999006271362305, |
|
"learning_rate": 1.3443061241540647e-05, |
|
"loss": 0.4427, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 6.59983291562239, |
|
"grad_norm": 12.637155532836914, |
|
"learning_rate": 1.3401286657197763e-05, |
|
"loss": 0.3311, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 6.641604010025063, |
|
"grad_norm": 24.413984298706055, |
|
"learning_rate": 1.3359512072854877e-05, |
|
"loss": 0.4073, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 6.683375104427736, |
|
"grad_norm": 20.41080665588379, |
|
"learning_rate": 1.331773748851199e-05, |
|
"loss": 0.4343, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.7251461988304095, |
|
"grad_norm": 32.916595458984375, |
|
"learning_rate": 1.3275962904169105e-05, |
|
"loss": 0.355, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 6.7669172932330826, |
|
"grad_norm": 13.167571067810059, |
|
"learning_rate": 1.3234188319826217e-05, |
|
"loss": 0.4479, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 6.8086883876357565, |
|
"grad_norm": 18.277618408203125, |
|
"learning_rate": 1.3192413735483334e-05, |
|
"loss": 0.3349, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 6.8504594820384295, |
|
"grad_norm": 18.956066131591797, |
|
"learning_rate": 1.3150639151140447e-05, |
|
"loss": 0.2944, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 6.8922305764411025, |
|
"grad_norm": 29.309457778930664, |
|
"learning_rate": 1.310886456679756e-05, |
|
"loss": 0.3497, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 6.934001670843776, |
|
"grad_norm": 0.8253272771835327, |
|
"learning_rate": 1.3067089982454675e-05, |
|
"loss": 0.411, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 6.975772765246449, |
|
"grad_norm": 38.29353332519531, |
|
"learning_rate": 1.302531539811179e-05, |
|
"loss": 0.3151, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"grad_norm": 26.124536514282227, |
|
"learning_rate": 1.2983540813768904e-05, |
|
"loss": 0.322, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 7.059314954051796, |
|
"grad_norm": 24.413715362548828, |
|
"learning_rate": 1.2941766229426018e-05, |
|
"loss": 0.1929, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 7.101086048454469, |
|
"grad_norm": 28.574317932128906, |
|
"learning_rate": 1.2899991645083132e-05, |
|
"loss": 0.2357, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"grad_norm": 13.467860221862793, |
|
"learning_rate": 1.2858217060740246e-05, |
|
"loss": 0.3188, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 7.184628237259816, |
|
"grad_norm": 23.04376983642578, |
|
"learning_rate": 1.2816442476397362e-05, |
|
"loss": 0.3352, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 7.226399331662489, |
|
"grad_norm": 0.04887882620096207, |
|
"learning_rate": 1.2774667892054476e-05, |
|
"loss": 0.356, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 7.268170426065163, |
|
"grad_norm": 15.562792778015137, |
|
"learning_rate": 1.2732893307711588e-05, |
|
"loss": 0.3169, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 7.309941520467836, |
|
"grad_norm": 7.763381004333496, |
|
"learning_rate": 1.2691118723368702e-05, |
|
"loss": 0.2877, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 7.351712614870509, |
|
"grad_norm": 42.26727294921875, |
|
"learning_rate": 1.2649344139025818e-05, |
|
"loss": 0.2561, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 7.393483709273183, |
|
"grad_norm": 16.665681838989258, |
|
"learning_rate": 1.2607569554682932e-05, |
|
"loss": 0.2958, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 7.435254803675856, |
|
"grad_norm": 44.200042724609375, |
|
"learning_rate": 1.2565794970340046e-05, |
|
"loss": 0.2894, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 7.47702589807853, |
|
"grad_norm": 14.340874671936035, |
|
"learning_rate": 1.252402038599716e-05, |
|
"loss": 0.3186, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 7.518796992481203, |
|
"grad_norm": 41.146812438964844, |
|
"learning_rate": 1.2482245801654274e-05, |
|
"loss": 0.2913, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.560568086883876, |
|
"grad_norm": 13.177423477172852, |
|
"learning_rate": 1.244047121731139e-05, |
|
"loss": 0.2311, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 7.60233918128655, |
|
"grad_norm": 19.08924102783203, |
|
"learning_rate": 1.2398696632968503e-05, |
|
"loss": 0.2501, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 7.644110275689223, |
|
"grad_norm": 13.80128002166748, |
|
"learning_rate": 1.2356922048625617e-05, |
|
"loss": 0.27, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 7.685881370091897, |
|
"grad_norm": 32.72663497924805, |
|
"learning_rate": 1.2315147464282731e-05, |
|
"loss": 0.268, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 7.72765246449457, |
|
"grad_norm": 28.15250587463379, |
|
"learning_rate": 1.2273372879939847e-05, |
|
"loss": 0.3212, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 7.769423558897243, |
|
"grad_norm": 13.896389961242676, |
|
"learning_rate": 1.2231598295596961e-05, |
|
"loss": 0.3293, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 7.811194653299917, |
|
"grad_norm": 14.249073028564453, |
|
"learning_rate": 1.2189823711254073e-05, |
|
"loss": 0.2786, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 7.85296574770259, |
|
"grad_norm": 19.6274356842041, |
|
"learning_rate": 1.2148049126911187e-05, |
|
"loss": 0.2812, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 7.894736842105263, |
|
"grad_norm": 15.745018005371094, |
|
"learning_rate": 1.2106274542568301e-05, |
|
"loss": 0.2721, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 7.936507936507937, |
|
"grad_norm": 11.491929054260254, |
|
"learning_rate": 1.2064499958225417e-05, |
|
"loss": 0.2833, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 7.97827903091061, |
|
"grad_norm": 4.299226760864258, |
|
"learning_rate": 1.2022725373882531e-05, |
|
"loss": 0.2335, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 8.020050125313283, |
|
"grad_norm": 11.306835174560547, |
|
"learning_rate": 1.1980950789539645e-05, |
|
"loss": 0.1985, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 8.061821219715956, |
|
"grad_norm": 45.70457077026367, |
|
"learning_rate": 1.1939176205196759e-05, |
|
"loss": 0.1748, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 8.10359231411863, |
|
"grad_norm": 140.19454956054688, |
|
"learning_rate": 1.1897401620853875e-05, |
|
"loss": 0.2033, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 8.145363408521304, |
|
"grad_norm": 16.645404815673828, |
|
"learning_rate": 1.1855627036510989e-05, |
|
"loss": 0.1642, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 8.187134502923977, |
|
"grad_norm": 16.9295654296875, |
|
"learning_rate": 1.1813852452168103e-05, |
|
"loss": 0.1904, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 8.22890559732665, |
|
"grad_norm": 34.388484954833984, |
|
"learning_rate": 1.1772077867825215e-05, |
|
"loss": 0.217, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 8.270676691729323, |
|
"grad_norm": 12.104029655456543, |
|
"learning_rate": 1.1730303283482329e-05, |
|
"loss": 0.2691, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 8.312447786131997, |
|
"grad_norm": 2.027843475341797, |
|
"learning_rate": 1.1688528699139445e-05, |
|
"loss": 0.2058, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 8.35421888053467, |
|
"grad_norm": 26.941038131713867, |
|
"learning_rate": 1.1646754114796558e-05, |
|
"loss": 0.1581, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 8.395989974937343, |
|
"grad_norm": 28.439321517944336, |
|
"learning_rate": 1.1604979530453672e-05, |
|
"loss": 0.2169, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 8.437761069340016, |
|
"grad_norm": 3.916905641555786, |
|
"learning_rate": 1.1563204946110786e-05, |
|
"loss": 0.1771, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 8.47953216374269, |
|
"grad_norm": 30.46429443359375, |
|
"learning_rate": 1.1521430361767902e-05, |
|
"loss": 0.2606, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 8.521303258145362, |
|
"grad_norm": 4.947411060333252, |
|
"learning_rate": 1.1479655777425016e-05, |
|
"loss": 0.203, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 8.563074352548037, |
|
"grad_norm": 12.219408988952637, |
|
"learning_rate": 1.143788119308213e-05, |
|
"loss": 0.2081, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 8.60484544695071, |
|
"grad_norm": 45.41501998901367, |
|
"learning_rate": 1.1396106608739244e-05, |
|
"loss": 0.1994, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 8.646616541353383, |
|
"grad_norm": 1.2857117652893066, |
|
"learning_rate": 1.1354332024396358e-05, |
|
"loss": 0.2014, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 8.688387635756056, |
|
"grad_norm": 18.340967178344727, |
|
"learning_rate": 1.1312557440053474e-05, |
|
"loss": 0.179, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 8.73015873015873, |
|
"grad_norm": 35.21903610229492, |
|
"learning_rate": 1.1270782855710588e-05, |
|
"loss": 0.1677, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"grad_norm": 16.27471160888672, |
|
"learning_rate": 1.12290082713677e-05, |
|
"loss": 0.1792, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 8.813700918964077, |
|
"grad_norm": 22.30707550048828, |
|
"learning_rate": 1.1187233687024814e-05, |
|
"loss": 0.2007, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 8.85547201336675, |
|
"grad_norm": 2.8700950145721436, |
|
"learning_rate": 1.114545910268193e-05, |
|
"loss": 0.2363, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 8.897243107769423, |
|
"grad_norm": 24.8457088470459, |
|
"learning_rate": 1.1103684518339044e-05, |
|
"loss": 0.1915, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 8.939014202172096, |
|
"grad_norm": 9.145515441894531, |
|
"learning_rate": 1.1061909933996158e-05, |
|
"loss": 0.244, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 8.980785296574771, |
|
"grad_norm": 0.5550752282142639, |
|
"learning_rate": 1.1020135349653272e-05, |
|
"loss": 0.1613, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 9.022556390977444, |
|
"grad_norm": 18.79015350341797, |
|
"learning_rate": 1.0978360765310386e-05, |
|
"loss": 0.2604, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 9.064327485380117, |
|
"grad_norm": 3.671290397644043, |
|
"learning_rate": 1.0936586180967501e-05, |
|
"loss": 0.1305, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 9.10609857978279, |
|
"grad_norm": 10.10545539855957, |
|
"learning_rate": 1.0894811596624615e-05, |
|
"loss": 0.1584, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 9.147869674185463, |
|
"grad_norm": 0.5803263187408447, |
|
"learning_rate": 1.085303701228173e-05, |
|
"loss": 0.1038, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 9.189640768588138, |
|
"grad_norm": 13.21904468536377, |
|
"learning_rate": 1.0811262427938841e-05, |
|
"loss": 0.156, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 9.23141186299081, |
|
"grad_norm": 19.366302490234375, |
|
"learning_rate": 1.0769487843595955e-05, |
|
"loss": 0.1567, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 9.273182957393484, |
|
"grad_norm": 63.86299514770508, |
|
"learning_rate": 1.0727713259253071e-05, |
|
"loss": 0.134, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 9.314954051796157, |
|
"grad_norm": 9.471511840820312, |
|
"learning_rate": 1.0685938674910185e-05, |
|
"loss": 0.1439, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 9.35672514619883, |
|
"grad_norm": 17.094606399536133, |
|
"learning_rate": 1.0644164090567299e-05, |
|
"loss": 0.1504, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 9.398496240601503, |
|
"grad_norm": 0.7686102390289307, |
|
"learning_rate": 1.0602389506224413e-05, |
|
"loss": 0.198, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 9.440267335004178, |
|
"grad_norm": 4.632621765136719, |
|
"learning_rate": 1.0560614921881529e-05, |
|
"loss": 0.1299, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 9.48203842940685, |
|
"grad_norm": 17.580154418945312, |
|
"learning_rate": 1.0518840337538643e-05, |
|
"loss": 0.1221, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 9.523809523809524, |
|
"grad_norm": 28.859804153442383, |
|
"learning_rate": 1.0477065753195757e-05, |
|
"loss": 0.1497, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 9.565580618212197, |
|
"grad_norm": 3.4997804164886475, |
|
"learning_rate": 1.043529116885287e-05, |
|
"loss": 0.1228, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 9.60735171261487, |
|
"grad_norm": 21.530311584472656, |
|
"learning_rate": 1.0393516584509985e-05, |
|
"loss": 0.122, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 9.649122807017545, |
|
"grad_norm": 4.085865020751953, |
|
"learning_rate": 1.03517420001671e-05, |
|
"loss": 0.1625, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 9.690893901420218, |
|
"grad_norm": 13.100555419921875, |
|
"learning_rate": 1.0309967415824214e-05, |
|
"loss": 0.1361, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 9.73266499582289, |
|
"grad_norm": 22.437877655029297, |
|
"learning_rate": 1.0268192831481327e-05, |
|
"loss": 0.2052, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 9.774436090225564, |
|
"grad_norm": 24.361730575561523, |
|
"learning_rate": 1.022641824713844e-05, |
|
"loss": 0.1314, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 9.816207184628237, |
|
"grad_norm": 4.0465497970581055, |
|
"learning_rate": 1.0184643662795556e-05, |
|
"loss": 0.1526, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 9.857978279030911, |
|
"grad_norm": 0.056704986840486526, |
|
"learning_rate": 1.014286907845267e-05, |
|
"loss": 0.1579, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 9.899749373433584, |
|
"grad_norm": 1.126349687576294, |
|
"learning_rate": 1.0101094494109784e-05, |
|
"loss": 0.1602, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 9.941520467836257, |
|
"grad_norm": 42.518741607666016, |
|
"learning_rate": 1.0059319909766898e-05, |
|
"loss": 0.1495, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 9.98329156223893, |
|
"grad_norm": 15.24010944366455, |
|
"learning_rate": 1.0017545325424012e-05, |
|
"loss": 0.1109, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 10.025062656641603, |
|
"grad_norm": 2.6156651973724365, |
|
"learning_rate": 9.975770741081126e-06, |
|
"loss": 0.165, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 10.066833751044278, |
|
"grad_norm": 2.6075878143310547, |
|
"learning_rate": 9.933996156738242e-06, |
|
"loss": 0.0826, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 10.108604845446951, |
|
"grad_norm": 9.561171531677246, |
|
"learning_rate": 9.892221572395356e-06, |
|
"loss": 0.0878, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 10.150375939849624, |
|
"grad_norm": 3.180940628051758, |
|
"learning_rate": 9.85044698805247e-06, |
|
"loss": 0.0723, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 10.192147034252297, |
|
"grad_norm": 2.523481845855713, |
|
"learning_rate": 9.808672403709584e-06, |
|
"loss": 0.1363, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 10.23391812865497, |
|
"grad_norm": 14.353055000305176, |
|
"learning_rate": 9.766897819366698e-06, |
|
"loss": 0.099, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 10.275689223057643, |
|
"grad_norm": 1.3948580026626587, |
|
"learning_rate": 9.725123235023812e-06, |
|
"loss": 0.11, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 10.317460317460318, |
|
"grad_norm": 18.495405197143555, |
|
"learning_rate": 9.683348650680926e-06, |
|
"loss": 0.1414, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 10.359231411862991, |
|
"grad_norm": 33.3175163269043, |
|
"learning_rate": 9.641574066338041e-06, |
|
"loss": 0.126, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 10.401002506265664, |
|
"grad_norm": 6.8393235206604, |
|
"learning_rate": 9.599799481995154e-06, |
|
"loss": 0.1304, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 10.442773600668337, |
|
"grad_norm": 20.7825927734375, |
|
"learning_rate": 9.55802489765227e-06, |
|
"loss": 0.1272, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 10.48454469507101, |
|
"grad_norm": 19.048812866210938, |
|
"learning_rate": 9.516250313309383e-06, |
|
"loss": 0.108, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 10.526315789473685, |
|
"grad_norm": 19.517911911010742, |
|
"learning_rate": 9.474475728966497e-06, |
|
"loss": 0.1222, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 10.568086883876358, |
|
"grad_norm": 0.2565244734287262, |
|
"learning_rate": 9.432701144623611e-06, |
|
"loss": 0.0917, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 10.609857978279031, |
|
"grad_norm": 10.629936218261719, |
|
"learning_rate": 9.390926560280727e-06, |
|
"loss": 0.1145, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 10.651629072681704, |
|
"grad_norm": 1.9969385862350464, |
|
"learning_rate": 9.349151975937841e-06, |
|
"loss": 0.0877, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 10.693400167084377, |
|
"grad_norm": 13.717659950256348, |
|
"learning_rate": 9.307377391594953e-06, |
|
"loss": 0.0868, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 10.73517126148705, |
|
"grad_norm": 13.955449104309082, |
|
"learning_rate": 9.265602807252069e-06, |
|
"loss": 0.0922, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 10.776942355889725, |
|
"grad_norm": 9.980537414550781, |
|
"learning_rate": 9.223828222909183e-06, |
|
"loss": 0.0768, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 10.818713450292398, |
|
"grad_norm": 24.630155563354492, |
|
"learning_rate": 9.182053638566297e-06, |
|
"loss": 0.1128, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 10.86048454469507, |
|
"grad_norm": 2.8442764282226562, |
|
"learning_rate": 9.140279054223411e-06, |
|
"loss": 0.0794, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 10.902255639097744, |
|
"grad_norm": 39.42329025268555, |
|
"learning_rate": 9.098504469880527e-06, |
|
"loss": 0.0781, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 10.944026733500417, |
|
"grad_norm": 103.70584106445312, |
|
"learning_rate": 9.056729885537639e-06, |
|
"loss": 0.0872, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 10.985797827903092, |
|
"grad_norm": 0.15249623358249664, |
|
"learning_rate": 9.014955301194755e-06, |
|
"loss": 0.1059, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 11.027568922305765, |
|
"grad_norm": 10.956684112548828, |
|
"learning_rate": 8.973180716851868e-06, |
|
"loss": 0.0955, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 11.069340016708438, |
|
"grad_norm": 7.523435115814209, |
|
"learning_rate": 8.931406132508982e-06, |
|
"loss": 0.0671, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 11.11111111111111, |
|
"grad_norm": 7.550344467163086, |
|
"learning_rate": 8.889631548166096e-06, |
|
"loss": 0.0896, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 11.152882205513784, |
|
"grad_norm": 31.79115867614746, |
|
"learning_rate": 8.84785696382321e-06, |
|
"loss": 0.0464, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 11.194653299916459, |
|
"grad_norm": 3.681047201156616, |
|
"learning_rate": 8.806082379480324e-06, |
|
"loss": 0.0848, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 11.236424394319132, |
|
"grad_norm": 0.7510688304901123, |
|
"learning_rate": 8.764307795137438e-06, |
|
"loss": 0.0511, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 11.278195488721805, |
|
"grad_norm": 8.21001148223877, |
|
"learning_rate": 8.722533210794554e-06, |
|
"loss": 0.1051, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 11.319966583124478, |
|
"grad_norm": 8.100149154663086, |
|
"learning_rate": 8.680758626451668e-06, |
|
"loss": 0.0728, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 11.36173767752715, |
|
"grad_norm": 3.457951545715332, |
|
"learning_rate": 8.638984042108782e-06, |
|
"loss": 0.0934, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 11.403508771929825, |
|
"grad_norm": 1.1937059164047241, |
|
"learning_rate": 8.597209457765896e-06, |
|
"loss": 0.0834, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 11.445279866332498, |
|
"grad_norm": 0.9485940337181091, |
|
"learning_rate": 8.55543487342301e-06, |
|
"loss": 0.0755, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 11.487050960735171, |
|
"grad_norm": 12.561485290527344, |
|
"learning_rate": 8.513660289080124e-06, |
|
"loss": 0.0602, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 11.528822055137844, |
|
"grad_norm": 21.742900848388672, |
|
"learning_rate": 8.471885704737238e-06, |
|
"loss": 0.0639, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 11.570593149540517, |
|
"grad_norm": 1.016876220703125, |
|
"learning_rate": 8.430111120394354e-06, |
|
"loss": 0.0677, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 11.61236424394319, |
|
"grad_norm": 16.71541404724121, |
|
"learning_rate": 8.388336536051468e-06, |
|
"loss": 0.0618, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 11.654135338345865, |
|
"grad_norm": 14.695789337158203, |
|
"learning_rate": 8.346561951708582e-06, |
|
"loss": 0.0884, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 11.695906432748538, |
|
"grad_norm": 26.240062713623047, |
|
"learning_rate": 8.304787367365696e-06, |
|
"loss": 0.0623, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 11.737677527151211, |
|
"grad_norm": 0.9301555752754211, |
|
"learning_rate": 8.26301278302281e-06, |
|
"loss": 0.0856, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 11.779448621553884, |
|
"grad_norm": 0.215810164809227, |
|
"learning_rate": 8.221238198679924e-06, |
|
"loss": 0.0892, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 11.821219715956557, |
|
"grad_norm": 20.944414138793945, |
|
"learning_rate": 8.179463614337038e-06, |
|
"loss": 0.0455, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 11.862990810359232, |
|
"grad_norm": 16.5097599029541, |
|
"learning_rate": 8.137689029994153e-06, |
|
"loss": 0.0588, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 11.904761904761905, |
|
"grad_norm": 10.964118957519531, |
|
"learning_rate": 8.095914445651265e-06, |
|
"loss": 0.0798, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 11.946532999164578, |
|
"grad_norm": 4.6156134605407715, |
|
"learning_rate": 8.054139861308381e-06, |
|
"loss": 0.0681, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 11.988304093567251, |
|
"grad_norm": 15.3454008102417, |
|
"learning_rate": 8.012365276965495e-06, |
|
"loss": 0.0767, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 12.030075187969924, |
|
"grad_norm": 2.1442370414733887, |
|
"learning_rate": 7.970590692622609e-06, |
|
"loss": 0.0346, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 12.071846282372599, |
|
"grad_norm": 17.47662353515625, |
|
"learning_rate": 7.928816108279723e-06, |
|
"loss": 0.0359, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 12.113617376775272, |
|
"grad_norm": 9.307701110839844, |
|
"learning_rate": 7.887041523936837e-06, |
|
"loss": 0.0536, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 12.155388471177945, |
|
"grad_norm": 1.3280810117721558, |
|
"learning_rate": 7.845266939593951e-06, |
|
"loss": 0.0726, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 12.197159565580618, |
|
"grad_norm": 1.0069313049316406, |
|
"learning_rate": 7.803492355251065e-06, |
|
"loss": 0.0725, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 12.238930659983291, |
|
"grad_norm": 42.40116500854492, |
|
"learning_rate": 7.76171777090818e-06, |
|
"loss": 0.0281, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 12.280701754385966, |
|
"grad_norm": 0.04481910914182663, |
|
"learning_rate": 7.719943186565295e-06, |
|
"loss": 0.0288, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 12.322472848788639, |
|
"grad_norm": 1.7356605529785156, |
|
"learning_rate": 7.678168602222409e-06, |
|
"loss": 0.0698, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 12.364243943191312, |
|
"grad_norm": 13.85879135131836, |
|
"learning_rate": 7.636394017879523e-06, |
|
"loss": 0.0511, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 12.406015037593985, |
|
"grad_norm": 4.5668253898620605, |
|
"learning_rate": 7.5946194335366375e-06, |
|
"loss": 0.0349, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 12.447786131996658, |
|
"grad_norm": 0.4067160189151764, |
|
"learning_rate": 7.5528448491937515e-06, |
|
"loss": 0.0309, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 12.48955722639933, |
|
"grad_norm": 1.8756296634674072, |
|
"learning_rate": 7.511070264850865e-06, |
|
"loss": 0.089, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 12.531328320802006, |
|
"grad_norm": 0.7279142141342163, |
|
"learning_rate": 7.469295680507979e-06, |
|
"loss": 0.0592, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 12.573099415204679, |
|
"grad_norm": 20.90660285949707, |
|
"learning_rate": 7.427521096165093e-06, |
|
"loss": 0.0613, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 12.614870509607352, |
|
"grad_norm": 21.889909744262695, |
|
"learning_rate": 7.385746511822208e-06, |
|
"loss": 0.0526, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 12.656641604010025, |
|
"grad_norm": 0.16797950863838196, |
|
"learning_rate": 7.343971927479322e-06, |
|
"loss": 0.044, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 12.698412698412698, |
|
"grad_norm": 2.835975408554077, |
|
"learning_rate": 7.302197343136437e-06, |
|
"loss": 0.0481, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 12.740183792815372, |
|
"grad_norm": 17.579313278198242, |
|
"learning_rate": 7.26042275879355e-06, |
|
"loss": 0.0322, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 12.781954887218046, |
|
"grad_norm": 4.634361267089844, |
|
"learning_rate": 7.218648174450665e-06, |
|
"loss": 0.0523, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 12.823725981620719, |
|
"grad_norm": 0.9335712194442749, |
|
"learning_rate": 7.176873590107779e-06, |
|
"loss": 0.0618, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 12.865497076023392, |
|
"grad_norm": 0.37698858976364136, |
|
"learning_rate": 7.135099005764893e-06, |
|
"loss": 0.0698, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 12.907268170426065, |
|
"grad_norm": 12.935955047607422, |
|
"learning_rate": 7.093324421422008e-06, |
|
"loss": 0.0293, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 12.949039264828738, |
|
"grad_norm": 27.532899856567383, |
|
"learning_rate": 7.051549837079121e-06, |
|
"loss": 0.0345, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 12.990810359231412, |
|
"grad_norm": 13.847272872924805, |
|
"learning_rate": 7.009775252736236e-06, |
|
"loss": 0.0266, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 13.032581453634085, |
|
"grad_norm": 0.8636922240257263, |
|
"learning_rate": 6.96800066839335e-06, |
|
"loss": 0.0283, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 13.074352548036758, |
|
"grad_norm": 9.82494831085205, |
|
"learning_rate": 6.9262260840504646e-06, |
|
"loss": 0.0219, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 13.116123642439431, |
|
"grad_norm": 0.6327198147773743, |
|
"learning_rate": 6.8844514997075785e-06, |
|
"loss": 0.032, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 13.157894736842104, |
|
"grad_norm": 0.04686570540070534, |
|
"learning_rate": 6.8426769153646925e-06, |
|
"loss": 0.0247, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 13.19966583124478, |
|
"grad_norm": 3.148859977722168, |
|
"learning_rate": 6.800902331021807e-06, |
|
"loss": 0.0408, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 13.241436925647452, |
|
"grad_norm": 14.439146995544434, |
|
"learning_rate": 6.7591277466789205e-06, |
|
"loss": 0.0261, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 13.283208020050125, |
|
"grad_norm": 2.363865375518799, |
|
"learning_rate": 6.717353162336035e-06, |
|
"loss": 0.0497, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 13.324979114452798, |
|
"grad_norm": 11.770255088806152, |
|
"learning_rate": 6.675578577993149e-06, |
|
"loss": 0.0395, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 13.366750208855471, |
|
"grad_norm": 0.6963861584663391, |
|
"learning_rate": 6.633803993650264e-06, |
|
"loss": 0.0408, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 13.408521303258146, |
|
"grad_norm": 9.609217643737793, |
|
"learning_rate": 6.592029409307378e-06, |
|
"loss": 0.0364, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 13.450292397660819, |
|
"grad_norm": 7.188763618469238, |
|
"learning_rate": 6.550254824964493e-06, |
|
"loss": 0.0381, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 13.492063492063492, |
|
"grad_norm": 0.07387153059244156, |
|
"learning_rate": 6.508480240621606e-06, |
|
"loss": 0.0504, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 13.533834586466165, |
|
"grad_norm": 7.845670223236084, |
|
"learning_rate": 6.46670565627872e-06, |
|
"loss": 0.0326, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 13.575605680868838, |
|
"grad_norm": 6.502562522888184, |
|
"learning_rate": 6.424931071935835e-06, |
|
"loss": 0.0458, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 13.617376775271513, |
|
"grad_norm": 0.5432217121124268, |
|
"learning_rate": 6.383156487592949e-06, |
|
"loss": 0.0229, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 13.659147869674186, |
|
"grad_norm": 0.12599503993988037, |
|
"learning_rate": 6.341381903250064e-06, |
|
"loss": 0.0313, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 13.700918964076859, |
|
"grad_norm": 1.3932889699935913, |
|
"learning_rate": 6.299607318907177e-06, |
|
"loss": 0.036, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 13.742690058479532, |
|
"grad_norm": 8.858582496643066, |
|
"learning_rate": 6.257832734564292e-06, |
|
"loss": 0.0259, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 13.784461152882205, |
|
"grad_norm": 2.9520416259765625, |
|
"learning_rate": 6.216058150221406e-06, |
|
"loss": 0.0385, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 13.826232247284878, |
|
"grad_norm": 0.8918272852897644, |
|
"learning_rate": 6.1742835658785204e-06, |
|
"loss": 0.0216, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 13.868003341687553, |
|
"grad_norm": 24.075159072875977, |
|
"learning_rate": 6.1325089815356344e-06, |
|
"loss": 0.0164, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 13.909774436090226, |
|
"grad_norm": 7.580496311187744, |
|
"learning_rate": 6.0907343971927476e-06, |
|
"loss": 0.0266, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 13.951545530492899, |
|
"grad_norm": 19.381996154785156, |
|
"learning_rate": 6.048959812849862e-06, |
|
"loss": 0.0436, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 13.993316624895572, |
|
"grad_norm": 0.01642206870019436, |
|
"learning_rate": 6.007185228506976e-06, |
|
"loss": 0.0139, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 14.035087719298245, |
|
"grad_norm": 19.350568771362305, |
|
"learning_rate": 5.965410644164091e-06, |
|
"loss": 0.0173, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 14.07685881370092, |
|
"grad_norm": 0.03829874470829964, |
|
"learning_rate": 5.923636059821205e-06, |
|
"loss": 0.0285, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 14.118629908103593, |
|
"grad_norm": 0.529155433177948, |
|
"learning_rate": 5.88186147547832e-06, |
|
"loss": 0.0236, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 14.160401002506266, |
|
"grad_norm": 1.4649921655654907, |
|
"learning_rate": 5.840086891135434e-06, |
|
"loss": 0.0402, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 14.202172096908939, |
|
"grad_norm": 15.808168411254883, |
|
"learning_rate": 5.798312306792549e-06, |
|
"loss": 0.0397, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 14.243943191311612, |
|
"grad_norm": 0.01485319435596466, |
|
"learning_rate": 5.756537722449662e-06, |
|
"loss": 0.0259, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 0.036885835230350494, |
|
"learning_rate": 5.714763138106776e-06, |
|
"loss": 0.0116, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 14.32748538011696, |
|
"grad_norm": 0.09616250544786453, |
|
"learning_rate": 5.672988553763891e-06, |
|
"loss": 0.0165, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 14.369256474519633, |
|
"grad_norm": 0.14387579262256622, |
|
"learning_rate": 5.631213969421005e-06, |
|
"loss": 0.0357, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 14.411027568922306, |
|
"grad_norm": 0.9712551236152649, |
|
"learning_rate": 5.5894393850781196e-06, |
|
"loss": 0.0318, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 14.452798663324979, |
|
"grad_norm": 1.3842188119888306, |
|
"learning_rate": 5.547664800735233e-06, |
|
"loss": 0.0077, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 14.494569757727653, |
|
"grad_norm": 0.19776180386543274, |
|
"learning_rate": 5.5058902163923475e-06, |
|
"loss": 0.0215, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 14.536340852130326, |
|
"grad_norm": 0.09455841779708862, |
|
"learning_rate": 5.4641156320494615e-06, |
|
"loss": 0.0178, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 14.578111946533, |
|
"grad_norm": 0.8986994028091431, |
|
"learning_rate": 5.4223410477065755e-06, |
|
"loss": 0.0197, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 14.619883040935672, |
|
"grad_norm": 0.009756785817444324, |
|
"learning_rate": 5.38056646336369e-06, |
|
"loss": 0.0185, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 14.661654135338345, |
|
"grad_norm": 0.09191206842660904, |
|
"learning_rate": 5.3387918790208035e-06, |
|
"loss": 0.0149, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 14.703425229741018, |
|
"grad_norm": 1.768214225769043, |
|
"learning_rate": 5.297017294677918e-06, |
|
"loss": 0.0115, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 14.745196324143693, |
|
"grad_norm": 1.6435145139694214, |
|
"learning_rate": 5.255242710335032e-06, |
|
"loss": 0.0153, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 14.786967418546366, |
|
"grad_norm": 0.0256047360599041, |
|
"learning_rate": 5.213468125992147e-06, |
|
"loss": 0.0223, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 14.82873851294904, |
|
"grad_norm": 2.076021671295166, |
|
"learning_rate": 5.171693541649261e-06, |
|
"loss": 0.0229, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 14.870509607351712, |
|
"grad_norm": 0.2123277485370636, |
|
"learning_rate": 5.129918957306376e-06, |
|
"loss": 0.0101, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 14.912280701754385, |
|
"grad_norm": 0.03163406625390053, |
|
"learning_rate": 5.088144372963489e-06, |
|
"loss": 0.0186, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 14.95405179615706, |
|
"grad_norm": 0.5532212257385254, |
|
"learning_rate": 5.046369788620603e-06, |
|
"loss": 0.0191, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 14.995822890559733, |
|
"grad_norm": 0.05094052106142044, |
|
"learning_rate": 5.004595204277718e-06, |
|
"loss": 0.022, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 15.037593984962406, |
|
"grad_norm": 0.04656049981713295, |
|
"learning_rate": 4.962820619934832e-06, |
|
"loss": 0.0155, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 15.079365079365079, |
|
"grad_norm": 9.495512008666992, |
|
"learning_rate": 4.921046035591947e-06, |
|
"loss": 0.012, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 15.121136173767752, |
|
"grad_norm": 0.034731555730104446, |
|
"learning_rate": 4.879271451249061e-06, |
|
"loss": 0.0042, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 15.162907268170427, |
|
"grad_norm": 0.02487257681787014, |
|
"learning_rate": 4.837496866906175e-06, |
|
"loss": 0.0131, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 15.2046783625731, |
|
"grad_norm": 10.468315124511719, |
|
"learning_rate": 4.795722282563289e-06, |
|
"loss": 0.0167, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 15.246449456975773, |
|
"grad_norm": 3.7355704307556152, |
|
"learning_rate": 4.753947698220403e-06, |
|
"loss": 0.0062, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 15.288220551378446, |
|
"grad_norm": 0.007894457317888737, |
|
"learning_rate": 4.712173113877517e-06, |
|
"loss": 0.0102, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 15.329991645781119, |
|
"grad_norm": 0.007423860020935535, |
|
"learning_rate": 4.670398529534631e-06, |
|
"loss": 0.0187, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 15.371762740183792, |
|
"grad_norm": 0.5450906753540039, |
|
"learning_rate": 4.628623945191746e-06, |
|
"loss": 0.0278, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 15.413533834586467, |
|
"grad_norm": 0.5019950270652771, |
|
"learning_rate": 4.58684936084886e-06, |
|
"loss": 0.0299, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 15.45530492898914, |
|
"grad_norm": 1.3405569791793823, |
|
"learning_rate": 4.545074776505974e-06, |
|
"loss": 0.022, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 15.497076023391813, |
|
"grad_norm": 7.201114177703857, |
|
"learning_rate": 4.503300192163088e-06, |
|
"loss": 0.0144, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 15.538847117794486, |
|
"grad_norm": 1.0454356670379639, |
|
"learning_rate": 4.461525607820202e-06, |
|
"loss": 0.0144, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 15.580618212197159, |
|
"grad_norm": 1.4038639068603516, |
|
"learning_rate": 4.419751023477317e-06, |
|
"loss": 0.0219, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 15.622389306599834, |
|
"grad_norm": 0.048461660742759705, |
|
"learning_rate": 4.377976439134431e-06, |
|
"loss": 0.009, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 15.664160401002507, |
|
"grad_norm": 0.1847960352897644, |
|
"learning_rate": 4.336201854791545e-06, |
|
"loss": 0.0076, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 15.70593149540518, |
|
"grad_norm": 0.23470734059810638, |
|
"learning_rate": 4.29442727044866e-06, |
|
"loss": 0.0065, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 15.747702589807853, |
|
"grad_norm": 10.248291969299316, |
|
"learning_rate": 4.252652686105774e-06, |
|
"loss": 0.0114, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 15.789473684210526, |
|
"grad_norm": 0.04861776903271675, |
|
"learning_rate": 4.210878101762888e-06, |
|
"loss": 0.0083, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 15.8312447786132, |
|
"grad_norm": 0.00836202036589384, |
|
"learning_rate": 4.169103517420002e-06, |
|
"loss": 0.0204, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 15.873015873015873, |
|
"grad_norm": 39.85865783691406, |
|
"learning_rate": 4.127328933077116e-06, |
|
"loss": 0.0101, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 15.914786967418546, |
|
"grad_norm": 0.07122869789600372, |
|
"learning_rate": 4.0855543487342305e-06, |
|
"loss": 0.0087, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 15.95655806182122, |
|
"grad_norm": 0.07924563437700272, |
|
"learning_rate": 4.0437797643913445e-06, |
|
"loss": 0.0127, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 15.998329156223893, |
|
"grad_norm": 0.040103524923324585, |
|
"learning_rate": 4.0020051800484585e-06, |
|
"loss": 0.0114, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 16.040100250626566, |
|
"grad_norm": 0.3489997684955597, |
|
"learning_rate": 3.960230595705573e-06, |
|
"loss": 0.0034, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 16.08187134502924, |
|
"grad_norm": 0.0418890118598938, |
|
"learning_rate": 3.918456011362687e-06, |
|
"loss": 0.0168, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 16.12364243943191, |
|
"grad_norm": 0.024545153602957726, |
|
"learning_rate": 3.876681427019801e-06, |
|
"loss": 0.0121, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 16.165413533834588, |
|
"grad_norm": 8.744132041931152, |
|
"learning_rate": 3.834906842676916e-06, |
|
"loss": 0.0089, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 16.20718462823726, |
|
"grad_norm": 0.2575714886188507, |
|
"learning_rate": 3.7931322583340296e-06, |
|
"loss": 0.0097, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 16.248955722639934, |
|
"grad_norm": 0.00845133326947689, |
|
"learning_rate": 3.751357673991144e-06, |
|
"loss": 0.0158, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 16.290726817042607, |
|
"grad_norm": 2.029202699661255, |
|
"learning_rate": 3.709583089648258e-06, |
|
"loss": 0.0085, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 16.33249791144528, |
|
"grad_norm": 0.009376639500260353, |
|
"learning_rate": 3.6678085053053724e-06, |
|
"loss": 0.0038, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 16.374269005847953, |
|
"grad_norm": 0.05808446928858757, |
|
"learning_rate": 3.626033920962487e-06, |
|
"loss": 0.0179, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 16.416040100250626, |
|
"grad_norm": 0.11780844628810883, |
|
"learning_rate": 3.584259336619601e-06, |
|
"loss": 0.0087, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 16.4578111946533, |
|
"grad_norm": 0.15603607892990112, |
|
"learning_rate": 3.542484752276715e-06, |
|
"loss": 0.0041, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 16.499582289055972, |
|
"grad_norm": 0.8933451175689697, |
|
"learning_rate": 3.5007101679338296e-06, |
|
"loss": 0.0133, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 16.541353383458645, |
|
"grad_norm": 0.1364767998456955, |
|
"learning_rate": 3.458935583590943e-06, |
|
"loss": 0.0069, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 16.58312447786132, |
|
"grad_norm": 1.5579296350479126, |
|
"learning_rate": 3.4171609992480576e-06, |
|
"loss": 0.0086, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 16.624895572263995, |
|
"grad_norm": 0.11376599967479706, |
|
"learning_rate": 3.375386414905172e-06, |
|
"loss": 0.0018, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 3.527081251144409, |
|
"learning_rate": 3.333611830562286e-06, |
|
"loss": 0.0068, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 16.70843776106934, |
|
"grad_norm": 0.16554129123687744, |
|
"learning_rate": 3.2918372462194004e-06, |
|
"loss": 0.0058, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 16.750208855472014, |
|
"grad_norm": 7.927056312561035, |
|
"learning_rate": 3.2500626618765148e-06, |
|
"loss": 0.0041, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 16.791979949874687, |
|
"grad_norm": 6.927394390106201, |
|
"learning_rate": 3.2082880775336287e-06, |
|
"loss": 0.0112, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 16.83375104427736, |
|
"grad_norm": 3.62300181388855, |
|
"learning_rate": 3.166513493190743e-06, |
|
"loss": 0.0088, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 16.875522138680033, |
|
"grad_norm": 37.684913635253906, |
|
"learning_rate": 3.1247389088478576e-06, |
|
"loss": 0.0044, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 16.917293233082706, |
|
"grad_norm": 0.34140291810035706, |
|
"learning_rate": 3.082964324504971e-06, |
|
"loss": 0.0092, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 16.95906432748538, |
|
"grad_norm": 0.7771002650260925, |
|
"learning_rate": 3.0411897401620855e-06, |
|
"loss": 0.0084, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 17.000835421888052, |
|
"grad_norm": 0.10395874083042145, |
|
"learning_rate": 2.9994151558192e-06, |
|
"loss": 0.0062, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 17.04260651629073, |
|
"grad_norm": 0.12849818170070648, |
|
"learning_rate": 2.957640571476314e-06, |
|
"loss": 0.0101, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 17.0843776106934, |
|
"grad_norm": 0.010177390649914742, |
|
"learning_rate": 2.9158659871334283e-06, |
|
"loss": 0.0056, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 17.126148705096075, |
|
"grad_norm": 10.7208833694458, |
|
"learning_rate": 2.8740914027905427e-06, |
|
"loss": 0.0063, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 17.167919799498748, |
|
"grad_norm": 0.31796014308929443, |
|
"learning_rate": 2.8323168184476567e-06, |
|
"loss": 0.0082, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 17.20969089390142, |
|
"grad_norm": 0.3368360698223114, |
|
"learning_rate": 2.790542234104771e-06, |
|
"loss": 0.0027, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 17.251461988304094, |
|
"grad_norm": 0.871994137763977, |
|
"learning_rate": 2.7487676497618855e-06, |
|
"loss": 0.0057, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 17.293233082706767, |
|
"grad_norm": 3.6776578426361084, |
|
"learning_rate": 2.706993065418999e-06, |
|
"loss": 0.0039, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 17.33500417710944, |
|
"grad_norm": 0.03436708822846413, |
|
"learning_rate": 2.6652184810761135e-06, |
|
"loss": 0.0029, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 17.376775271512113, |
|
"grad_norm": 12.215385437011719, |
|
"learning_rate": 2.6234438967332274e-06, |
|
"loss": 0.0104, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 17.418546365914786, |
|
"grad_norm": 0.1122766062617302, |
|
"learning_rate": 2.581669312390342e-06, |
|
"loss": 0.0082, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 17.46031746031746, |
|
"grad_norm": 0.01590600423514843, |
|
"learning_rate": 2.5398947280474563e-06, |
|
"loss": 0.0059, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 17.502088554720135, |
|
"grad_norm": 8.229164123535156, |
|
"learning_rate": 2.4981201437045707e-06, |
|
"loss": 0.0014, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 17.54385964912281, |
|
"grad_norm": 0.13155962526798248, |
|
"learning_rate": 2.4563455593616846e-06, |
|
"loss": 0.0027, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 17.58563074352548, |
|
"grad_norm": 2.3657147884368896, |
|
"learning_rate": 2.4145709750187986e-06, |
|
"loss": 0.0061, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 17.627401837928154, |
|
"grad_norm": 0.07491889595985413, |
|
"learning_rate": 2.372796390675913e-06, |
|
"loss": 0.0006, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 17.669172932330827, |
|
"grad_norm": 0.2693181037902832, |
|
"learning_rate": 2.3310218063330274e-06, |
|
"loss": 0.0082, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 17.7109440267335, |
|
"grad_norm": 8.37026309967041, |
|
"learning_rate": 2.2892472219901414e-06, |
|
"loss": 0.0062, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 17.752715121136173, |
|
"grad_norm": 0.2343984991312027, |
|
"learning_rate": 2.2474726376472554e-06, |
|
"loss": 0.0047, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 17.794486215538846, |
|
"grad_norm": 0.3801390528678894, |
|
"learning_rate": 2.20569805330437e-06, |
|
"loss": 0.0054, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 17.83625730994152, |
|
"grad_norm": 0.00528654083609581, |
|
"learning_rate": 2.163923468961484e-06, |
|
"loss": 0.0008, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 17.878028404344192, |
|
"grad_norm": 0.014739965088665485, |
|
"learning_rate": 2.122148884618598e-06, |
|
"loss": 0.0031, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 17.91979949874687, |
|
"grad_norm": 0.04143223166465759, |
|
"learning_rate": 2.080374300275712e-06, |
|
"loss": 0.0013, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 17.961570593149542, |
|
"grad_norm": 0.11256618052721024, |
|
"learning_rate": 2.0385997159328266e-06, |
|
"loss": 0.0038, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 18.003341687552215, |
|
"grad_norm": 0.31975257396698, |
|
"learning_rate": 1.996825131589941e-06, |
|
"loss": 0.0076, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 18.045112781954888, |
|
"grad_norm": 0.0022968221455812454, |
|
"learning_rate": 1.9550505472470554e-06, |
|
"loss": 0.0031, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 18.08688387635756, |
|
"grad_norm": 0.06881808489561081, |
|
"learning_rate": 1.9132759629041693e-06, |
|
"loss": 0.005, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 18.128654970760234, |
|
"grad_norm": 0.26212799549102783, |
|
"learning_rate": 1.8715013785612835e-06, |
|
"loss": 0.0016, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 18.170426065162907, |
|
"grad_norm": 0.0171457901597023, |
|
"learning_rate": 1.8297267942183977e-06, |
|
"loss": 0.0067, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 18.21219715956558, |
|
"grad_norm": 0.2334776520729065, |
|
"learning_rate": 1.787952209875512e-06, |
|
"loss": 0.0027, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 18.253968253968253, |
|
"grad_norm": 0.08637866377830505, |
|
"learning_rate": 1.746177625532626e-06, |
|
"loss": 0.0053, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 18.295739348370926, |
|
"grad_norm": 0.11498326063156128, |
|
"learning_rate": 1.7044030411897403e-06, |
|
"loss": 0.0023, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 18.3375104427736, |
|
"grad_norm": 0.014128020964562893, |
|
"learning_rate": 1.6626284568468545e-06, |
|
"loss": 0.008, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 18.379281537176276, |
|
"grad_norm": 0.13422049582004547, |
|
"learning_rate": 1.620853872503969e-06, |
|
"loss": 0.0024, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 18.42105263157895, |
|
"grad_norm": 0.13545355200767517, |
|
"learning_rate": 1.5790792881610829e-06, |
|
"loss": 0.0039, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 18.46282372598162, |
|
"grad_norm": 0.10272631794214249, |
|
"learning_rate": 1.537304703818197e-06, |
|
"loss": 0.003, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 18.504594820384295, |
|
"grad_norm": 3.581731081008911, |
|
"learning_rate": 1.4955301194753113e-06, |
|
"loss": 0.003, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 18.546365914786968, |
|
"grad_norm": 0.019360538572072983, |
|
"learning_rate": 1.4537555351324257e-06, |
|
"loss": 0.001, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 18.58813700918964, |
|
"grad_norm": 0.20149047672748566, |
|
"learning_rate": 1.4119809507895399e-06, |
|
"loss": 0.0029, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 18.629908103592314, |
|
"grad_norm": 0.017035024240612984, |
|
"learning_rate": 1.3702063664466539e-06, |
|
"loss": 0.0014, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 18.671679197994987, |
|
"grad_norm": 0.325811505317688, |
|
"learning_rate": 1.3284317821037683e-06, |
|
"loss": 0.0052, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 18.71345029239766, |
|
"grad_norm": 0.020387643948197365, |
|
"learning_rate": 1.2866571977608824e-06, |
|
"loss": 0.0011, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 18.755221386800333, |
|
"grad_norm": 1.3581466674804688, |
|
"learning_rate": 1.2448826134179966e-06, |
|
"loss": 0.0013, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 18.796992481203006, |
|
"grad_norm": 0.05018683522939682, |
|
"learning_rate": 1.2031080290751108e-06, |
|
"loss": 0.0012, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 18.838763575605682, |
|
"grad_norm": 0.17220334708690643, |
|
"learning_rate": 1.161333444732225e-06, |
|
"loss": 0.003, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 18.880534670008355, |
|
"grad_norm": 0.02596069872379303, |
|
"learning_rate": 1.1195588603893392e-06, |
|
"loss": 0.0011, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 18.92230576441103, |
|
"grad_norm": 0.21498483419418335, |
|
"learning_rate": 1.0777842760464534e-06, |
|
"loss": 0.0049, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 18.9640768588137, |
|
"grad_norm": 0.21940931677818298, |
|
"learning_rate": 1.0360096917035676e-06, |
|
"loss": 0.0007, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 19.005847953216374, |
|
"grad_norm": 0.09297411888837814, |
|
"learning_rate": 9.942351073606818e-07, |
|
"loss": 0.0008, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 19.047619047619047, |
|
"grad_norm": 0.01873987540602684, |
|
"learning_rate": 9.524605230177961e-07, |
|
"loss": 0.0005, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 19.08939014202172, |
|
"grad_norm": 0.029663298279047012, |
|
"learning_rate": 9.106859386749102e-07, |
|
"loss": 0.0022, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 19.131161236424393, |
|
"grad_norm": 0.20812617242336273, |
|
"learning_rate": 8.689113543320245e-07, |
|
"loss": 0.0065, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 19.172932330827066, |
|
"grad_norm": 0.047941118478775024, |
|
"learning_rate": 8.271367699891386e-07, |
|
"loss": 0.0047, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 19.21470342522974, |
|
"grad_norm": 0.18795344233512878, |
|
"learning_rate": 7.853621856462529e-07, |
|
"loss": 0.0022, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 19.256474519632416, |
|
"grad_norm": 5.738811492919922, |
|
"learning_rate": 7.435876013033672e-07, |
|
"loss": 0.0025, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 19.29824561403509, |
|
"grad_norm": 0.030096910893917084, |
|
"learning_rate": 7.018130169604812e-07, |
|
"loss": 0.0006, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 19.340016708437762, |
|
"grad_norm": 0.07230346649885178, |
|
"learning_rate": 6.600384326175955e-07, |
|
"loss": 0.0006, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 19.381787802840435, |
|
"grad_norm": 0.1296992152929306, |
|
"learning_rate": 6.182638482747097e-07, |
|
"loss": 0.0003, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 19.423558897243108, |
|
"grad_norm": 0.15579549968242645, |
|
"learning_rate": 5.764892639318239e-07, |
|
"loss": 0.0013, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 19.46532999164578, |
|
"grad_norm": 0.018042083829641342, |
|
"learning_rate": 5.347146795889381e-07, |
|
"loss": 0.0016, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 19.507101086048454, |
|
"grad_norm": 0.4105440378189087, |
|
"learning_rate": 4.929400952460523e-07, |
|
"loss": 0.0006, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 19.548872180451127, |
|
"grad_norm": 0.038732532411813736, |
|
"learning_rate": 4.5116551090316656e-07, |
|
"loss": 0.004, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 19.5906432748538, |
|
"grad_norm": 2.204296827316284, |
|
"learning_rate": 4.093909265602808e-07, |
|
"loss": 0.0011, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 19.632414369256473, |
|
"grad_norm": 0.014660513959825039, |
|
"learning_rate": 3.67616342217395e-07, |
|
"loss": 0.0009, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 19.674185463659146, |
|
"grad_norm": 0.011966018006205559, |
|
"learning_rate": 3.258417578745092e-07, |
|
"loss": 0.0006, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 19.715956558061823, |
|
"grad_norm": 0.03099227510392666, |
|
"learning_rate": 2.840671735316234e-07, |
|
"loss": 0.0009, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 19.757727652464496, |
|
"grad_norm": 0.09417425096035004, |
|
"learning_rate": 2.422925891887376e-07, |
|
"loss": 0.0032, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 19.79949874686717, |
|
"grad_norm": 0.153071790933609, |
|
"learning_rate": 2.005180048458518e-07, |
|
"loss": 0.0021, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 19.841269841269842, |
|
"grad_norm": 0.015282063744962215, |
|
"learning_rate": 1.5874342050296602e-07, |
|
"loss": 0.0005, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 19.883040935672515, |
|
"grad_norm": 0.012147588655352592, |
|
"learning_rate": 1.1696883616008022e-07, |
|
"loss": 0.0041, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 19.924812030075188, |
|
"grad_norm": 0.16458311676979065, |
|
"learning_rate": 7.519425181719443e-08, |
|
"loss": 0.0018, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 19.96658312447786, |
|
"grad_norm": 0.11444168537855148, |
|
"learning_rate": 3.341966747430863e-08, |
|
"loss": 0.002, |
|
"step": 23900 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 23940, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.5544269721786112e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|