|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 412, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0048543689320388345, |
|
"grad_norm": 3.966326270540717, |
|
"learning_rate": 9.999854640567861e-06, |
|
"loss": 0.1451, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009708737864077669, |
|
"grad_norm": 3.287744517939351, |
|
"learning_rate": 9.999418570723189e-06, |
|
"loss": 0.1051, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.014563106796116505, |
|
"grad_norm": 6.1026081677947355, |
|
"learning_rate": 9.998691815820732e-06, |
|
"loss": 0.191, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.019417475728155338, |
|
"grad_norm": 4.0426543837051305, |
|
"learning_rate": 9.997674418116759e-06, |
|
"loss": 0.1835, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.024271844660194174, |
|
"grad_norm": 3.5236475990680036, |
|
"learning_rate": 9.996366436766612e-06, |
|
"loss": 0.1656, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02912621359223301, |
|
"grad_norm": 5.112858587219301, |
|
"learning_rate": 9.994767947821261e-06, |
|
"loss": 0.1854, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03398058252427184, |
|
"grad_norm": 4.22628208254603, |
|
"learning_rate": 9.992879044222887e-06, |
|
"loss": 0.1572, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.038834951456310676, |
|
"grad_norm": 3.567730367516855, |
|
"learning_rate": 9.99069983579947e-06, |
|
"loss": 0.16, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.043689320388349516, |
|
"grad_norm": 3.864818841735565, |
|
"learning_rate": 9.988230449258409e-06, |
|
"loss": 0.1742, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04854368932038835, |
|
"grad_norm": 3.149896380777374, |
|
"learning_rate": 9.985471028179155e-06, |
|
"loss": 0.1201, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05339805825242718, |
|
"grad_norm": 3.9606265348197227, |
|
"learning_rate": 9.982421733004857e-06, |
|
"loss": 0.1417, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05825242718446602, |
|
"grad_norm": 3.698148621668834, |
|
"learning_rate": 9.979082741033047e-06, |
|
"loss": 0.149, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.06310679611650485, |
|
"grad_norm": 3.64315456609058, |
|
"learning_rate": 9.975454246405312e-06, |
|
"loss": 0.1553, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06796116504854369, |
|
"grad_norm": 4.553403154538875, |
|
"learning_rate": 9.971536460096021e-06, |
|
"loss": 0.2022, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.07281553398058252, |
|
"grad_norm": 4.2360586991459295, |
|
"learning_rate": 9.96732960990005e-06, |
|
"loss": 0.1951, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07766990291262135, |
|
"grad_norm": 3.4862603646834183, |
|
"learning_rate": 9.96283394041954e-06, |
|
"loss": 0.1482, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0825242718446602, |
|
"grad_norm": 3.7419659592583376, |
|
"learning_rate": 9.95804971304968e-06, |
|
"loss": 0.1373, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.08737864077669903, |
|
"grad_norm": 3.346682825396517, |
|
"learning_rate": 9.952977205963496e-06, |
|
"loss": 0.1513, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.09223300970873786, |
|
"grad_norm": 3.565488914066717, |
|
"learning_rate": 9.94761671409569e-06, |
|
"loss": 0.1986, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0970873786407767, |
|
"grad_norm": 3.2240941606945714, |
|
"learning_rate": 9.941968549125481e-06, |
|
"loss": 0.168, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.10194174757281553, |
|
"grad_norm": 4.077234412735414, |
|
"learning_rate": 9.936033039458494e-06, |
|
"loss": 0.1707, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.10679611650485436, |
|
"grad_norm": 3.3128671441668898, |
|
"learning_rate": 9.929810530207651e-06, |
|
"loss": 0.152, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.11165048543689321, |
|
"grad_norm": 3.2570467959059988, |
|
"learning_rate": 9.923301383173119e-06, |
|
"loss": 0.1449, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.11650485436893204, |
|
"grad_norm": 3.353932510107021, |
|
"learning_rate": 9.916505976821262e-06, |
|
"loss": 0.168, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.12135922330097088, |
|
"grad_norm": 3.2426471767737053, |
|
"learning_rate": 9.909424706262647e-06, |
|
"loss": 0.1388, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.1262135922330097, |
|
"grad_norm": 4.252865275476676, |
|
"learning_rate": 9.902057983229059e-06, |
|
"loss": 0.1879, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.13106796116504854, |
|
"grad_norm": 3.3838647811500233, |
|
"learning_rate": 9.894406236049569e-06, |
|
"loss": 0.1422, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.13592233009708737, |
|
"grad_norm": 3.9608322469384687, |
|
"learning_rate": 9.886469909625624e-06, |
|
"loss": 0.1787, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1407766990291262, |
|
"grad_norm": 4.165890923962387, |
|
"learning_rate": 9.87824946540519e-06, |
|
"loss": 0.2098, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.14563106796116504, |
|
"grad_norm": 3.8367946779580215, |
|
"learning_rate": 9.869745381355906e-06, |
|
"loss": 0.1846, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15048543689320387, |
|
"grad_norm": 3.529517961756975, |
|
"learning_rate": 9.860958151937303e-06, |
|
"loss": 0.1556, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.1553398058252427, |
|
"grad_norm": 4.030665510190116, |
|
"learning_rate": 9.851888288072053e-06, |
|
"loss": 0.1704, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.16019417475728157, |
|
"grad_norm": 4.032532812969292, |
|
"learning_rate": 9.842536317116262e-06, |
|
"loss": 0.2156, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.1650485436893204, |
|
"grad_norm": 3.78915979164578, |
|
"learning_rate": 9.832902782828801e-06, |
|
"loss": 0.1792, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.16990291262135923, |
|
"grad_norm": 4.306930939426049, |
|
"learning_rate": 9.822988245339701e-06, |
|
"loss": 0.1997, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.17475728155339806, |
|
"grad_norm": 3.520397643345112, |
|
"learning_rate": 9.81279328111758e-06, |
|
"loss": 0.1455, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.1796116504854369, |
|
"grad_norm": 3.529888916980051, |
|
"learning_rate": 9.802318482936121e-06, |
|
"loss": 0.1562, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.18446601941747573, |
|
"grad_norm": 3.4616819416943496, |
|
"learning_rate": 9.791564459839609e-06, |
|
"loss": 0.1642, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.18932038834951456, |
|
"grad_norm": 3.941961272191275, |
|
"learning_rate": 9.780531837107519e-06, |
|
"loss": 0.2215, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1941747572815534, |
|
"grad_norm": 4.780469064890577, |
|
"learning_rate": 9.769221256218165e-06, |
|
"loss": 0.2028, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19902912621359223, |
|
"grad_norm": 3.9426812986083895, |
|
"learning_rate": 9.75763337481139e-06, |
|
"loss": 0.2029, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.20388349514563106, |
|
"grad_norm": 3.8852684414905916, |
|
"learning_rate": 9.745768866650339e-06, |
|
"loss": 0.2415, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2087378640776699, |
|
"grad_norm": 3.138531558995714, |
|
"learning_rate": 9.73362842158228e-06, |
|
"loss": 0.1392, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.21359223300970873, |
|
"grad_norm": 3.7919984108957268, |
|
"learning_rate": 9.721212745498493e-06, |
|
"loss": 0.1749, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.21844660194174756, |
|
"grad_norm": 3.803059250095341, |
|
"learning_rate": 9.70852256029323e-06, |
|
"loss": 0.187, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.22330097087378642, |
|
"grad_norm": 3.8695384817772287, |
|
"learning_rate": 9.695558603821735e-06, |
|
"loss": 0.205, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.22815533980582525, |
|
"grad_norm": 2.9767594966056055, |
|
"learning_rate": 9.682321629857348e-06, |
|
"loss": 0.1579, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.23300970873786409, |
|
"grad_norm": 3.7623612894930014, |
|
"learning_rate": 9.66881240804768e-06, |
|
"loss": 0.1848, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.23786407766990292, |
|
"grad_norm": 3.7271523114968366, |
|
"learning_rate": 9.655031723869848e-06, |
|
"loss": 0.1994, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.24271844660194175, |
|
"grad_norm": 4.400012400732805, |
|
"learning_rate": 9.64098037858483e-06, |
|
"loss": 0.2074, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24757281553398058, |
|
"grad_norm": 3.1869928963297918, |
|
"learning_rate": 9.626659189190852e-06, |
|
"loss": 0.153, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.2524271844660194, |
|
"grad_norm": 3.230129858375766, |
|
"learning_rate": 9.612068988375898e-06, |
|
"loss": 0.1534, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.25728155339805825, |
|
"grad_norm": 4.907659763954274, |
|
"learning_rate": 9.597210624469288e-06, |
|
"loss": 0.2174, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.2621359223300971, |
|
"grad_norm": 3.269713020110104, |
|
"learning_rate": 9.582084961392358e-06, |
|
"loss": 0.1559, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.2669902912621359, |
|
"grad_norm": 3.5167228870990006, |
|
"learning_rate": 9.566692878608229e-06, |
|
"loss": 0.1633, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.27184466019417475, |
|
"grad_norm": 3.8810457882786147, |
|
"learning_rate": 9.551035271070665e-06, |
|
"loss": 0.2024, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.2766990291262136, |
|
"grad_norm": 3.80611069136377, |
|
"learning_rate": 9.53511304917204e-06, |
|
"loss": 0.1979, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.2815533980582524, |
|
"grad_norm": 3.1235256364043065, |
|
"learning_rate": 9.51892713869041e-06, |
|
"loss": 0.122, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.28640776699029125, |
|
"grad_norm": 3.8471845422238644, |
|
"learning_rate": 9.502478480735678e-06, |
|
"loss": 0.1916, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.2912621359223301, |
|
"grad_norm": 4.4081074680844, |
|
"learning_rate": 9.485768031694872e-06, |
|
"loss": 0.2133, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2961165048543689, |
|
"grad_norm": 3.9702629690926843, |
|
"learning_rate": 9.468796763176549e-06, |
|
"loss": 0.1905, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.30097087378640774, |
|
"grad_norm": 3.5755744298092114, |
|
"learning_rate": 9.45156566195429e-06, |
|
"loss": 0.1532, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.3058252427184466, |
|
"grad_norm": 3.9555728902165663, |
|
"learning_rate": 9.43407572990933e-06, |
|
"loss": 0.1908, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.3106796116504854, |
|
"grad_norm": 3.9196407680027376, |
|
"learning_rate": 9.416327983972304e-06, |
|
"loss": 0.2065, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.3155339805825243, |
|
"grad_norm": 3.3646702709666627, |
|
"learning_rate": 9.398323456064124e-06, |
|
"loss": 0.1858, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.32038834951456313, |
|
"grad_norm": 3.5598616176869697, |
|
"learning_rate": 9.380063193035968e-06, |
|
"loss": 0.17, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.32524271844660196, |
|
"grad_norm": 3.432171018519614, |
|
"learning_rate": 9.361548256608421e-06, |
|
"loss": 0.1695, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.3300970873786408, |
|
"grad_norm": 3.315657620066766, |
|
"learning_rate": 9.342779723309746e-06, |
|
"loss": 0.2164, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.33495145631067963, |
|
"grad_norm": 3.3203719440637816, |
|
"learning_rate": 9.323758684413272e-06, |
|
"loss": 0.1545, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.33980582524271846, |
|
"grad_norm": 3.714541686249799, |
|
"learning_rate": 9.304486245873973e-06, |
|
"loss": 0.1703, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3446601941747573, |
|
"grad_norm": 4.067810000216949, |
|
"learning_rate": 9.284963528264133e-06, |
|
"loss": 0.2109, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.34951456310679613, |
|
"grad_norm": 3.433850603401137, |
|
"learning_rate": 9.26519166670821e-06, |
|
"loss": 0.1612, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.35436893203883496, |
|
"grad_norm": 3.853150500703727, |
|
"learning_rate": 9.24517181081683e-06, |
|
"loss": 0.2045, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.3592233009708738, |
|
"grad_norm": 3.365186399222606, |
|
"learning_rate": 9.22490512461995e-06, |
|
"loss": 0.1626, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.3640776699029126, |
|
"grad_norm": 4.035836570077309, |
|
"learning_rate": 9.204392786499168e-06, |
|
"loss": 0.2155, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.36893203883495146, |
|
"grad_norm": 3.2647040022419334, |
|
"learning_rate": 9.183635989119211e-06, |
|
"loss": 0.1775, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.3737864077669903, |
|
"grad_norm": 3.378815192345755, |
|
"learning_rate": 9.162635939358593e-06, |
|
"loss": 0.178, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.3786407766990291, |
|
"grad_norm": 3.421171685594885, |
|
"learning_rate": 9.141393858239435e-06, |
|
"loss": 0.1652, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.38349514563106796, |
|
"grad_norm": 3.4838388469466173, |
|
"learning_rate": 9.119910980856477e-06, |
|
"loss": 0.2023, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.3883495145631068, |
|
"grad_norm": 3.163762044499404, |
|
"learning_rate": 9.098188556305262e-06, |
|
"loss": 0.1895, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3932038834951456, |
|
"grad_norm": 3.2177332207226006, |
|
"learning_rate": 9.076227847609513e-06, |
|
"loss": 0.1512, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.39805825242718446, |
|
"grad_norm": 2.7348500350080323, |
|
"learning_rate": 9.054030131647682e-06, |
|
"loss": 0.1501, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.4029126213592233, |
|
"grad_norm": 3.7715366941320263, |
|
"learning_rate": 9.031596699078727e-06, |
|
"loss": 0.2278, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.4077669902912621, |
|
"grad_norm": 3.8384805890305116, |
|
"learning_rate": 9.008928854267054e-06, |
|
"loss": 0.2054, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.41262135922330095, |
|
"grad_norm": 3.7523234300491906, |
|
"learning_rate": 8.986027915206686e-06, |
|
"loss": 0.173, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.4174757281553398, |
|
"grad_norm": 3.653083593177836, |
|
"learning_rate": 8.962895213444618e-06, |
|
"loss": 0.1711, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.4223300970873786, |
|
"grad_norm": 2.810255317524991, |
|
"learning_rate": 8.939532094003409e-06, |
|
"loss": 0.1457, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.42718446601941745, |
|
"grad_norm": 4.190114857066821, |
|
"learning_rate": 8.91593991530297e-06, |
|
"loss": 0.1597, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.4320388349514563, |
|
"grad_norm": 4.862687243251444, |
|
"learning_rate": 8.892120049081577e-06, |
|
"loss": 0.2573, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.4368932038834951, |
|
"grad_norm": 3.6153533015247237, |
|
"learning_rate": 8.868073880316125e-06, |
|
"loss": 0.2157, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.441747572815534, |
|
"grad_norm": 3.183931699603958, |
|
"learning_rate": 8.843802807141584e-06, |
|
"loss": 0.1594, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.44660194174757284, |
|
"grad_norm": 3.995838025390297, |
|
"learning_rate": 8.819308240769726e-06, |
|
"loss": 0.2005, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.45145631067961167, |
|
"grad_norm": 3.277845575596573, |
|
"learning_rate": 8.794591605407047e-06, |
|
"loss": 0.1986, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.4563106796116505, |
|
"grad_norm": 4.398003077782128, |
|
"learning_rate": 8.769654338171986e-06, |
|
"loss": 0.2253, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.46116504854368934, |
|
"grad_norm": 3.389102273686641, |
|
"learning_rate": 8.744497889011344e-06, |
|
"loss": 0.1851, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.46601941747572817, |
|
"grad_norm": 3.434805541663128, |
|
"learning_rate": 8.71912372061598e-06, |
|
"loss": 0.1929, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.470873786407767, |
|
"grad_norm": 2.949657473114478, |
|
"learning_rate": 8.693533308335786e-06, |
|
"loss": 0.1407, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.47572815533980584, |
|
"grad_norm": 3.527354861263602, |
|
"learning_rate": 8.667728140093876e-06, |
|
"loss": 0.2136, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.48058252427184467, |
|
"grad_norm": 3.574021701069902, |
|
"learning_rate": 8.641709716300092e-06, |
|
"loss": 0.1987, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.4854368932038835, |
|
"grad_norm": 3.366187355130273, |
|
"learning_rate": 8.615479549763756e-06, |
|
"loss": 0.1794, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.49029126213592233, |
|
"grad_norm": 2.943784862498522, |
|
"learning_rate": 8.589039165605716e-06, |
|
"loss": 0.159, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.49514563106796117, |
|
"grad_norm": 3.3541994870307357, |
|
"learning_rate": 8.56239010116966e-06, |
|
"loss": 0.1983, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.970946939491204, |
|
"learning_rate": 8.535533905932739e-06, |
|
"loss": 0.1929, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.5048543689320388, |
|
"grad_norm": 3.533692725781102, |
|
"learning_rate": 8.508472141415468e-06, |
|
"loss": 0.1563, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.5097087378640777, |
|
"grad_norm": 3.332925768522305, |
|
"learning_rate": 8.481206381090934e-06, |
|
"loss": 0.1383, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.5145631067961165, |
|
"grad_norm": 4.124133153344098, |
|
"learning_rate": 8.453738210293316e-06, |
|
"loss": 0.1592, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.5194174757281553, |
|
"grad_norm": 3.930207715532846, |
|
"learning_rate": 8.426069226125695e-06, |
|
"loss": 0.1774, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.5242718446601942, |
|
"grad_norm": 3.4981066338300093, |
|
"learning_rate": 8.398201037367202e-06, |
|
"loss": 0.1676, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.529126213592233, |
|
"grad_norm": 3.9559129500953554, |
|
"learning_rate": 8.370135264379475e-06, |
|
"loss": 0.2122, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.5339805825242718, |
|
"grad_norm": 3.1892128735017202, |
|
"learning_rate": 8.341873539012443e-06, |
|
"loss": 0.1507, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5388349514563107, |
|
"grad_norm": 2.8975515911690883, |
|
"learning_rate": 8.313417504509446e-06, |
|
"loss": 0.1208, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.5436893203883495, |
|
"grad_norm": 3.1795582458355263, |
|
"learning_rate": 8.284768815411693e-06, |
|
"loss": 0.1573, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.5485436893203883, |
|
"grad_norm": 4.5158717706606035, |
|
"learning_rate": 8.255929137462049e-06, |
|
"loss": 0.2067, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.5533980582524272, |
|
"grad_norm": 4.244112011731956, |
|
"learning_rate": 8.226900147508205e-06, |
|
"loss": 0.191, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.558252427184466, |
|
"grad_norm": 3.7492946786671615, |
|
"learning_rate": 8.197683533405156e-06, |
|
"loss": 0.1839, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5631067961165048, |
|
"grad_norm": 3.648017879536666, |
|
"learning_rate": 8.168280993917078e-06, |
|
"loss": 0.1899, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.5679611650485437, |
|
"grad_norm": 3.7020099363609877, |
|
"learning_rate": 8.138694238618543e-06, |
|
"loss": 0.1694, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5728155339805825, |
|
"grad_norm": 3.1678272010712716, |
|
"learning_rate": 8.108924987795137e-06, |
|
"loss": 0.1717, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5776699029126213, |
|
"grad_norm": 2.946487871723226, |
|
"learning_rate": 8.078974972343414e-06, |
|
"loss": 0.1395, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5825242718446602, |
|
"grad_norm": 3.8229174929451135, |
|
"learning_rate": 8.048845933670274e-06, |
|
"loss": 0.1717, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.587378640776699, |
|
"grad_norm": 3.4722235503056096, |
|
"learning_rate": 8.01853962359169e-06, |
|
"loss": 0.1663, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5922330097087378, |
|
"grad_norm": 3.8434017455244422, |
|
"learning_rate": 7.988057804230878e-06, |
|
"loss": 0.171, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5970873786407767, |
|
"grad_norm": 3.309914479885208, |
|
"learning_rate": 7.957402247915817e-06, |
|
"loss": 0.1365, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.6019417475728155, |
|
"grad_norm": 3.7378581970376445, |
|
"learning_rate": 7.92657473707621e-06, |
|
"loss": 0.1522, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.6067961165048543, |
|
"grad_norm": 3.0086349638458723, |
|
"learning_rate": 7.895577064139847e-06, |
|
"loss": 0.1545, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.6116504854368932, |
|
"grad_norm": 3.485975652298689, |
|
"learning_rate": 7.864411031428379e-06, |
|
"loss": 0.1759, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.616504854368932, |
|
"grad_norm": 3.1799079684705576, |
|
"learning_rate": 7.833078451052537e-06, |
|
"loss": 0.1592, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.6213592233009708, |
|
"grad_norm": 3.4962291009418114, |
|
"learning_rate": 7.801581144806752e-06, |
|
"loss": 0.1819, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.6262135922330098, |
|
"grad_norm": 2.790639847669124, |
|
"learning_rate": 7.769920944063244e-06, |
|
"loss": 0.1294, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.6310679611650486, |
|
"grad_norm": 3.692116350897978, |
|
"learning_rate": 7.73809968966554e-06, |
|
"loss": 0.1633, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6359223300970874, |
|
"grad_norm": 3.978321523304929, |
|
"learning_rate": 7.706119231821423e-06, |
|
"loss": 0.2083, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.6407766990291263, |
|
"grad_norm": 2.9267398399161126, |
|
"learning_rate": 7.673981429995372e-06, |
|
"loss": 0.163, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.6456310679611651, |
|
"grad_norm": 3.566641752297845, |
|
"learning_rate": 7.641688152800433e-06, |
|
"loss": 0.1735, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.6504854368932039, |
|
"grad_norm": 3.8836342645758726, |
|
"learning_rate": 7.609241277889583e-06, |
|
"loss": 0.1734, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.6553398058252428, |
|
"grad_norm": 3.284705347055019, |
|
"learning_rate": 7.5766426918465455e-06, |
|
"loss": 0.1547, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.6601941747572816, |
|
"grad_norm": 3.4280728507988134, |
|
"learning_rate": 7.5438942900761035e-06, |
|
"loss": 0.1498, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.6650485436893204, |
|
"grad_norm": 3.1094417342434824, |
|
"learning_rate": 7.51099797669389e-06, |
|
"loss": 0.1833, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.6699029126213593, |
|
"grad_norm": 3.635713263211301, |
|
"learning_rate": 7.477955664415678e-06, |
|
"loss": 0.1847, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.6747572815533981, |
|
"grad_norm": 3.082108474651334, |
|
"learning_rate": 7.444769274446168e-06, |
|
"loss": 0.1518, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.6796116504854369, |
|
"grad_norm": 3.275121622817452, |
|
"learning_rate": 7.411440736367281e-06, |
|
"loss": 0.1541, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6844660194174758, |
|
"grad_norm": 3.463656129167697, |
|
"learning_rate": 7.377971988025964e-06, |
|
"loss": 0.1596, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.6893203883495146, |
|
"grad_norm": 3.6688953630475085, |
|
"learning_rate": 7.3443649754215175e-06, |
|
"loss": 0.162, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6941747572815534, |
|
"grad_norm": 3.2977691883495783, |
|
"learning_rate": 7.310621652592449e-06, |
|
"loss": 0.1497, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6990291262135923, |
|
"grad_norm": 2.9919702655716107, |
|
"learning_rate": 7.276743981502856e-06, |
|
"loss": 0.149, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.7038834951456311, |
|
"grad_norm": 3.402863697978128, |
|
"learning_rate": 7.242733931928352e-06, |
|
"loss": 0.1452, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.7087378640776699, |
|
"grad_norm": 2.9397952022985776, |
|
"learning_rate": 7.208593481341536e-06, |
|
"loss": 0.1688, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.7135922330097088, |
|
"grad_norm": 2.786690378526301, |
|
"learning_rate": 7.1743246147970095e-06, |
|
"loss": 0.1389, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.7184466019417476, |
|
"grad_norm": 3.3854162495099085, |
|
"learning_rate": 7.139929324815965e-06, |
|
"loss": 0.1929, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.7233009708737864, |
|
"grad_norm": 3.0441624701446326, |
|
"learning_rate": 7.105409611270332e-06, |
|
"loss": 0.1438, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.7281553398058253, |
|
"grad_norm": 3.2250000949259494, |
|
"learning_rate": 7.070767481266493e-06, |
|
"loss": 0.1579, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7330097087378641, |
|
"grad_norm": 3.2502115185683333, |
|
"learning_rate": 7.036004949028587e-06, |
|
"loss": 0.1561, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.7378640776699029, |
|
"grad_norm": 3.665237512676154, |
|
"learning_rate": 7.00112403578139e-06, |
|
"loss": 0.2204, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.7427184466019418, |
|
"grad_norm": 3.210768048996398, |
|
"learning_rate": 6.9661267696328015e-06, |
|
"loss": 0.1838, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.7475728155339806, |
|
"grad_norm": 3.722304024110409, |
|
"learning_rate": 6.931015185455915e-06, |
|
"loss": 0.1687, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.7524271844660194, |
|
"grad_norm": 3.909093916637989, |
|
"learning_rate": 6.895791324770702e-06, |
|
"loss": 0.2008, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.7572815533980582, |
|
"grad_norm": 3.5535897544943484, |
|
"learning_rate": 6.860457235625322e-06, |
|
"loss": 0.1862, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.7621359223300971, |
|
"grad_norm": 3.086442734785655, |
|
"learning_rate": 6.825014972477024e-06, |
|
"loss": 0.1571, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.7669902912621359, |
|
"grad_norm": 3.7441594601356947, |
|
"learning_rate": 6.7894665960727105e-06, |
|
"loss": 0.2045, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.7718446601941747, |
|
"grad_norm": 3.6222387277033574, |
|
"learning_rate": 6.7538141733291e-06, |
|
"loss": 0.1671, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.7766990291262136, |
|
"grad_norm": 3.852188115969548, |
|
"learning_rate": 6.7180597772125665e-06, |
|
"loss": 0.2012, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7815533980582524, |
|
"grad_norm": 3.296385344532143, |
|
"learning_rate": 6.682205486618592e-06, |
|
"loss": 0.1606, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.7864077669902912, |
|
"grad_norm": 3.48850094356451, |
|
"learning_rate": 6.646253386250909e-06, |
|
"loss": 0.2292, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.7912621359223301, |
|
"grad_norm": 3.556444605781854, |
|
"learning_rate": 6.610205566500272e-06, |
|
"loss": 0.2045, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.7961165048543689, |
|
"grad_norm": 3.3729077588540903, |
|
"learning_rate": 6.574064123322925e-06, |
|
"loss": 0.1683, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.8009708737864077, |
|
"grad_norm": 3.3843904849756137, |
|
"learning_rate": 6.537831158118733e-06, |
|
"loss": 0.1605, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.8058252427184466, |
|
"grad_norm": 2.982235868498706, |
|
"learning_rate": 6.50150877760899e-06, |
|
"loss": 0.174, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.8106796116504854, |
|
"grad_norm": 3.4756594206536087, |
|
"learning_rate": 6.465099093713944e-06, |
|
"loss": 0.1906, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.8155339805825242, |
|
"grad_norm": 3.030809809529912, |
|
"learning_rate": 6.42860422342998e-06, |
|
"loss": 0.1581, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.8203883495145631, |
|
"grad_norm": 3.4952700647449175, |
|
"learning_rate": 6.392026288706549e-06, |
|
"loss": 0.1336, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.8252427184466019, |
|
"grad_norm": 3.1165499270876538, |
|
"learning_rate": 6.3553674163227786e-06, |
|
"loss": 0.155, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.8300970873786407, |
|
"grad_norm": 2.9628557740369863, |
|
"learning_rate": 6.318629737763818e-06, |
|
"loss": 0.161, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.8349514563106796, |
|
"grad_norm": 3.690512514002413, |
|
"learning_rate": 6.281815389096903e-06, |
|
"loss": 0.1811, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.8398058252427184, |
|
"grad_norm": 3.4149380316333815, |
|
"learning_rate": 6.244926510847162e-06, |
|
"loss": 0.1391, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.8446601941747572, |
|
"grad_norm": 5.241732765365241, |
|
"learning_rate": 6.207965247873151e-06, |
|
"loss": 0.1586, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.8495145631067961, |
|
"grad_norm": 3.5677389851744663, |
|
"learning_rate": 6.1709337492421515e-06, |
|
"loss": 0.1465, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.8543689320388349, |
|
"grad_norm": 3.352995725574537, |
|
"learning_rate": 6.133834168105206e-06, |
|
"loss": 0.1636, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.8592233009708737, |
|
"grad_norm": 3.5445075180843477, |
|
"learning_rate": 6.096668661571934e-06, |
|
"loss": 0.1832, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.8640776699029126, |
|
"grad_norm": 4.631280052639027, |
|
"learning_rate": 6.0594393905851065e-06, |
|
"loss": 0.2243, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.8689320388349514, |
|
"grad_norm": 3.806289364419148, |
|
"learning_rate": 6.0221485197949995e-06, |
|
"loss": 0.1737, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.8737864077669902, |
|
"grad_norm": 3.5248010681170383, |
|
"learning_rate": 5.9847982174335314e-06, |
|
"loss": 0.2009, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8786407766990292, |
|
"grad_norm": 3.101104764524827, |
|
"learning_rate": 5.9473906551881985e-06, |
|
"loss": 0.1603, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.883495145631068, |
|
"grad_norm": 3.9618906438501433, |
|
"learning_rate": 5.9099280080758085e-06, |
|
"loss": 0.1842, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.8883495145631068, |
|
"grad_norm": 3.1988833525581657, |
|
"learning_rate": 5.872412454315999e-06, |
|
"loss": 0.1596, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.8932038834951457, |
|
"grad_norm": 6.6043342145179125, |
|
"learning_rate": 5.834846175204612e-06, |
|
"loss": 0.2276, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.8980582524271845, |
|
"grad_norm": 3.6241814154915786, |
|
"learning_rate": 5.797231354986842e-06, |
|
"loss": 0.1745, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.9029126213592233, |
|
"grad_norm": 2.95062859589642, |
|
"learning_rate": 5.759570180730255e-06, |
|
"loss": 0.1705, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.9077669902912622, |
|
"grad_norm": 3.4637453685042936, |
|
"learning_rate": 5.721864842197612e-06, |
|
"loss": 0.1606, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.912621359223301, |
|
"grad_norm": 3.827650102803788, |
|
"learning_rate": 5.684117531719552e-06, |
|
"loss": 0.2137, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.9174757281553398, |
|
"grad_norm": 3.8468557427536143, |
|
"learning_rate": 5.646330444067121e-06, |
|
"loss": 0.1556, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.9223300970873787, |
|
"grad_norm": 2.8474128606502354, |
|
"learning_rate": 5.608505776324158e-06, |
|
"loss": 0.1351, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.9271844660194175, |
|
"grad_norm": 3.507850395452768, |
|
"learning_rate": 5.570645727759558e-06, |
|
"loss": 0.1949, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.9320388349514563, |
|
"grad_norm": 2.997718572696592, |
|
"learning_rate": 5.532752499699381e-06, |
|
"loss": 0.1852, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.9368932038834952, |
|
"grad_norm": 3.19210145748532, |
|
"learning_rate": 5.494828295398874e-06, |
|
"loss": 0.158, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.941747572815534, |
|
"grad_norm": 2.6804697784997407, |
|
"learning_rate": 5.456875319914355e-06, |
|
"loss": 0.1296, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.9466019417475728, |
|
"grad_norm": 3.4178634345048753, |
|
"learning_rate": 5.4188957799750145e-06, |
|
"loss": 0.1297, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.9514563106796117, |
|
"grad_norm": 3.209602154994729, |
|
"learning_rate": 5.380891883854591e-06, |
|
"loss": 0.1515, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.9563106796116505, |
|
"grad_norm": 2.809335723259345, |
|
"learning_rate": 5.34286584124299e-06, |
|
"loss": 0.1265, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.9611650485436893, |
|
"grad_norm": 3.165487146981837, |
|
"learning_rate": 5.304819863117796e-06, |
|
"loss": 0.1194, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.9660194174757282, |
|
"grad_norm": 3.0538642497840804, |
|
"learning_rate": 5.266756161615719e-06, |
|
"loss": 0.1407, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.970873786407767, |
|
"grad_norm": 3.3639825412685465, |
|
"learning_rate": 5.228676949903974e-06, |
|
"loss": 0.1618, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9757281553398058, |
|
"grad_norm": 3.7109546175241706, |
|
"learning_rate": 5.190584442051594e-06, |
|
"loss": 0.2007, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.9805825242718447, |
|
"grad_norm": 3.55944594096005, |
|
"learning_rate": 5.1524808529007075e-06, |
|
"loss": 0.2059, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.9854368932038835, |
|
"grad_norm": 3.6920021535814063, |
|
"learning_rate": 5.114368397937744e-06, |
|
"loss": 0.1964, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.9902912621359223, |
|
"grad_norm": 3.2357337946433717, |
|
"learning_rate": 5.07624929316463e-06, |
|
"loss": 0.1492, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.9951456310679612, |
|
"grad_norm": 3.732365728058814, |
|
"learning_rate": 5.038125754969933e-06, |
|
"loss": 0.1507, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.2967049476623247, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0637, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.0048543689320388, |
|
"grad_norm": 2.5226978479044826, |
|
"learning_rate": 4.9618742450300675e-06, |
|
"loss": 0.0941, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.0097087378640777, |
|
"grad_norm": 2.6073706397528293, |
|
"learning_rate": 4.923750706835371e-06, |
|
"loss": 0.0998, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.0145631067961165, |
|
"grad_norm": 2.524982019563185, |
|
"learning_rate": 4.8856316020622564e-06, |
|
"loss": 0.0814, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.0194174757281553, |
|
"grad_norm": 2.0946857127782588, |
|
"learning_rate": 4.847519147099294e-06, |
|
"loss": 0.0666, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.0242718446601942, |
|
"grad_norm": 2.538290041731963, |
|
"learning_rate": 4.809415557948407e-06, |
|
"loss": 0.1131, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.029126213592233, |
|
"grad_norm": 2.0465760681173673, |
|
"learning_rate": 4.771323050096028e-06, |
|
"loss": 0.0706, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.0339805825242718, |
|
"grad_norm": 2.3508610389964164, |
|
"learning_rate": 4.733243838384282e-06, |
|
"loss": 0.0659, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.0388349514563107, |
|
"grad_norm": 2.971612647656734, |
|
"learning_rate": 4.6951801368822055e-06, |
|
"loss": 0.0981, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0436893203883495, |
|
"grad_norm": 2.9420665874838985, |
|
"learning_rate": 4.6571341587570114e-06, |
|
"loss": 0.078, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.0485436893203883, |
|
"grad_norm": 2.079382704929123, |
|
"learning_rate": 4.619108116145411e-06, |
|
"loss": 0.0861, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.0533980582524272, |
|
"grad_norm": 2.3989525704538366, |
|
"learning_rate": 4.581104220024988e-06, |
|
"loss": 0.0866, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.058252427184466, |
|
"grad_norm": 2.301224458683677, |
|
"learning_rate": 4.5431246800856455e-06, |
|
"loss": 0.07, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.0631067961165048, |
|
"grad_norm": 3.055362327673038, |
|
"learning_rate": 4.505171704601128e-06, |
|
"loss": 0.0871, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.0679611650485437, |
|
"grad_norm": 2.156619499236112, |
|
"learning_rate": 4.467247500300621e-06, |
|
"loss": 0.0615, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0728155339805825, |
|
"grad_norm": 3.3343949043528727, |
|
"learning_rate": 4.4293542722404435e-06, |
|
"loss": 0.0857, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.0776699029126213, |
|
"grad_norm": 4.336729224578723, |
|
"learning_rate": 4.391494223675843e-06, |
|
"loss": 0.0975, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.0825242718446602, |
|
"grad_norm": 3.654581906399023, |
|
"learning_rate": 4.3536695559328816e-06, |
|
"loss": 0.0869, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.087378640776699, |
|
"grad_norm": 3.48839651675299, |
|
"learning_rate": 4.31588246828045e-06, |
|
"loss": 0.0699, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.0922330097087378, |
|
"grad_norm": 3.527358996116628, |
|
"learning_rate": 4.278135157802389e-06, |
|
"loss": 0.0778, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.0970873786407767, |
|
"grad_norm": 3.0484712762315485, |
|
"learning_rate": 4.240429819269746e-06, |
|
"loss": 0.0741, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.1019417475728155, |
|
"grad_norm": 2.724855271620847, |
|
"learning_rate": 4.20276864501316e-06, |
|
"loss": 0.0544, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.1067961165048543, |
|
"grad_norm": 3.418458471187941, |
|
"learning_rate": 4.165153824795391e-06, |
|
"loss": 0.0823, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.1116504854368932, |
|
"grad_norm": 3.2054146648148323, |
|
"learning_rate": 4.127587545684002e-06, |
|
"loss": 0.0775, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.116504854368932, |
|
"grad_norm": 2.8842952697992996, |
|
"learning_rate": 4.090071991924194e-06, |
|
"loss": 0.073, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.1213592233009708, |
|
"grad_norm": 3.332510451317236, |
|
"learning_rate": 4.052609344811802e-06, |
|
"loss": 0.0651, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.1262135922330097, |
|
"grad_norm": 3.175654906852291, |
|
"learning_rate": 4.015201782566471e-06, |
|
"loss": 0.0887, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.1310679611650485, |
|
"grad_norm": 3.059530752591451, |
|
"learning_rate": 3.977851480205003e-06, |
|
"loss": 0.0928, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.1359223300970873, |
|
"grad_norm": 2.6904558701382224, |
|
"learning_rate": 3.940560609414894e-06, |
|
"loss": 0.0722, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.1407766990291262, |
|
"grad_norm": 2.2693610706285816, |
|
"learning_rate": 3.903331338428067e-06, |
|
"loss": 0.0552, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.145631067961165, |
|
"grad_norm": 2.377334924236447, |
|
"learning_rate": 3.866165831894796e-06, |
|
"loss": 0.0706, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.1504854368932038, |
|
"grad_norm": 2.724444194625347, |
|
"learning_rate": 3.829066250757851e-06, |
|
"loss": 0.0669, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.1553398058252426, |
|
"grad_norm": 3.3465238121560748, |
|
"learning_rate": 3.7920347521268514e-06, |
|
"loss": 0.0803, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.1601941747572815, |
|
"grad_norm": 2.3227258906146218, |
|
"learning_rate": 3.7550734891528413e-06, |
|
"loss": 0.0781, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.1650485436893203, |
|
"grad_norm": 2.315917458233191, |
|
"learning_rate": 3.7181846109031007e-06, |
|
"loss": 0.0565, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.1699029126213591, |
|
"grad_norm": 2.436340284248906, |
|
"learning_rate": 3.6813702622361858e-06, |
|
"loss": 0.0622, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.174757281553398, |
|
"grad_norm": 4.316673044669812, |
|
"learning_rate": 3.6446325836772244e-06, |
|
"loss": 0.0662, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.1796116504854368, |
|
"grad_norm": 2.81625935026341, |
|
"learning_rate": 3.6079737112934533e-06, |
|
"loss": 0.074, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.1844660194174756, |
|
"grad_norm": 3.307274795040302, |
|
"learning_rate": 3.5713957765700224e-06, |
|
"loss": 0.0788, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.1893203883495145, |
|
"grad_norm": 3.7045219379511822, |
|
"learning_rate": 3.5349009062860586e-06, |
|
"loss": 0.0824, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.1941747572815533, |
|
"grad_norm": 4.445864663798351, |
|
"learning_rate": 3.4984912223910105e-06, |
|
"loss": 0.0796, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.1990291262135921, |
|
"grad_norm": 3.2668711412572073, |
|
"learning_rate": 3.46216884188127e-06, |
|
"loss": 0.074, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.203883495145631, |
|
"grad_norm": 2.4601134476136846, |
|
"learning_rate": 3.425935876677077e-06, |
|
"loss": 0.0572, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.2087378640776698, |
|
"grad_norm": 3.1352393379631325, |
|
"learning_rate": 3.38979443349973e-06, |
|
"loss": 0.0737, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.2135922330097086, |
|
"grad_norm": 2.898735529756216, |
|
"learning_rate": 3.3537466137490937e-06, |
|
"loss": 0.069, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2184466019417475, |
|
"grad_norm": 2.907656556133524, |
|
"learning_rate": 3.3177945133814093e-06, |
|
"loss": 0.0754, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.2233009708737863, |
|
"grad_norm": 2.7284562249841655, |
|
"learning_rate": 3.2819402227874364e-06, |
|
"loss": 0.0736, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.2281553398058254, |
|
"grad_norm": 2.8597278103076955, |
|
"learning_rate": 3.2461858266709017e-06, |
|
"loss": 0.0683, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.233009708737864, |
|
"grad_norm": 2.3089430605128647, |
|
"learning_rate": 3.2105334039272924e-06, |
|
"loss": 0.0607, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.237864077669903, |
|
"grad_norm": 2.415987796695676, |
|
"learning_rate": 3.1749850275229777e-06, |
|
"loss": 0.0563, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.2427184466019416, |
|
"grad_norm": 2.6598893579331557, |
|
"learning_rate": 3.1395427643746802e-06, |
|
"loss": 0.0765, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.2475728155339807, |
|
"grad_norm": 2.7501073530207054, |
|
"learning_rate": 3.1042086752292995e-06, |
|
"loss": 0.0751, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.2524271844660193, |
|
"grad_norm": 2.6246175824836566, |
|
"learning_rate": 3.068984814544087e-06, |
|
"loss": 0.0821, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.2572815533980584, |
|
"grad_norm": 3.1933770223308144, |
|
"learning_rate": 3.0338732303671993e-06, |
|
"loss": 0.0664, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.262135922330097, |
|
"grad_norm": 3.073936680231647, |
|
"learning_rate": 2.99887596421861e-06, |
|
"loss": 0.0585, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.266990291262136, |
|
"grad_norm": 2.862725126412591, |
|
"learning_rate": 2.9639950509714138e-06, |
|
"loss": 0.0752, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.2718446601941746, |
|
"grad_norm": 2.634350417382172, |
|
"learning_rate": 2.929232518733507e-06, |
|
"loss": 0.0746, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.2766990291262137, |
|
"grad_norm": 2.7230463424506093, |
|
"learning_rate": 2.8945903887296686e-06, |
|
"loss": 0.0693, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.2815533980582523, |
|
"grad_norm": 2.8448668934690198, |
|
"learning_rate": 2.860070675184036e-06, |
|
"loss": 0.0786, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.2864077669902914, |
|
"grad_norm": 5.143418540550123, |
|
"learning_rate": 2.8256753852029917e-06, |
|
"loss": 0.0896, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.29126213592233, |
|
"grad_norm": 3.9671382248664724, |
|
"learning_rate": 2.7914065186584637e-06, |
|
"loss": 0.0997, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.296116504854369, |
|
"grad_norm": 3.2147206473311205, |
|
"learning_rate": 2.757266068071648e-06, |
|
"loss": 0.0887, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.3009708737864076, |
|
"grad_norm": 3.1889018204207926, |
|
"learning_rate": 2.7232560184971437e-06, |
|
"loss": 0.0758, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.3058252427184467, |
|
"grad_norm": 2.677766822912337, |
|
"learning_rate": 2.689378347407553e-06, |
|
"loss": 0.0921, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.3106796116504853, |
|
"grad_norm": 3.5542252563806573, |
|
"learning_rate": 2.6556350245784833e-06, |
|
"loss": 0.0715, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.3155339805825244, |
|
"grad_norm": 2.6876907942268518, |
|
"learning_rate": 2.6220280119740376e-06, |
|
"loss": 0.0699, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.3203883495145632, |
|
"grad_norm": 2.7520473701148673, |
|
"learning_rate": 2.588559263632719e-06, |
|
"loss": 0.0787, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.325242718446602, |
|
"grad_norm": 2.614643966342531, |
|
"learning_rate": 2.555230725553832e-06, |
|
"loss": 0.0587, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.3300970873786409, |
|
"grad_norm": 3.1593856198687185, |
|
"learning_rate": 2.522044335584322e-06, |
|
"loss": 0.0881, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.3349514563106797, |
|
"grad_norm": 4.034678895029279, |
|
"learning_rate": 2.489002023306112e-06, |
|
"loss": 0.1248, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.3398058252427185, |
|
"grad_norm": 3.470975409492464, |
|
"learning_rate": 2.4561057099238973e-06, |
|
"loss": 0.1034, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.3446601941747574, |
|
"grad_norm": 3.201707902382977, |
|
"learning_rate": 2.423357308153454e-06, |
|
"loss": 0.0689, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.3495145631067962, |
|
"grad_norm": 2.7615918441216887, |
|
"learning_rate": 2.390758722110418e-06, |
|
"loss": 0.0915, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.354368932038835, |
|
"grad_norm": 2.4952322901383557, |
|
"learning_rate": 2.358311847199567e-06, |
|
"loss": 0.0593, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.3592233009708738, |
|
"grad_norm": 2.2700687647313447, |
|
"learning_rate": 2.3260185700046295e-06, |
|
"loss": 0.0554, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.3640776699029127, |
|
"grad_norm": 2.6442938969211345, |
|
"learning_rate": 2.2938807681785764e-06, |
|
"loss": 0.061, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.3689320388349515, |
|
"grad_norm": 3.0212702464004693, |
|
"learning_rate": 2.2619003103344607e-06, |
|
"loss": 0.0711, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.3737864077669903, |
|
"grad_norm": 3.0056008577786932, |
|
"learning_rate": 2.2300790559367553e-06, |
|
"loss": 0.0975, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.3786407766990292, |
|
"grad_norm": 3.0409200355769723, |
|
"learning_rate": 2.1984188551932513e-06, |
|
"loss": 0.0828, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.383495145631068, |
|
"grad_norm": 5.144056298054031, |
|
"learning_rate": 2.166921548947466e-06, |
|
"loss": 0.1069, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.3883495145631068, |
|
"grad_norm": 2.974437087020466, |
|
"learning_rate": 2.1355889685716225e-06, |
|
"loss": 0.0796, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.3932038834951457, |
|
"grad_norm": 3.1193704417489188, |
|
"learning_rate": 2.1044229358601543e-06, |
|
"loss": 0.0836, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.3980582524271845, |
|
"grad_norm": 3.105267358377638, |
|
"learning_rate": 2.0734252629237892e-06, |
|
"loss": 0.0861, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.4029126213592233, |
|
"grad_norm": 2.7361006254685636, |
|
"learning_rate": 2.0425977520841837e-06, |
|
"loss": 0.0693, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.4077669902912622, |
|
"grad_norm": 2.6357478152648164, |
|
"learning_rate": 2.011942195769122e-06, |
|
"loss": 0.0719, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.412621359223301, |
|
"grad_norm": 3.037299229178945, |
|
"learning_rate": 1.9814603764083112e-06, |
|
"loss": 0.0607, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.4174757281553398, |
|
"grad_norm": 3.128206199439905, |
|
"learning_rate": 1.9511540663297284e-06, |
|
"loss": 0.0689, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.4223300970873787, |
|
"grad_norm": 2.310510431415397, |
|
"learning_rate": 1.921025027656587e-06, |
|
"loss": 0.0656, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.4271844660194175, |
|
"grad_norm": 2.5238751934999115, |
|
"learning_rate": 1.8910750122048638e-06, |
|
"loss": 0.0662, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.4320388349514563, |
|
"grad_norm": 5.0477914204436685, |
|
"learning_rate": 1.8613057613814584e-06, |
|
"loss": 0.0734, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.4368932038834952, |
|
"grad_norm": 2.7015468302353236, |
|
"learning_rate": 1.8317190060829242e-06, |
|
"loss": 0.0925, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.441747572815534, |
|
"grad_norm": 2.703829586022864, |
|
"learning_rate": 1.8023164665948455e-06, |
|
"loss": 0.0747, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.4466019417475728, |
|
"grad_norm": 2.4885667346942304, |
|
"learning_rate": 1.773099852491796e-06, |
|
"loss": 0.071, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.4514563106796117, |
|
"grad_norm": 3.0454139981289607, |
|
"learning_rate": 1.7440708625379503e-06, |
|
"loss": 0.0714, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.4563106796116505, |
|
"grad_norm": 3.884767974878282, |
|
"learning_rate": 1.7152311845883096e-06, |
|
"loss": 0.0987, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.4611650485436893, |
|
"grad_norm": 2.749392804233707, |
|
"learning_rate": 1.686582495490554e-06, |
|
"loss": 0.0779, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.4660194174757282, |
|
"grad_norm": 2.650651441845639, |
|
"learning_rate": 1.658126460987558e-06, |
|
"loss": 0.0711, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.470873786407767, |
|
"grad_norm": 3.3389201404559765, |
|
"learning_rate": 1.6298647356205255e-06, |
|
"loss": 0.0896, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.4757281553398058, |
|
"grad_norm": 3.0570927397843173, |
|
"learning_rate": 1.601798962632799e-06, |
|
"loss": 0.05, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.4805825242718447, |
|
"grad_norm": 3.206513042944716, |
|
"learning_rate": 1.573930773874306e-06, |
|
"loss": 0.0668, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.4854368932038835, |
|
"grad_norm": 2.544043286282122, |
|
"learning_rate": 1.5462617897066863e-06, |
|
"loss": 0.0699, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.4902912621359223, |
|
"grad_norm": 2.265077458841769, |
|
"learning_rate": 1.5187936189090668e-06, |
|
"loss": 0.068, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.4951456310679612, |
|
"grad_norm": 2.676973013819137, |
|
"learning_rate": 1.491527858584535e-06, |
|
"loss": 0.0577, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 2.442817331160949, |
|
"learning_rate": 1.4644660940672628e-06, |
|
"loss": 0.0656, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.5048543689320388, |
|
"grad_norm": 2.6868210261229626, |
|
"learning_rate": 1.4376098988303406e-06, |
|
"loss": 0.06, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.5097087378640777, |
|
"grad_norm": 3.1300569873865323, |
|
"learning_rate": 1.4109608343942855e-06, |
|
"loss": 0.0898, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.5145631067961165, |
|
"grad_norm": 2.3667328224735757, |
|
"learning_rate": 1.3845204502362442e-06, |
|
"loss": 0.056, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.5194174757281553, |
|
"grad_norm": 3.2267024536329156, |
|
"learning_rate": 1.35829028369991e-06, |
|
"loss": 0.09, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.5242718446601942, |
|
"grad_norm": 2.48628112394617, |
|
"learning_rate": 1.3322718599061252e-06, |
|
"loss": 0.0559, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.529126213592233, |
|
"grad_norm": 3.637726836595628, |
|
"learning_rate": 1.306466691664216e-06, |
|
"loss": 0.0891, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.5339805825242718, |
|
"grad_norm": 3.4241592079538608, |
|
"learning_rate": 1.28087627938402e-06, |
|
"loss": 0.0836, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.5388349514563107, |
|
"grad_norm": 2.874681659316859, |
|
"learning_rate": 1.2555021109886589e-06, |
|
"loss": 0.088, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.5436893203883495, |
|
"grad_norm": 2.8891719293529077, |
|
"learning_rate": 1.2303456618280141e-06, |
|
"loss": 0.0847, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.5485436893203883, |
|
"grad_norm": 2.3465565126184993, |
|
"learning_rate": 1.2054083945929534e-06, |
|
"loss": 0.0569, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.5533980582524272, |
|
"grad_norm": 2.4174453339193294, |
|
"learning_rate": 1.1806917592302763e-06, |
|
"loss": 0.0703, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.558252427184466, |
|
"grad_norm": 2.4857789747027046, |
|
"learning_rate": 1.1561971928584158e-06, |
|
"loss": 0.0504, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.5631067961165048, |
|
"grad_norm": 2.5496812979782275, |
|
"learning_rate": 1.1319261196838782e-06, |
|
"loss": 0.0615, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.5679611650485437, |
|
"grad_norm": 3.420916113476439, |
|
"learning_rate": 1.1078799509184246e-06, |
|
"loss": 0.0788, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.5728155339805825, |
|
"grad_norm": 3.656319511152626, |
|
"learning_rate": 1.0840600846970333e-06, |
|
"loss": 0.0992, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.5776699029126213, |
|
"grad_norm": 3.0519107806111827, |
|
"learning_rate": 1.0604679059965923e-06, |
|
"loss": 0.0855, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.5825242718446602, |
|
"grad_norm": 2.7744945951763667, |
|
"learning_rate": 1.0371047865553847e-06, |
|
"loss": 0.0725, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.587378640776699, |
|
"grad_norm": 2.8775012719382866, |
|
"learning_rate": 1.0139720847933166e-06, |
|
"loss": 0.0682, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.5922330097087378, |
|
"grad_norm": 2.6048584949258378, |
|
"learning_rate": 9.91071145732948e-07, |
|
"loss": 0.0671, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.5970873786407767, |
|
"grad_norm": 3.586149243679513, |
|
"learning_rate": 9.684033009212752e-07, |
|
"loss": 0.1146, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.6019417475728155, |
|
"grad_norm": 3.2219483119895207, |
|
"learning_rate": 9.459698683523205e-07, |
|
"loss": 0.0801, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.6067961165048543, |
|
"grad_norm": 3.0767281701504507, |
|
"learning_rate": 9.237721523904891e-07, |
|
"loss": 0.0873, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.6116504854368932, |
|
"grad_norm": 1.7472755271643685, |
|
"learning_rate": 9.018114436947373e-07, |
|
"loss": 0.0429, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.616504854368932, |
|
"grad_norm": 2.816376343628945, |
|
"learning_rate": 8.80089019143524e-07, |
|
"loss": 0.0706, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.6213592233009708, |
|
"grad_norm": 2.70777640745328, |
|
"learning_rate": 8.586061417605668e-07, |
|
"loss": 0.0656, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.6262135922330097, |
|
"grad_norm": 2.6349647980166675, |
|
"learning_rate": 8.373640606414097e-07, |
|
"loss": 0.0634, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.6310679611650487, |
|
"grad_norm": 3.2704428966200623, |
|
"learning_rate": 8.163640108807897e-07, |
|
"loss": 0.081, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.6359223300970873, |
|
"grad_norm": 2.511126908552343, |
|
"learning_rate": 7.956072135008336e-07, |
|
"loss": 0.0545, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.6407766990291264, |
|
"grad_norm": 2.7993958637974026, |
|
"learning_rate": 7.750948753800508e-07, |
|
"loss": 0.0578, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.645631067961165, |
|
"grad_norm": 2.972715991435699, |
|
"learning_rate": 7.548281891831715e-07, |
|
"loss": 0.0501, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.650485436893204, |
|
"grad_norm": 2.7450293464222577, |
|
"learning_rate": 7.348083332917927e-07, |
|
"loss": 0.0586, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.6553398058252426, |
|
"grad_norm": 8.28860476219087, |
|
"learning_rate": 7.150364717358699e-07, |
|
"loss": 0.1289, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.6601941747572817, |
|
"grad_norm": 2.440470109318376, |
|
"learning_rate": 6.955137541260287e-07, |
|
"loss": 0.0588, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.6650485436893203, |
|
"grad_norm": 2.8757698428982263, |
|
"learning_rate": 6.762413155867276e-07, |
|
"loss": 0.0661, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.6699029126213594, |
|
"grad_norm": 3.804173003652149, |
|
"learning_rate": 6.572202766902569e-07, |
|
"loss": 0.104, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.674757281553398, |
|
"grad_norm": 2.46986531134066, |
|
"learning_rate": 6.384517433915794e-07, |
|
"loss": 0.061, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.679611650485437, |
|
"grad_norm": 2.6463281240919225, |
|
"learning_rate": 6.199368069640343e-07, |
|
"loss": 0.0646, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.6844660194174756, |
|
"grad_norm": 2.810788622491645, |
|
"learning_rate": 6.016765439358774e-07, |
|
"loss": 0.092, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.6893203883495147, |
|
"grad_norm": 3.2667839807311077, |
|
"learning_rate": 5.836720160276971e-07, |
|
"loss": 0.0657, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.6941747572815533, |
|
"grad_norm": 2.670163046271941, |
|
"learning_rate": 5.659242700906719e-07, |
|
"loss": 0.083, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.6990291262135924, |
|
"grad_norm": 2.5837862652890204, |
|
"learning_rate": 5.484343380457124e-07, |
|
"loss": 0.0674, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.703883495145631, |
|
"grad_norm": 2.7363343715132062, |
|
"learning_rate": 5.312032368234527e-07, |
|
"loss": 0.0596, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.70873786407767, |
|
"grad_norm": 3.0228555872693845, |
|
"learning_rate": 5.1423196830513e-07, |
|
"loss": 0.0604, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.7135922330097086, |
|
"grad_norm": 3.0565054099723348, |
|
"learning_rate": 4.975215192643246e-07, |
|
"loss": 0.0582, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.7184466019417477, |
|
"grad_norm": 2.3566462732850098, |
|
"learning_rate": 4.81072861309591e-07, |
|
"loss": 0.0465, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.7233009708737863, |
|
"grad_norm": 2.8490708438106, |
|
"learning_rate": 4.648869508279613e-07, |
|
"loss": 0.0757, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.7281553398058254, |
|
"grad_norm": 2.660359080763229, |
|
"learning_rate": 4.4896472892933693e-07, |
|
"loss": 0.0616, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.733009708737864, |
|
"grad_norm": 2.642742322680263, |
|
"learning_rate": 4.333071213917722e-07, |
|
"loss": 0.059, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.737864077669903, |
|
"grad_norm": 2.399017053040208, |
|
"learning_rate": 4.179150386076425e-07, |
|
"loss": 0.0698, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.7427184466019416, |
|
"grad_norm": 3.4978321874996903, |
|
"learning_rate": 4.027893755307144e-07, |
|
"loss": 0.0882, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.7475728155339807, |
|
"grad_norm": 4.144265818637208, |
|
"learning_rate": 3.8793101162410417e-07, |
|
"loss": 0.0915, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.7524271844660193, |
|
"grad_norm": 2.392339375233356, |
|
"learning_rate": 3.733408108091485e-07, |
|
"loss": 0.0613, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.7572815533980584, |
|
"grad_norm": 3.4801545030947607, |
|
"learning_rate": 3.5901962141516975e-07, |
|
"loss": 0.088, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.762135922330097, |
|
"grad_norm": 3.362689445023539, |
|
"learning_rate": 3.4496827613015206e-07, |
|
"loss": 0.1049, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.766990291262136, |
|
"grad_norm": 2.7636300260531343, |
|
"learning_rate": 3.3118759195232273e-07, |
|
"loss": 0.0661, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.7718446601941746, |
|
"grad_norm": 3.2482584469927778, |
|
"learning_rate": 3.176783701426528e-07, |
|
"loss": 0.0884, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.7766990291262137, |
|
"grad_norm": 3.5571787403658885, |
|
"learning_rate": 3.0444139617826605e-07, |
|
"loss": 0.0811, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.7815533980582523, |
|
"grad_norm": 2.933188307527586, |
|
"learning_rate": 2.91477439706771e-07, |
|
"loss": 0.0811, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.7864077669902914, |
|
"grad_norm": 2.675257274146178, |
|
"learning_rate": 2.787872545015069e-07, |
|
"loss": 0.0699, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.79126213592233, |
|
"grad_norm": 2.7745214175343644, |
|
"learning_rate": 2.663715784177201e-07, |
|
"loss": 0.066, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.796116504854369, |
|
"grad_norm": 3.5713118959116907, |
|
"learning_rate": 2.542311333496622e-07, |
|
"loss": 0.0754, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.8009708737864076, |
|
"grad_norm": 2.914355478281906, |
|
"learning_rate": 2.423666251886114e-07, |
|
"loss": 0.0821, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.8058252427184467, |
|
"grad_norm": 3.0203487862599308, |
|
"learning_rate": 2.307787437818365e-07, |
|
"loss": 0.0617, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.8106796116504853, |
|
"grad_norm": 3.1990045776173144, |
|
"learning_rate": 2.1946816289248163e-07, |
|
"loss": 0.0782, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.8155339805825244, |
|
"grad_norm": 3.382551685974258, |
|
"learning_rate": 2.0843554016039326e-07, |
|
"loss": 0.0837, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.820388349514563, |
|
"grad_norm": 3.331840289280731, |
|
"learning_rate": 1.9768151706388016e-07, |
|
"loss": 0.0798, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.825242718446602, |
|
"grad_norm": 3.475937322801651, |
|
"learning_rate": 1.8720671888242058e-07, |
|
"loss": 0.0689, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.8300970873786406, |
|
"grad_norm": 3.3192828865816173, |
|
"learning_rate": 1.7701175466029895e-07, |
|
"loss": 0.0656, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.8349514563106797, |
|
"grad_norm": 3.1773475588789344, |
|
"learning_rate": 1.6709721717120042e-07, |
|
"loss": 0.1311, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.8398058252427183, |
|
"grad_norm": 2.4645535480162537, |
|
"learning_rate": 1.574636828837395e-07, |
|
"loss": 0.0774, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.8446601941747574, |
|
"grad_norm": 2.8840789872151302, |
|
"learning_rate": 1.4811171192794628e-07, |
|
"loss": 0.0698, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.849514563106796, |
|
"grad_norm": 2.8471315825178394, |
|
"learning_rate": 1.3904184806269705e-07, |
|
"loss": 0.0789, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.854368932038835, |
|
"grad_norm": 2.8848070824831993, |
|
"learning_rate": 1.3025461864409395e-07, |
|
"loss": 0.051, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.8592233009708736, |
|
"grad_norm": 3.0214290380641584, |
|
"learning_rate": 1.2175053459481e-07, |
|
"loss": 0.0843, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.8640776699029127, |
|
"grad_norm": 2.843045948807881, |
|
"learning_rate": 1.1353009037437523e-07, |
|
"loss": 0.0592, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.8689320388349513, |
|
"grad_norm": 3.0555591438006253, |
|
"learning_rate": 1.0559376395043285e-07, |
|
"loss": 0.0945, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.8737864077669903, |
|
"grad_norm": 3.051111295496214, |
|
"learning_rate": 9.794201677094162e-08, |
|
"loss": 0.0797, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.8786407766990292, |
|
"grad_norm": 2.7716661599394357, |
|
"learning_rate": 9.05752937373533e-08, |
|
"loss": 0.0656, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.883495145631068, |
|
"grad_norm": 2.6599768066898766, |
|
"learning_rate": 8.34940231787379e-08, |
|
"loss": 0.0573, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.8883495145631068, |
|
"grad_norm": 2.4583865470007717, |
|
"learning_rate": 7.66986168268824e-08, |
|
"loss": 0.0555, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.8932038834951457, |
|
"grad_norm": 2.5792819446943325, |
|
"learning_rate": 7.018946979234997e-08, |
|
"loss": 0.0597, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.8980582524271845, |
|
"grad_norm": 2.453764883034279, |
|
"learning_rate": 6.396696054150719e-08, |
|
"loss": 0.0434, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.9029126213592233, |
|
"grad_norm": 2.768337888063692, |
|
"learning_rate": 5.803145087451945e-08, |
|
"loss": 0.0564, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.9077669902912622, |
|
"grad_norm": 3.1511468580593327, |
|
"learning_rate": 5.238328590431163e-08, |
|
"loss": 0.0965, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.912621359223301, |
|
"grad_norm": 2.561994726549062, |
|
"learning_rate": 4.702279403650534e-08, |
|
"loss": 0.0564, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.9174757281553398, |
|
"grad_norm": 3.476610276033248, |
|
"learning_rate": 4.195028695032133e-08, |
|
"loss": 0.1239, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.9223300970873787, |
|
"grad_norm": 2.925141553300206, |
|
"learning_rate": 3.716605958046071e-08, |
|
"loss": 0.0627, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.9271844660194175, |
|
"grad_norm": 2.7611469760094933, |
|
"learning_rate": 3.2670390099951985e-08, |
|
"loss": 0.0692, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.9320388349514563, |
|
"grad_norm": 3.360007890401177, |
|
"learning_rate": 2.846353990398065e-08, |
|
"loss": 0.091, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.9368932038834952, |
|
"grad_norm": 2.863922888806355, |
|
"learning_rate": 2.4545753594688582e-08, |
|
"loss": 0.0544, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.941747572815534, |
|
"grad_norm": 3.1781521212223316, |
|
"learning_rate": 2.0917258966953735e-08, |
|
"loss": 0.0899, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.9466019417475728, |
|
"grad_norm": 3.2060857261214903, |
|
"learning_rate": 1.757826699514298e-08, |
|
"loss": 0.0753, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.9514563106796117, |
|
"grad_norm": 3.1429894090044566, |
|
"learning_rate": 1.4528971820846894e-08, |
|
"loss": 0.0599, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.9563106796116505, |
|
"grad_norm": 2.877788029173042, |
|
"learning_rate": 1.176955074159214e-08, |
|
"loss": 0.0663, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.9611650485436893, |
|
"grad_norm": 3.0772599996496894, |
|
"learning_rate": 9.300164200530815e-09, |
|
"loss": 0.0929, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.9660194174757282, |
|
"grad_norm": 2.677761723865022, |
|
"learning_rate": 7.120955777112915e-09, |
|
"loss": 0.0698, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.970873786407767, |
|
"grad_norm": 3.1409534098169702, |
|
"learning_rate": 5.232052178738567e-09, |
|
"loss": 0.074, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.9757281553398058, |
|
"grad_norm": 3.162448563524175, |
|
"learning_rate": 3.633563233388926e-09, |
|
"loss": 0.0721, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.9805825242718447, |
|
"grad_norm": 2.249075348480327, |
|
"learning_rate": 2.3255818832423894e-09, |
|
"loss": 0.044, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.9854368932038835, |
|
"grad_norm": 2.8655517567670534, |
|
"learning_rate": 1.3081841792694783e-09, |
|
"loss": 0.0686, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.9902912621359223, |
|
"grad_norm": 2.5577916292025225, |
|
"learning_rate": 5.814292768108187e-10, |
|
"loss": 0.068, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.9951456310679612, |
|
"grad_norm": 3.031478562660941, |
|
"learning_rate": 1.453594321393359e-10, |
|
"loss": 0.0799, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.7599660923000546, |
|
"learning_rate": 0.0, |
|
"loss": 0.0409, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 412, |
|
"total_flos": 927334526976.0, |
|
"train_loss": 0.12373686270806396, |
|
"train_runtime": 256.674, |
|
"train_samples_per_second": 12.787, |
|
"train_steps_per_second": 1.605 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 412, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 927334526976.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|