|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 230, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.1731, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.1107, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.1074, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 0.1454, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 0.1185, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 0.2155, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1626, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9999007677495127e-05, |
|
"loss": 0.1486, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9996030906921302e-05, |
|
"loss": 0.1611, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9991070279061808e-05, |
|
"loss": 0.1354, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9984126778425178e-05, |
|
"loss": 0.1309, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.9975201783049804e-05, |
|
"loss": 0.1228, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9964297064230437e-05, |
|
"loss": 0.1096, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.9951414786166656e-05, |
|
"loss": 0.1152, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.9936557505533346e-05, |
|
"loss": 0.1058, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9919728170973297e-05, |
|
"loss": 0.0853, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.9900930122511993e-05, |
|
"loss": 0.1111, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.988016709089474e-05, |
|
"loss": 1.8086, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.985744319684625e-05, |
|
"loss": 0.6606, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.9832762950252813e-05, |
|
"loss": 0.2228, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.9806131249267256e-05, |
|
"loss": 0.1629, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.977755337933682e-05, |
|
"loss": 0.1505, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.9747035012154203e-05, |
|
"loss": 0.1273, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.971458220453192e-05, |
|
"loss": 0.159, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.968020139720024e-05, |
|
"loss": 0.1383, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.9643899413528926e-05, |
|
"loss": 0.1456, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.960568345817306e-05, |
|
"loss": 0.1609, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9565561115643153e-05, |
|
"loss": 0.1258, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.9523540348799887e-05, |
|
"loss": 0.1219, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9479629497273783e-05, |
|
"loss": 0.122, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.9433837275810084e-05, |
|
"loss": 0.1164, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.9386172772539162e-05, |
|
"loss": 0.1234, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.933664544717288e-05, |
|
"loss": 0.1189, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.928526512912715e-05, |
|
"loss": 0.1278, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.9232042015571152e-05, |
|
"loss": 0.1117, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.9176986669403556e-05, |
|
"loss": 0.1157, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.9120110017156172e-05, |
|
"loss": 0.1032, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.9061423346825395e-05, |
|
"loss": 0.1035, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.9000938305631975e-05, |
|
"loss": 0.1029, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.8938666897709427e-05, |
|
"loss": 0.1058, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8874621481721645e-05, |
|
"loss": 0.1039, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.8808814768410157e-05, |
|
"loss": 0.114, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.874125981807148e-05, |
|
"loss": 0.1076, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.867197003796512e-05, |
|
"loss": 0.0863, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.8600959179652708e-05, |
|
"loss": 0.0893, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.852824133626881e-05, |
|
"loss": 0.0946, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.8453830939723913e-05, |
|
"loss": 0.0782, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.8377742757840246e-05, |
|
"loss": 0.0882, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.8299991891420848e-05, |
|
"loss": 0.0871, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.822059377125263e-05, |
|
"loss": 0.073, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.8139564155043885e-05, |
|
"loss": 0.0777, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.8056919124296957e-05, |
|
"loss": 0.0825, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.797267508111664e-05, |
|
"loss": 0.0732, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.788684874495491e-05, |
|
"loss": 0.079, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.7799457149292752e-05, |
|
"loss": 0.0681, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.7710517638259593e-05, |
|
"loss": 0.0683, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.76200478631911e-05, |
|
"loss": 0.0743, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.7528065779126035e-05, |
|
"loss": 0.0763, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.7434589641242814e-05, |
|
"loss": 0.067, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.7339638001236495e-05, |
|
"loss": 0.0667, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.7243229703636924e-05, |
|
"loss": 0.0703, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.714538388206878e-05, |
|
"loss": 0.0716, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.704611995545421e-05, |
|
"loss": 0.0513, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.694545762415887e-05, |
|
"loss": 0.0663, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.6843416866082118e-05, |
|
"loss": 0.0827, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.6740017932692073e-05, |
|
"loss": 0.0845, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.663528134500646e-05, |
|
"loss": 0.0791, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.6529227889519884e-05, |
|
"loss": 0.0596, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.642187861407847e-05, |
|
"loss": 0.0678, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.631325482370259e-05, |
|
"loss": 0.0822, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.6203378076358602e-05, |
|
"loss": 0.0595, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.609227017868033e-05, |
|
"loss": 0.0673, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.5979953181641246e-05, |
|
"loss": 0.0865, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.5866449376178118e-05, |
|
"loss": 0.0639, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.5751781288767052e-05, |
|
"loss": 0.0703, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.56359716769528e-05, |
|
"loss": 0.0707, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.551904352483217e-05, |
|
"loss": 0.0527, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.540102003849253e-05, |
|
"loss": 0.0503, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.52819246414062e-05, |
|
"loss": 0.0625, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.5161780969781728e-05, |
|
"loss": 0.0628, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.5040612867872945e-05, |
|
"loss": 0.0685, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.4918444383246738e-05, |
|
"loss": 0.0839, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.479529976201044e-05, |
|
"loss": 0.0643, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.4671203443999847e-05, |
|
"loss": 0.068, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.4546180057928792e-05, |
|
"loss": 0.0494, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.4420254416501198e-05, |
|
"loss": 0.0942, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.4293451511486658e-05, |
|
"loss": 0.0798, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.416579650876043e-05, |
|
"loss": 0.0801, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.403731474330893e-05, |
|
"loss": 0.0556, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.3908031714201621e-05, |
|
"loss": 0.0655, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.3777973079530362e-05, |
|
"loss": 0.0596, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.3647164651317178e-05, |
|
"loss": 0.0463, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.35156323903915e-05, |
|
"loss": 0.0551, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.338340240123785e-05, |
|
"loss": 0.0386, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.3250500926815046e-05, |
|
"loss": 0.047, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.3116954343347882e-05, |
|
"loss": 0.0612, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.2982789155092407e-05, |
|
"loss": 0.0409, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.2848031989075754e-05, |
|
"loss": 0.0528, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.2712709589811629e-05, |
|
"loss": 0.0525, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.2576848813992475e-05, |
|
"loss": 0.0381, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.2440476625159363e-05, |
|
"loss": 0.0542, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.23036200883507e-05, |
|
"loss": 0.0405, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2166306364730766e-05, |
|
"loss": 0.055, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2028562706199201e-05, |
|
"loss": 0.0436, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.1890416449982451e-05, |
|
"loss": 0.0467, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1751895013208325e-05, |
|
"loss": 0.0693, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1613025887464642e-05, |
|
"loss": 0.0495, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1473836633343145e-05, |
|
"loss": 0.0571, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.133435487496969e-05, |
|
"loss": 0.0331, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1194608294521853e-05, |
|
"loss": 0.0342, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.1054624626734985e-05, |
|
"loss": 0.0679, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0914431653397856e-05, |
|
"loss": 0.0509, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0774057197838963e-05, |
|
"loss": 0.0543, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.0633529119404571e-05, |
|
"loss": 0.0893, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.0492875307929643e-05, |
|
"loss": 0.0386, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.0352123678202686e-05, |
|
"loss": 0.0581, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.0211302164425657e-05, |
|
"loss": 0.0599, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.0070438714670004e-05, |
|
"loss": 0.0426, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 9.929561285329998e-06, |
|
"loss": 0.0425, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.788697835574348e-06, |
|
"loss": 0.0571, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.647876321797314e-06, |
|
"loss": 0.0478, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 9.507124692070356e-06, |
|
"loss": 0.0626, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 9.366470880595434e-06, |
|
"loss": 0.0438, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.225942802161042e-06, |
|
"loss": 0.0385, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.085568346602146e-06, |
|
"loss": 0.0533, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.945375373265017e-06, |
|
"loss": 0.0568, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.805391705478149e-06, |
|
"loss": 0.0449, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 8.665645125030312e-06, |
|
"loss": 0.0579, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 8.526163366656858e-06, |
|
"loss": 0.0386, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 8.38697411253536e-06, |
|
"loss": 0.0435, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.248104986791677e-06, |
|
"loss": 0.0453, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.10958355001755e-06, |
|
"loss": 0.0455, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.971437293800804e-06, |
|
"loss": 0.0434, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 7.833693635269235e-06, |
|
"loss": 0.0447, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.696379911649303e-06, |
|
"loss": 0.0376, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.559523374840639e-06, |
|
"loss": 0.0608, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 7.423151186007527e-06, |
|
"loss": 0.0406, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.287290410188374e-06, |
|
"loss": 0.0457, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 7.1519680109242486e-06, |
|
"loss": 0.0456, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 7.017210844907598e-06, |
|
"loss": 0.0251, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 6.883045656652122e-06, |
|
"loss": 0.0294, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 6.749499073184957e-06, |
|
"loss": 0.0357, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 6.616597598762151e-06, |
|
"loss": 0.036, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 6.484367609608503e-06, |
|
"loss": 0.0287, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 6.352835348682824e-06, |
|
"loss": 0.0295, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 6.22202692046964e-06, |
|
"loss": 0.0306, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 6.09196828579838e-06, |
|
"loss": 0.0247, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 5.962685256691071e-06, |
|
"loss": 0.0372, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 5.834203491239574e-06, |
|
"loss": 0.0227, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 5.706548488513347e-06, |
|
"loss": 0.0257, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 5.579745583498802e-06, |
|
"loss": 0.0251, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 5.453819942071212e-06, |
|
"loss": 0.0247, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 5.328796556000153e-06, |
|
"loss": 0.0356, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 5.204700237989564e-06, |
|
"loss": 0.0303, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 5.081555616753264e-06, |
|
"loss": 0.02, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 4.959387132127054e-06, |
|
"loss": 0.0328, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 4.838219030218274e-06, |
|
"loss": 0.0264, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 4.718075358593802e-06, |
|
"loss": 0.0308, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 4.598979961507472e-06, |
|
"loss": 0.0358, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.48095647516783e-06, |
|
"loss": 0.0272, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 4.364028323047205e-06, |
|
"loss": 0.0306, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 4.248218711232952e-06, |
|
"loss": 0.0193, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 4.133550623821884e-06, |
|
"loss": 0.0243, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 4.0200468183587556e-06, |
|
"loss": 0.0359, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 3.90772982131967e-06, |
|
"loss": 0.0305, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 3.7966219236414036e-06, |
|
"loss": 0.0324, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 3.6867451762974117e-06, |
|
"loss": 0.0355, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.5781213859215334e-06, |
|
"loss": 0.0308, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.4707721104801175e-06, |
|
"loss": 0.0487, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 3.3647186549935407e-06, |
|
"loss": 0.0165, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 3.2599820673079286e-06, |
|
"loss": 0.03, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.1565831339178844e-06, |
|
"loss": 0.0225, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 3.0545423758411298e-06, |
|
"loss": 0.0364, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.953880044545795e-06, |
|
"loss": 0.0269, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 2.8546161179312247e-06, |
|
"loss": 0.0142, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.7567702963630805e-06, |
|
"loss": 0.0377, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 2.6603619987635087e-06, |
|
"loss": 0.0295, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.5654103587571887e-06, |
|
"loss": 0.031, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 2.4719342208739695e-06, |
|
"loss": 0.0387, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.379952136808903e-06, |
|
"loss": 0.0274, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 2.2894823617404107e-06, |
|
"loss": 0.0341, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.200542850707247e-06, |
|
"loss": 0.032, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 2.113151255045095e-06, |
|
"loss": 0.0333, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.0273249188833656e-06, |
|
"loss": 0.0298, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 1.9430808757030452e-06, |
|
"loss": 0.0201, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.860435844956121e-06, |
|
"loss": 0.0156, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 1.7794062287473734e-06, |
|
"loss": 0.021, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.7000081085791541e-06, |
|
"loss": 0.0197, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 1.622257242159756e-06, |
|
"loss": 0.029, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.5461690602760882e-06, |
|
"loss": 0.0249, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.4717586637311943e-06, |
|
"loss": 0.0294, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.3990408203472938e-06, |
|
"loss": 0.0112, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.3280299620348847e-06, |
|
"loss": 0.0172, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1.258740181928524e-06, |
|
"loss": 0.0159, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 1.1911852315898465e-06, |
|
"loss": 0.0241, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 1.1253785182783571e-06, |
|
"loss": 0.0178, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.061333102290576e-06, |
|
"loss": 0.0173, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 9.990616943680266e-07, |
|
"loss": 0.0149, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 9.385766531746055e-07, |
|
"loss": 0.0354, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 8.798899828438334e-07, |
|
"loss": 0.0179, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 8.23013330596445e-07, |
|
"loss": 0.0145, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 7.679579844288509e-07, |
|
"loss": 0.0276, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 7.147348708728508e-07, |
|
"loss": 0.0117, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 6.633545528271213e-07, |
|
"loss": 0.0189, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 6.138272274608404e-07, |
|
"loss": 0.0257, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 5.661627241899193e-07, |
|
"loss": 0.0158, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 5.203705027262185e-07, |
|
"loss": 0.013, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 4.7645965120011627e-07, |
|
"loss": 0.0146, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 4.344388843568503e-07, |
|
"loss": 0.0208, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 3.943165418269401e-07, |
|
"loss": 0.0241, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 3.561005864710754e-07, |
|
"loss": 0.016, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 3.197986027997657e-07, |
|
"loss": 0.0222, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 2.8541779546808255e-07, |
|
"loss": 0.0155, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.529649878457985e-07, |
|
"loss": 0.0228, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 2.2244662066318146e-07, |
|
"loss": 0.0134, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.9386875073274636e-07, |
|
"loss": 0.0233, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.6723704974718758e-07, |
|
"loss": 0.0204, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.4255680315375164e-07, |
|
"loss": 0.0167, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 1.198329091052608e-07, |
|
"loss": 0.0147, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 9.906987748800945e-08, |
|
"loss": 0.0194, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 8.027182902670571e-08, |
|
"loss": 0.0162, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 6.344249446665673e-08, |
|
"loss": 0.0126, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 4.8585213833348686e-08, |
|
"loss": 0.0132, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 3.570293576956596e-08, |
|
"loss": 0.0175, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 2.479821695019813e-08, |
|
"loss": 0.0136, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 1.587322157482252e-08, |
|
"loss": 0.0182, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 8.929720938193331e-09, |
|
"loss": 0.017, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 3.9690930786995266e-09, |
|
"loss": 0.0134, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 9.923225048724672e-10, |
|
"loss": 0.0196, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0165, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 230, |
|
"total_flos": 5455864135680.0, |
|
"train_loss": 0.06894507200821587, |
|
"train_runtime": 1542.9779, |
|
"train_samples_per_second": 9.482, |
|
"train_steps_per_second": 0.149 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 230, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 800, |
|
"total_flos": 5455864135680.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|