{
  "best_metric": 0.14487937092781067,
  "best_model_checkpoint": "limbxy_pose/checkpoint-1602",
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 1780,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2808988764044944,
      "grad_norm": 1.6329148591013448e-13,
      "learning_rate": 5e-06,
      "loss": 0.3282,
      "step": 25
    },
    {
      "epoch": 0.5617977528089888,
      "grad_norm": 609844.4375,
      "learning_rate": 1e-05,
      "loss": 0.3197,
      "step": 50
    },
    {
      "epoch": 0.8426966292134831,
      "grad_norm": 51261.34765625,
      "learning_rate": 1.5e-05,
      "loss": 0.2128,
      "step": 75
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.17451409995555878,
      "eval_rmse": 0.4177488386631012,
      "eval_runtime": 9.5905,
      "eval_samples_per_second": 104.27,
      "eval_steps_per_second": 1.668,
      "step": 89
    },
    {
      "epoch": 1.1235955056179776,
      "grad_norm": 2084511.375,
      "learning_rate": 2e-05,
      "loss": 0.198,
      "step": 100
    },
    {
      "epoch": 1.404494382022472,
      "grad_norm": 721293.5625,
      "learning_rate": 2.5e-05,
      "loss": 0.1779,
      "step": 125
    },
    {
      "epoch": 1.6853932584269664,
      "grad_norm": 1280031.75,
      "learning_rate": 3e-05,
      "loss": 0.1604,
      "step": 150
    },
    {
      "epoch": 1.9662921348314608,
      "grad_norm": 1277506.125,
      "learning_rate": 3.5e-05,
      "loss": 0.1574,
      "step": 175
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.148615300655365,
      "eval_rmse": 0.3855065405368805,
      "eval_runtime": 9.3357,
      "eval_samples_per_second": 107.116,
      "eval_steps_per_second": 1.714,
      "step": 178
    },
    {
      "epoch": 2.247191011235955,
      "grad_norm": 1975641.0,
      "learning_rate": 4e-05,
      "loss": 0.1589,
      "step": 200
    },
    {
      "epoch": 2.5280898876404496,
      "grad_norm": 2331497.75,
      "learning_rate": 4.5e-05,
      "loss": 0.1757,
      "step": 225
    },
    {
      "epoch": 2.808988764044944,
      "grad_norm": 4412882.0,
      "learning_rate": 5e-05,
      "loss": 0.2045,
      "step": 250
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.15186643600463867,
      "eval_rmse": 0.3897004723548889,
      "eval_runtime": 9.3334,
      "eval_samples_per_second": 107.142,
      "eval_steps_per_second": 1.714,
      "step": 267
    },
    {
      "epoch": 3.0898876404494384,
      "grad_norm": 1961631.5,
      "learning_rate": 4.918300653594771e-05,
      "loss": 0.1809,
      "step": 275
    },
    {
      "epoch": 3.370786516853933,
      "grad_norm": 5652000.5,
      "learning_rate": 4.8366013071895424e-05,
      "loss": 0.1778,
      "step": 300
    },
    {
      "epoch": 3.6516853932584272,
      "grad_norm": 1794376.5,
      "learning_rate": 4.7549019607843135e-05,
      "loss": 0.1861,
      "step": 325
    },
    {
      "epoch": 3.932584269662921,
      "grad_norm": 1615815.75,
      "learning_rate": 4.673202614379085e-05,
      "loss": 0.1697,
      "step": 350
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.16323314607143402,
      "eval_rmse": 0.4040212333202362,
      "eval_runtime": 9.309,
      "eval_samples_per_second": 107.423,
      "eval_steps_per_second": 1.719,
      "step": 356
    },
    {
      "epoch": 4.213483146067416,
      "grad_norm": 2846568.25,
      "learning_rate": 4.5915032679738564e-05,
      "loss": 0.1743,
      "step": 375
    },
    {
      "epoch": 4.49438202247191,
      "grad_norm": 2384729.0,
      "learning_rate": 4.5098039215686275e-05,
      "loss": 0.1723,
      "step": 400
    },
    {
      "epoch": 4.775280898876405,
      "grad_norm": 1569872.375,
      "learning_rate": 4.4281045751633986e-05,
      "loss": 0.1818,
      "step": 425
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.19486868381500244,
      "eval_rmse": 0.44143933057785034,
      "eval_runtime": 9.4254,
      "eval_samples_per_second": 106.096,
      "eval_steps_per_second": 1.698,
      "step": 445
    },
    {
      "epoch": 5.056179775280899,
      "grad_norm": 343663.96875,
      "learning_rate": 4.3464052287581704e-05,
      "loss": 0.1845,
      "step": 450
    },
    {
      "epoch": 5.337078651685394,
      "grad_norm": 1288543.625,
      "learning_rate": 4.2647058823529415e-05,
      "loss": 0.1941,
      "step": 475
    },
    {
      "epoch": 5.617977528089888,
      "grad_norm": 4547344.0,
      "learning_rate": 4.1830065359477126e-05,
      "loss": 0.1685,
      "step": 500
    },
    {
      "epoch": 5.898876404494382,
      "grad_norm": 185522.03125,
      "learning_rate": 4.101307189542484e-05,
      "loss": 0.1624,
      "step": 525
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.14749938249588013,
      "eval_rmse": 0.3840564787387848,
      "eval_runtime": 9.7128,
      "eval_samples_per_second": 102.957,
      "eval_steps_per_second": 1.647,
      "step": 534
    },
    {
      "epoch": 6.179775280898877,
      "grad_norm": 2298808.25,
      "learning_rate": 4.0196078431372555e-05,
      "loss": 0.1578,
      "step": 550
    },
    {
      "epoch": 6.460674157303371,
      "grad_norm": 1625852.375,
      "learning_rate": 3.9379084967320266e-05,
      "loss": 0.162,
      "step": 575
    },
    {
      "epoch": 6.741573033707866,
      "grad_norm": 2281359.0,
      "learning_rate": 3.8562091503267977e-05,
      "loss": 0.1645,
      "step": 600
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.14835500717163086,
      "eval_rmse": 0.38516879081726074,
      "eval_runtime": 9.1497,
      "eval_samples_per_second": 109.293,
      "eval_steps_per_second": 1.749,
      "step": 623
    },
    {
      "epoch": 7.022471910112359,
      "grad_norm": 673713.0625,
      "learning_rate": 3.774509803921569e-05,
      "loss": 0.1554,
      "step": 625
    },
    {
      "epoch": 7.303370786516854,
      "grad_norm": 260786.171875,
      "learning_rate": 3.6928104575163405e-05,
      "loss": 0.1656,
      "step": 650
    },
    {
      "epoch": 7.584269662921348,
      "grad_norm": 2860977.5,
      "learning_rate": 3.611111111111111e-05,
      "loss": 0.1642,
      "step": 675
    },
    {
      "epoch": 7.865168539325842,
      "grad_norm": 1183203.125,
      "learning_rate": 3.529411764705883e-05,
      "loss": 0.1655,
      "step": 700
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.14708983898162842,
      "eval_rmse": 0.3835229277610779,
      "eval_runtime": 9.5185,
      "eval_samples_per_second": 105.059,
      "eval_steps_per_second": 1.681,
      "step": 712
    },
    {
      "epoch": 8.146067415730338,
      "grad_norm": 2235303.5,
      "learning_rate": 3.447712418300654e-05,
      "loss": 0.1543,
      "step": 725
    },
    {
      "epoch": 8.426966292134832,
      "grad_norm": 1311017.875,
      "learning_rate": 3.366013071895425e-05,
      "loss": 0.1507,
      "step": 750
    },
    {
      "epoch": 8.707865168539326,
      "grad_norm": 1330708.25,
      "learning_rate": 3.284313725490196e-05,
      "loss": 0.1625,
      "step": 775
    },
    {
      "epoch": 8.98876404494382,
      "grad_norm": 553780.625,
      "learning_rate": 3.202614379084967e-05,
      "loss": 0.1594,
      "step": 800
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.15354213118553162,
      "eval_rmse": 0.3918445110321045,
      "eval_runtime": 9.1616,
      "eval_samples_per_second": 109.151,
      "eval_steps_per_second": 1.746,
      "step": 801
    },
    {
      "epoch": 9.269662921348315,
      "grad_norm": 208979.359375,
      "learning_rate": 3.120915032679739e-05,
      "loss": 0.1518,
      "step": 825
    },
    {
      "epoch": 9.55056179775281,
      "grad_norm": 78840.34375,
      "learning_rate": 3.0392156862745097e-05,
      "loss": 0.1552,
      "step": 850
    },
    {
      "epoch": 9.831460674157304,
      "grad_norm": 1873128.625,
      "learning_rate": 2.957516339869281e-05,
      "loss": 0.1513,
      "step": 875
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.1448940634727478,
      "eval_rmse": 0.38064953684806824,
      "eval_runtime": 9.364,
      "eval_samples_per_second": 106.792,
      "eval_steps_per_second": 1.709,
      "step": 890
    },
    {
      "epoch": 10.112359550561798,
      "grad_norm": 1098680.625,
      "learning_rate": 2.8758169934640522e-05,
      "loss": 0.1551,
      "step": 900
    },
    {
      "epoch": 10.393258426966293,
      "grad_norm": 1874154.875,
      "learning_rate": 2.7941176470588236e-05,
      "loss": 0.1516,
      "step": 925
    },
    {
      "epoch": 10.674157303370787,
      "grad_norm": 53160.01953125,
      "learning_rate": 2.7124183006535947e-05,
      "loss": 0.1466,
      "step": 950
    },
    {
      "epoch": 10.955056179775282,
      "grad_norm": 929561.25,
      "learning_rate": 2.630718954248366e-05,
      "loss": 0.1488,
      "step": 975
    },
    {
      "epoch": 11.0,
      "eval_loss": 0.14547079801559448,
      "eval_rmse": 0.38140633702278137,
      "eval_runtime": 9.5104,
      "eval_samples_per_second": 105.148,
      "eval_steps_per_second": 1.682,
      "step": 979
    },
    {
      "epoch": 11.235955056179776,
      "grad_norm": 1361347.875,
      "learning_rate": 2.5490196078431373e-05,
      "loss": 0.1457,
      "step": 1000
    },
    {
      "epoch": 11.51685393258427,
      "grad_norm": 72723.1953125,
      "learning_rate": 2.4673202614379087e-05,
      "loss": 0.1508,
      "step": 1025
    },
    {
      "epoch": 11.797752808988765,
      "grad_norm": 93677.0625,
      "learning_rate": 2.38562091503268e-05,
      "loss": 0.1507,
      "step": 1050
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.1535731852054596,
      "eval_rmse": 0.3918841481208801,
      "eval_runtime": 9.5233,
      "eval_samples_per_second": 105.006,
      "eval_steps_per_second": 1.68,
      "step": 1068
    },
    {
      "epoch": 12.07865168539326,
      "grad_norm": 1383022.125,
      "learning_rate": 2.303921568627451e-05,
      "loss": 0.155,
      "step": 1075
    },
    {
      "epoch": 12.359550561797754,
      "grad_norm": 1435498.5,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.15,
      "step": 1100
    },
    {
      "epoch": 12.640449438202246,
      "grad_norm": 256395.265625,
      "learning_rate": 2.1405228758169934e-05,
      "loss": 0.1465,
      "step": 1125
    },
    {
      "epoch": 12.921348314606742,
      "grad_norm": 258689.03125,
      "learning_rate": 2.058823529411765e-05,
      "loss": 0.1522,
      "step": 1150
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.14494504034519196,
      "eval_rmse": 0.380716472864151,
      "eval_runtime": 9.1341,
      "eval_samples_per_second": 109.48,
      "eval_steps_per_second": 1.752,
      "step": 1157
    },
    {
      "epoch": 13.202247191011235,
      "grad_norm": 1009406.375,
      "learning_rate": 1.977124183006536e-05,
      "loss": 0.1491,
      "step": 1175
    },
    {
      "epoch": 13.48314606741573,
      "grad_norm": 720892.125,
      "learning_rate": 1.895424836601307e-05,
      "loss": 0.1502,
      "step": 1200
    },
    {
      "epoch": 13.764044943820224,
      "grad_norm": 48925.546875,
      "learning_rate": 1.8137254901960785e-05,
      "loss": 0.1458,
      "step": 1225
    },
    {
      "epoch": 14.0,
      "eval_loss": 0.14527221024036407,
      "eval_rmse": 0.3811459243297577,
      "eval_runtime": 9.7085,
      "eval_samples_per_second": 103.003,
      "eval_steps_per_second": 1.648,
      "step": 1246
    },
    {
      "epoch": 14.044943820224718,
      "grad_norm": 421335.125,
      "learning_rate": 1.7320261437908496e-05,
      "loss": 0.1485,
      "step": 1250
    },
    {
      "epoch": 14.325842696629213,
      "grad_norm": 375955.40625,
      "learning_rate": 1.650326797385621e-05,
      "loss": 0.1457,
      "step": 1275
    },
    {
      "epoch": 14.606741573033707,
      "grad_norm": 624158.0,
      "learning_rate": 1.568627450980392e-05,
      "loss": 0.1498,
      "step": 1300
    },
    {
      "epoch": 14.887640449438202,
      "grad_norm": 115186.8984375,
      "learning_rate": 1.4869281045751634e-05,
      "loss": 0.1506,
      "step": 1325
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.1455306112766266,
      "eval_rmse": 0.3814847767353058,
      "eval_runtime": 9.2528,
      "eval_samples_per_second": 108.075,
      "eval_steps_per_second": 1.729,
      "step": 1335
    },
    {
      "epoch": 15.168539325842696,
      "grad_norm": 1805258.0,
      "learning_rate": 1.4052287581699347e-05,
      "loss": 0.1467,
      "step": 1350
    },
    {
      "epoch": 15.44943820224719,
      "grad_norm": 520502.5625,
      "learning_rate": 1.323529411764706e-05,
      "loss": 0.1529,
      "step": 1375
    },
    {
      "epoch": 15.730337078651685,
      "grad_norm": 2020412.75,
      "learning_rate": 1.2418300653594772e-05,
      "loss": 0.1505,
      "step": 1400
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.1451566517353058,
      "eval_rmse": 0.3809943199157715,
      "eval_runtime": 9.2736,
      "eval_samples_per_second": 107.833,
      "eval_steps_per_second": 1.725,
      "step": 1424
    },
    {
      "epoch": 16.01123595505618,
      "grad_norm": 252456.1875,
      "learning_rate": 1.1601307189542485e-05,
      "loss": 0.1481,
      "step": 1425
    },
    {
      "epoch": 16.292134831460675,
      "grad_norm": 267339.03125,
      "learning_rate": 1.0784313725490197e-05,
      "loss": 0.1464,
      "step": 1450
    },
    {
      "epoch": 16.573033707865168,
      "grad_norm": 482848.15625,
      "learning_rate": 9.96732026143791e-06,
      "loss": 0.1482,
      "step": 1475
    },
    {
      "epoch": 16.853932584269664,
      "grad_norm": 297641.71875,
      "learning_rate": 9.150326797385621e-06,
      "loss": 0.1463,
      "step": 1500
    },
    {
      "epoch": 17.0,
      "eval_loss": 0.1449102759361267,
      "eval_rmse": 0.38067084550857544,
      "eval_runtime": 9.457,
      "eval_samples_per_second": 105.742,
      "eval_steps_per_second": 1.692,
      "step": 1513
    },
    {
      "epoch": 17.134831460674157,
      "grad_norm": 353910.8125,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.1481,
      "step": 1525
    },
    {
      "epoch": 17.415730337078653,
      "grad_norm": 847917.6875,
      "learning_rate": 7.5163398692810456e-06,
      "loss": 0.1494,
      "step": 1550
    },
    {
      "epoch": 17.696629213483146,
      "grad_norm": 197619.375,
      "learning_rate": 6.699346405228758e-06,
      "loss": 0.145,
      "step": 1575
    },
    {
      "epoch": 17.97752808988764,
      "grad_norm": 934886.75,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.1463,
      "step": 1600
    },
    {
      "epoch": 18.0,
      "eval_loss": 0.14487937092781067,
      "eval_rmse": 0.380630224943161,
      "eval_runtime": 9.2621,
      "eval_samples_per_second": 107.967,
      "eval_steps_per_second": 1.727,
      "step": 1602
    },
    {
      "epoch": 18.258426966292134,
      "grad_norm": 178175.765625,
      "learning_rate": 5.065359477124184e-06,
      "loss": 0.1456,
      "step": 1625
    },
    {
      "epoch": 18.53932584269663,
      "grad_norm": 282639.03125,
      "learning_rate": 4.2483660130718954e-06,
      "loss": 0.145,
      "step": 1650
    },
    {
      "epoch": 18.820224719101123,
      "grad_norm": 338323.84375,
      "learning_rate": 3.431372549019608e-06,
      "loss": 0.1494,
      "step": 1675
    },
    {
      "epoch": 19.0,
      "eval_loss": 0.1456519514322281,
      "eval_rmse": 0.38164374232292175,
      "eval_runtime": 9.5216,
      "eval_samples_per_second": 105.024,
      "eval_steps_per_second": 1.68,
      "step": 1691
    },
    {
      "epoch": 19.10112359550562,
      "grad_norm": 340422.8125,
      "learning_rate": 2.6143790849673204e-06,
      "loss": 0.147,
      "step": 1700
    },
    {
      "epoch": 19.382022471910112,
      "grad_norm": 366319.65625,
      "learning_rate": 1.7973856209150326e-06,
      "loss": 0.1471,
      "step": 1725
    },
    {
      "epoch": 19.662921348314608,
      "grad_norm": 642705.4375,
      "learning_rate": 9.80392156862745e-07,
      "loss": 0.1467,
      "step": 1750
    },
    {
      "epoch": 19.9438202247191,
      "grad_norm": 455884.28125,
      "learning_rate": 1.6339869281045752e-07,
      "loss": 0.1454,
      "step": 1775
    },
    {
      "epoch": 20.0,
      "eval_loss": 0.14513316750526428,
      "eval_rmse": 0.3809635043144226,
      "eval_runtime": 9.5919,
      "eval_samples_per_second": 104.254,
      "eval_steps_per_second": 1.668,
      "step": 1780
    },
    {
      "epoch": 20.0,
      "step": 1780,
      "total_flos": 0.0,
      "train_loss": 0.16350786438149012,
      "train_runtime": 3277.5098,
      "train_samples_per_second": 34.551,
      "train_steps_per_second": 0.543
    }
  ],
  "logging_steps": 25,
  "max_steps": 1780,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}