{ "best_metric": 0.14487937092781067, "best_model_checkpoint": "limbxy_pose/checkpoint-1602", "epoch": 20.0, "eval_steps": 500, "global_step": 1780, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2808988764044944, "grad_norm": 1.6329148591013448e-13, "learning_rate": 5e-06, "loss": 0.3282, "step": 25 }, { "epoch": 0.5617977528089888, "grad_norm": 609844.4375, "learning_rate": 1e-05, "loss": 0.3197, "step": 50 }, { "epoch": 0.8426966292134831, "grad_norm": 51261.34765625, "learning_rate": 1.5e-05, "loss": 0.2128, "step": 75 }, { "epoch": 1.0, "eval_loss": 0.17451409995555878, "eval_rmse": 0.4177488386631012, "eval_runtime": 9.5044, "eval_samples_per_second": 105.214, "eval_steps_per_second": 1.683, "step": 89 }, { "epoch": 1.1235955056179776, "grad_norm": 2084511.375, "learning_rate": 2e-05, "loss": 0.198, "step": 100 }, { "epoch": 1.404494382022472, "grad_norm": 721293.5625, "learning_rate": 2.5e-05, "loss": 0.1779, "step": 125 }, { "epoch": 1.6853932584269664, "grad_norm": 1280031.75, "learning_rate": 3e-05, "loss": 0.1604, "step": 150 }, { "epoch": 1.9662921348314608, "grad_norm": 1277506.125, "learning_rate": 3.5e-05, "loss": 0.1574, "step": 175 }, { "epoch": 2.0, "eval_loss": 0.148615300655365, "eval_rmse": 0.3855065405368805, "eval_runtime": 9.2155, "eval_samples_per_second": 108.513, "eval_steps_per_second": 1.736, "step": 178 }, { "epoch": 2.247191011235955, "grad_norm": 1975641.0, "learning_rate": 4e-05, "loss": 0.1589, "step": 200 }, { "epoch": 2.5280898876404496, "grad_norm": 2331497.75, "learning_rate": 4.5e-05, "loss": 0.1757, "step": 225 }, { "epoch": 2.808988764044944, "grad_norm": 4412882.0, "learning_rate": 5e-05, "loss": 0.2045, "step": 250 }, { "epoch": 3.0, "eval_loss": 0.15186643600463867, "eval_rmse": 0.3897004723548889, "eval_runtime": 9.3829, "eval_samples_per_second": 106.576, "eval_steps_per_second": 1.705, "step": 267 }, { "epoch": 3.0898876404494384, "grad_norm": 1961631.5, "learning_rate": 4.918300653594771e-05, "loss": 0.1809, "step": 275 }, { "epoch": 3.370786516853933, "grad_norm": 5652000.5, "learning_rate": 4.8366013071895424e-05, "loss": 0.1778, "step": 300 }, { "epoch": 3.6516853932584272, "grad_norm": 1794376.5, "learning_rate": 4.7549019607843135e-05, "loss": 0.1861, "step": 325 }, { "epoch": 3.932584269662921, "grad_norm": 1615815.75, "learning_rate": 4.673202614379085e-05, "loss": 0.1697, "step": 350 }, { "epoch": 4.0, "eval_loss": 0.16323314607143402, "eval_rmse": 0.4040212333202362, "eval_runtime": 9.3078, "eval_samples_per_second": 107.437, "eval_steps_per_second": 1.719, "step": 356 }, { "epoch": 4.213483146067416, "grad_norm": 2846568.25, "learning_rate": 4.5915032679738564e-05, "loss": 0.1743, "step": 375 }, { "epoch": 4.49438202247191, "grad_norm": 2384729.0, "learning_rate": 4.5098039215686275e-05, "loss": 0.1723, "step": 400 }, { "epoch": 4.775280898876405, "grad_norm": 1569872.375, "learning_rate": 4.4281045751633986e-05, "loss": 0.1818, "step": 425 }, { "epoch": 5.0, "eval_loss": 0.19486868381500244, "eval_rmse": 0.44143933057785034, "eval_runtime": 9.2813, "eval_samples_per_second": 107.744, "eval_steps_per_second": 1.724, "step": 445 }, { "epoch": 5.056179775280899, "grad_norm": 343663.96875, "learning_rate": 4.3464052287581704e-05, "loss": 0.1845, "step": 450 }, { "epoch": 5.337078651685394, "grad_norm": 1288543.625, "learning_rate": 4.2647058823529415e-05, "loss": 0.1941, "step": 475 }, { "epoch": 5.617977528089888, "grad_norm": 4547344.0, "learning_rate": 
4.1830065359477126e-05, "loss": 0.1685, "step": 500 }, { "epoch": 5.898876404494382, "grad_norm": 185522.03125, "learning_rate": 4.101307189542484e-05, "loss": 0.1624, "step": 525 }, { "epoch": 6.0, "eval_loss": 0.14749938249588013, "eval_rmse": 0.3840564787387848, "eval_runtime": 9.2186, "eval_samples_per_second": 108.477, "eval_steps_per_second": 1.736, "step": 534 }, { "epoch": 6.179775280898877, "grad_norm": 2298808.25, "learning_rate": 4.0196078431372555e-05, "loss": 0.1578, "step": 550 }, { "epoch": 6.460674157303371, "grad_norm": 1625852.375, "learning_rate": 3.9379084967320266e-05, "loss": 0.162, "step": 575 }, { "epoch": 6.741573033707866, "grad_norm": 2281359.0, "learning_rate": 3.8562091503267977e-05, "loss": 0.1645, "step": 600 }, { "epoch": 7.0, "eval_loss": 0.14835500717163086, "eval_rmse": 0.38516879081726074, "eval_runtime": 9.4807, "eval_samples_per_second": 105.477, "eval_steps_per_second": 1.688, "step": 623 }, { "epoch": 7.022471910112359, "grad_norm": 673713.0625, "learning_rate": 3.774509803921569e-05, "loss": 0.1554, "step": 625 }, { "epoch": 7.303370786516854, "grad_norm": 260786.171875, "learning_rate": 3.6928104575163405e-05, "loss": 0.1656, "step": 650 }, { "epoch": 7.584269662921348, "grad_norm": 2860977.5, "learning_rate": 3.611111111111111e-05, "loss": 0.1642, "step": 675 }, { "epoch": 7.865168539325842, "grad_norm": 1183203.125, "learning_rate": 3.529411764705883e-05, "loss": 0.1655, "step": 700 }, { "epoch": 8.0, "eval_loss": 0.14708983898162842, "eval_rmse": 0.3835229277610779, "eval_runtime": 9.2282, "eval_samples_per_second": 108.363, "eval_steps_per_second": 1.734, "step": 712 }, { "epoch": 8.146067415730338, "grad_norm": 2235303.5, "learning_rate": 3.447712418300654e-05, "loss": 0.1543, "step": 725 }, { "epoch": 8.426966292134832, "grad_norm": 1311017.875, "learning_rate": 3.366013071895425e-05, "loss": 0.1507, "step": 750 }, { "epoch": 8.707865168539326, "grad_norm": 1330708.25, "learning_rate": 3.284313725490196e-05, "loss": 0.1625, "step": 775 }, { "epoch": 8.98876404494382, "grad_norm": 553780.625, "learning_rate": 3.202614379084967e-05, "loss": 0.1594, "step": 800 }, { "epoch": 9.0, "eval_loss": 0.15354213118553162, "eval_rmse": 0.3918445110321045, "eval_runtime": 9.1297, "eval_samples_per_second": 109.533, "eval_steps_per_second": 1.753, "step": 801 }, { "epoch": 9.269662921348315, "grad_norm": 208979.359375, "learning_rate": 3.120915032679739e-05, "loss": 0.1518, "step": 825 }, { "epoch": 9.55056179775281, "grad_norm": 78840.34375, "learning_rate": 3.0392156862745097e-05, "loss": 0.1552, "step": 850 }, { "epoch": 9.831460674157304, "grad_norm": 1873128.625, "learning_rate": 2.957516339869281e-05, "loss": 0.1513, "step": 875 }, { "epoch": 10.0, "eval_loss": 0.1448940634727478, "eval_rmse": 0.38064953684806824, "eval_runtime": 9.4691, "eval_samples_per_second": 105.607, "eval_steps_per_second": 1.69, "step": 890 }, { "epoch": 10.112359550561798, "grad_norm": 1098680.625, "learning_rate": 2.8758169934640522e-05, "loss": 0.1551, "step": 900 }, { "epoch": 10.393258426966293, "grad_norm": 1874154.875, "learning_rate": 2.7941176470588236e-05, "loss": 0.1516, "step": 925 }, { "epoch": 10.674157303370787, "grad_norm": 53160.01953125, "learning_rate": 2.7124183006535947e-05, "loss": 0.1466, "step": 950 }, { "epoch": 10.955056179775282, "grad_norm": 929561.25, "learning_rate": 2.630718954248366e-05, "loss": 0.1488, "step": 975 }, { "epoch": 11.0, "eval_loss": 0.14547079801559448, "eval_rmse": 0.38140633702278137, "eval_runtime": 9.1414, 
"eval_samples_per_second": 109.393, "eval_steps_per_second": 1.75, "step": 979 }, { "epoch": 11.235955056179776, "grad_norm": 1361347.875, "learning_rate": 2.5490196078431373e-05, "loss": 0.1457, "step": 1000 }, { "epoch": 11.51685393258427, "grad_norm": 72723.1953125, "learning_rate": 2.4673202614379087e-05, "loss": 0.1508, "step": 1025 }, { "epoch": 11.797752808988765, "grad_norm": 93677.0625, "learning_rate": 2.38562091503268e-05, "loss": 0.1507, "step": 1050 }, { "epoch": 12.0, "eval_loss": 0.1535731852054596, "eval_rmse": 0.3918841481208801, "eval_runtime": 9.5703, "eval_samples_per_second": 104.49, "eval_steps_per_second": 1.672, "step": 1068 }, { "epoch": 12.07865168539326, "grad_norm": 1383022.125, "learning_rate": 2.303921568627451e-05, "loss": 0.155, "step": 1075 }, { "epoch": 12.359550561797754, "grad_norm": 1435498.5, "learning_rate": 2.2222222222222223e-05, "loss": 0.15, "step": 1100 }, { "epoch": 12.640449438202246, "grad_norm": 256395.265625, "learning_rate": 2.1405228758169934e-05, "loss": 0.1465, "step": 1125 }, { "epoch": 12.921348314606742, "grad_norm": 258689.03125, "learning_rate": 2.058823529411765e-05, "loss": 0.1522, "step": 1150 }, { "epoch": 13.0, "eval_loss": 0.14494504034519196, "eval_rmse": 0.380716472864151, "eval_runtime": 9.4294, "eval_samples_per_second": 106.051, "eval_steps_per_second": 1.697, "step": 1157 }, { "epoch": 13.202247191011235, "grad_norm": 1009406.375, "learning_rate": 1.977124183006536e-05, "loss": 0.1491, "step": 1175 }, { "epoch": 13.48314606741573, "grad_norm": 720892.125, "learning_rate": 1.895424836601307e-05, "loss": 0.1502, "step": 1200 }, { "epoch": 13.764044943820224, "grad_norm": 48925.546875, "learning_rate": 1.8137254901960785e-05, "loss": 0.1458, "step": 1225 }, { "epoch": 14.0, "eval_loss": 0.14527221024036407, "eval_rmse": 0.3811459243297577, "eval_runtime": 9.2593, "eval_samples_per_second": 108.0, "eval_steps_per_second": 1.728, "step": 1246 }, { "epoch": 14.044943820224718, "grad_norm": 421335.125, "learning_rate": 1.7320261437908496e-05, "loss": 0.1485, "step": 1250 }, { "epoch": 14.325842696629213, "grad_norm": 375955.40625, "learning_rate": 1.650326797385621e-05, "loss": 0.1457, "step": 1275 }, { "epoch": 14.606741573033707, "grad_norm": 624158.0, "learning_rate": 1.568627450980392e-05, "loss": 0.1498, "step": 1300 }, { "epoch": 14.887640449438202, "grad_norm": 115186.8984375, "learning_rate": 1.4869281045751634e-05, "loss": 0.1506, "step": 1325 }, { "epoch": 15.0, "eval_loss": 0.1455306112766266, "eval_rmse": 0.3814847767353058, "eval_runtime": 9.3528, "eval_samples_per_second": 106.92, "eval_steps_per_second": 1.711, "step": 1335 }, { "epoch": 15.168539325842696, "grad_norm": 1805258.0, "learning_rate": 1.4052287581699347e-05, "loss": 0.1467, "step": 1350 }, { "epoch": 15.44943820224719, "grad_norm": 520502.5625, "learning_rate": 1.323529411764706e-05, "loss": 0.1529, "step": 1375 }, { "epoch": 15.730337078651685, "grad_norm": 2020412.75, "learning_rate": 1.2418300653594772e-05, "loss": 0.1505, "step": 1400 }, { "epoch": 16.0, "eval_loss": 0.1451566517353058, "eval_rmse": 0.3809943199157715, "eval_runtime": 9.215, "eval_samples_per_second": 108.518, "eval_steps_per_second": 1.736, "step": 1424 }, { "epoch": 16.01123595505618, "grad_norm": 252456.1875, "learning_rate": 1.1601307189542485e-05, "loss": 0.1481, "step": 1425 }, { "epoch": 16.292134831460675, "grad_norm": 267339.03125, "learning_rate": 1.0784313725490197e-05, "loss": 0.1464, "step": 1450 }, { "epoch": 16.573033707865168, "grad_norm": 482848.15625, 
"learning_rate": 9.96732026143791e-06, "loss": 0.1482, "step": 1475 }, { "epoch": 16.853932584269664, "grad_norm": 297641.71875, "learning_rate": 9.150326797385621e-06, "loss": 0.1463, "step": 1500 }, { "epoch": 17.0, "eval_loss": 0.1449102759361267, "eval_rmse": 0.38067084550857544, "eval_runtime": 9.2491, "eval_samples_per_second": 108.119, "eval_steps_per_second": 1.73, "step": 1513 }, { "epoch": 17.134831460674157, "grad_norm": 353910.8125, "learning_rate": 8.333333333333334e-06, "loss": 0.1481, "step": 1525 }, { "epoch": 17.415730337078653, "grad_norm": 847917.6875, "learning_rate": 7.5163398692810456e-06, "loss": 0.1494, "step": 1550 }, { "epoch": 17.696629213483146, "grad_norm": 197619.375, "learning_rate": 6.699346405228758e-06, "loss": 0.145, "step": 1575 }, { "epoch": 17.97752808988764, "grad_norm": 934886.75, "learning_rate": 5.882352941176471e-06, "loss": 0.1463, "step": 1600 }, { "epoch": 18.0, "eval_loss": 0.14487937092781067, "eval_rmse": 0.380630224943161, "eval_runtime": 9.3303, "eval_samples_per_second": 107.177, "eval_steps_per_second": 1.715, "step": 1602 }, { "epoch": 18.258426966292134, "grad_norm": 178175.765625, "learning_rate": 5.065359477124184e-06, "loss": 0.1456, "step": 1625 }, { "epoch": 18.53932584269663, "grad_norm": 282639.03125, "learning_rate": 4.2483660130718954e-06, "loss": 0.145, "step": 1650 }, { "epoch": 18.820224719101123, "grad_norm": 338323.84375, "learning_rate": 3.431372549019608e-06, "loss": 0.1494, "step": 1675 }, { "epoch": 19.0, "eval_loss": 0.1456519514322281, "eval_rmse": 0.38164374232292175, "eval_runtime": 9.5481, "eval_samples_per_second": 104.733, "eval_steps_per_second": 1.676, "step": 1691 }, { "epoch": 19.10112359550562, "grad_norm": 340422.8125, "learning_rate": 2.6143790849673204e-06, "loss": 0.147, "step": 1700 }, { "epoch": 19.382022471910112, "grad_norm": 366319.65625, "learning_rate": 1.7973856209150326e-06, "loss": 0.1471, "step": 1725 }, { "epoch": 19.662921348314608, "grad_norm": 642705.4375, "learning_rate": 9.80392156862745e-07, "loss": 0.1467, "step": 1750 }, { "epoch": 19.9438202247191, "grad_norm": 455884.28125, "learning_rate": 1.6339869281045752e-07, "loss": 0.1454, "step": 1775 }, { "epoch": 20.0, "eval_loss": 0.14513316750526428, "eval_rmse": 0.3809635043144226, "eval_runtime": 9.5295, "eval_samples_per_second": 104.937, "eval_steps_per_second": 1.679, "step": 1780 }, { "epoch": 20.0, "step": 1780, "total_flos": 0.0, "train_loss": 0.16350786438149012, "train_runtime": 3270.5878, "train_samples_per_second": 34.624, "train_steps_per_second": 0.544 } ], "logging_steps": 25, "max_steps": 1780, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }