{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.975609756097561, "eval_steps": 500, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.032520325203252036, "grad_norm": 40.474364127833894, "learning_rate": 4e-07, "loss": 2.8277, "step": 1 }, { "epoch": 0.06504065040650407, "grad_norm": 39.80986919634387, "learning_rate": 8e-07, "loss": 2.7894, "step": 2 }, { "epoch": 0.0975609756097561, "grad_norm": 37.613931575844546, "learning_rate": 1.2e-06, "loss": 2.7705, "step": 3 }, { "epoch": 0.13008130081300814, "grad_norm": 34.058960023767305, "learning_rate": 1.6e-06, "loss": 2.6445, "step": 4 }, { "epoch": 0.16260162601626016, "grad_norm": 27.17276921169534, "learning_rate": 2e-06, "loss": 2.6669, "step": 5 }, { "epoch": 0.1951219512195122, "grad_norm": 15.411738818229358, "learning_rate": 1.9996268812619105e-06, "loss": 2.449, "step": 6 }, { "epoch": 0.22764227642276422, "grad_norm": 9.722583741958005, "learning_rate": 1.998507803482828e-06, "loss": 2.492, "step": 7 }, { "epoch": 0.2601626016260163, "grad_norm": 5.002874583844784, "learning_rate": 1.9966436017605294e-06, "loss": 2.3573, "step": 8 }, { "epoch": 0.2926829268292683, "grad_norm": 4.38783321163898, "learning_rate": 1.9940356672322033e-06, "loss": 2.2742, "step": 9 }, { "epoch": 0.3252032520325203, "grad_norm": 4.050625126869575, "learning_rate": 1.9906859460363304e-06, "loss": 2.3301, "step": 10 }, { "epoch": 0.35772357723577236, "grad_norm": 3.6789133162460774, "learning_rate": 1.986596937860402e-06, "loss": 2.2729, "step": 11 }, { "epoch": 0.3902439024390244, "grad_norm": 2.613575263087869, "learning_rate": 1.9817716940755586e-06, "loss": 2.3178, "step": 12 }, { "epoch": 0.42276422764227645, "grad_norm": 2.83098515959453, "learning_rate": 1.9762138154595447e-06, "loss": 2.3486, "step": 13 }, { "epoch": 0.45528455284552843, "grad_norm": 2.5132095083333734, "learning_rate": 1.969927449509671e-06, "loss": 2.273, "step": 14 }, { "epoch": 0.4878048780487805, "grad_norm": 2.137375987210218, "learning_rate": 1.9629172873477994e-06, "loss": 2.224, "step": 15 }, { "epoch": 0.5203252032520326, "grad_norm": 1.7542766611994582, "learning_rate": 1.955188560219648e-06, "loss": 2.3732, "step": 16 }, { "epoch": 0.5528455284552846, "grad_norm": 1.490783000173608, "learning_rate": 1.9467470355910435e-06, "loss": 2.2965, "step": 17 }, { "epoch": 0.5853658536585366, "grad_norm": 1.6159290325764584, "learning_rate": 1.93759901284402e-06, "loss": 2.2234, "step": 18 }, { "epoch": 0.6178861788617886, "grad_norm": 1.805845446790275, "learning_rate": 1.9277513185759843e-06, "loss": 2.2912, "step": 19 }, { "epoch": 0.6504065040650406, "grad_norm": 1.4604974196739857, "learning_rate": 1.9172113015054528e-06, "loss": 2.2642, "step": 20 }, { "epoch": 0.6829268292682927, "grad_norm": 1.324880967104045, "learning_rate": 1.9059868269881636e-06, "loss": 2.2243, "step": 21 }, { "epoch": 0.7154471544715447, "grad_norm": 1.1866315224263582, "learning_rate": 1.894086271147651e-06, "loss": 2.1637, "step": 22 }, { "epoch": 0.7479674796747967, "grad_norm": 1.2828999422781817, "learning_rate": 1.8815185146246715e-06, "loss": 2.3203, "step": 23 }, { "epoch": 0.7804878048780488, "grad_norm": 1.1737064917540987, "learning_rate": 1.8682929359501337e-06, "loss": 2.2612, "step": 24 }, { "epoch": 0.8130081300813008, "grad_norm": 1.2633649535555074, "learning_rate": 1.8544194045464886e-06, "loss": 2.2641, "step": 25 }, { "epoch": 0.8455284552845529, "grad_norm": 
1.214313162130321, "learning_rate": 1.8399082733627965e-06, "loss": 2.1903, "step": 26 }, { "epoch": 0.8780487804878049, "grad_norm": 1.1791856518491939, "learning_rate": 1.8247703711489684e-06, "loss": 2.2214, "step": 27 }, { "epoch": 0.9105691056910569, "grad_norm": 1.0774382245713374, "learning_rate": 1.8090169943749474e-06, "loss": 2.2613, "step": 28 }, { "epoch": 0.943089430894309, "grad_norm": 1.5312368454111793, "learning_rate": 1.792659898800858e-06, "loss": 2.2658, "step": 29 }, { "epoch": 0.975609756097561, "grad_norm": 1.1478826076751814, "learning_rate": 1.7757112907044198e-06, "loss": 2.3472, "step": 30 } ], "logging_steps": 1, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 30, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.86038253256704e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }