{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5678233438485805, "eval_steps": 9, "global_step": 45, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012618296529968454, "eval_loss": 1.88910710811615, "eval_runtime": 2.2136, "eval_samples_per_second": 60.535, "eval_steps_per_second": 7.68, "step": 1 }, { "epoch": 0.03785488958990536, "grad_norm": 0.7511712908744812, "learning_rate": 3e-05, "loss": 1.8139, "step": 3 }, { "epoch": 0.07570977917981073, "grad_norm": 0.8463343381881714, "learning_rate": 6e-05, "loss": 1.9489, "step": 6 }, { "epoch": 0.11356466876971609, "grad_norm": 0.9100675582885742, "learning_rate": 9e-05, "loss": 1.7745, "step": 9 }, { "epoch": 0.11356466876971609, "eval_loss": 1.8236573934555054, "eval_runtime": 2.232, "eval_samples_per_second": 60.036, "eval_steps_per_second": 7.617, "step": 9 }, { "epoch": 0.15141955835962145, "grad_norm": 0.6830158233642578, "learning_rate": 9.987820251299122e-05, "loss": 1.7182, "step": 12 }, { "epoch": 0.1892744479495268, "grad_norm": 0.7045711278915405, "learning_rate": 9.924038765061042e-05, "loss": 1.695, "step": 15 }, { "epoch": 0.22712933753943218, "grad_norm": 0.6682664752006531, "learning_rate": 9.806308479691595e-05, "loss": 1.5894, "step": 18 }, { "epoch": 0.22712933753943218, "eval_loss": 1.7477922439575195, "eval_runtime": 2.2514, "eval_samples_per_second": 59.52, "eval_steps_per_second": 7.551, "step": 18 }, { "epoch": 0.26498422712933756, "grad_norm": 0.6020422577857971, "learning_rate": 9.635919272833938e-05, "loss": 1.6764, "step": 21 }, { "epoch": 0.3028391167192429, "grad_norm": 0.8476335406303406, "learning_rate": 9.414737964294636e-05, "loss": 1.6508, "step": 24 }, { "epoch": 0.34069400630914826, "grad_norm": 0.7293965220451355, "learning_rate": 9.145187862775209e-05, "loss": 1.4719, "step": 27 }, { "epoch": 0.34069400630914826, "eval_loss": 1.7059693336486816, "eval_runtime": 2.2316, "eval_samples_per_second": 60.048, "eval_steps_per_second": 7.618, "step": 27 }, { "epoch": 0.3785488958990536, "grad_norm": 0.6286596059799194, "learning_rate": 8.83022221559489e-05, "loss": 1.7276, "step": 30 }, { "epoch": 0.416403785488959, "grad_norm": 0.8441144227981567, "learning_rate": 8.473291852294987e-05, "loss": 1.8129, "step": 33 }, { "epoch": 0.45425867507886436, "grad_norm": 0.6982610821723938, "learning_rate": 8.07830737662829e-05, "loss": 1.8098, "step": 36 }, { "epoch": 0.45425867507886436, "eval_loss": 1.6810848712921143, "eval_runtime": 2.2408, "eval_samples_per_second": 59.8, "eval_steps_per_second": 7.587, "step": 36 }, { "epoch": 0.4921135646687697, "grad_norm": 0.704291045665741, "learning_rate": 7.649596321166024e-05, "loss": 1.5021, "step": 39 }, { "epoch": 0.5299684542586751, "grad_norm": 0.725155234336853, "learning_rate": 7.191855733945387e-05, "loss": 1.712, "step": 42 }, { "epoch": 0.5678233438485805, "grad_norm": 0.6382411122322083, "learning_rate": 6.710100716628344e-05, "loss": 1.587, "step": 45 }, { "epoch": 0.5678233438485805, "eval_loss": 1.6629996299743652, "eval_runtime": 2.2344, "eval_samples_per_second": 59.971, "eval_steps_per_second": 7.608, "step": 45 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3568723789086720.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }