{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.07628734901462174, "eval_steps": 8, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025429116338207248, "eval_loss": 2.1195061206817627, "eval_runtime": 8.9136, "eval_samples_per_second": 18.623, "eval_steps_per_second": 9.312, "step": 1 }, { "epoch": 0.007628734901462174, "grad_norm": 15.621068954467773, "learning_rate": 6e-05, "loss": 2.0799, "step": 3 }, { "epoch": 0.015257469802924348, "grad_norm": 2.5891709327697754, "learning_rate": 0.00012, "loss": 2.0585, "step": 6 }, { "epoch": 0.020343293070565798, "eval_loss": 1.9666500091552734, "eval_runtime": 8.9529, "eval_samples_per_second": 18.541, "eval_steps_per_second": 9.271, "step": 8 }, { "epoch": 0.02288620470438652, "grad_norm": 0.7750437259674072, "learning_rate": 0.00018, "loss": 2.1303, "step": 9 }, { "epoch": 0.030514939605848695, "grad_norm": 0.8389137387275696, "learning_rate": 0.00019510565162951537, "loss": 2.0045, "step": 12 }, { "epoch": 0.03814367450731087, "grad_norm": 0.7117744088172913, "learning_rate": 0.00017071067811865476, "loss": 1.889, "step": 15 }, { "epoch": 0.040686586141131596, "eval_loss": 1.8759232759475708, "eval_runtime": 8.9673, "eval_samples_per_second": 18.512, "eval_steps_per_second": 9.256, "step": 16 }, { "epoch": 0.04577240940877304, "grad_norm": 0.6732363700866699, "learning_rate": 0.00013090169943749476, "loss": 1.9495, "step": 18 }, { "epoch": 0.05340114431023522, "grad_norm": 0.6290881037712097, "learning_rate": 8.435655349597689e-05, "loss": 1.9134, "step": 21 }, { "epoch": 0.06102987921169739, "grad_norm": 0.541335940361023, "learning_rate": 4.12214747707527e-05, "loss": 1.7863, "step": 24 }, { "epoch": 0.06102987921169739, "eval_loss": 1.8515095710754395, "eval_runtime": 8.9814, "eval_samples_per_second": 18.483, "eval_steps_per_second": 9.241, "step": 24 }, { "epoch": 0.06865861411315957, "grad_norm": 0.6374207139015198, "learning_rate": 1.0899347581163221e-05, "loss": 1.9475, "step": 27 }, { "epoch": 0.07628734901462174, "grad_norm": 0.6173187494277954, "learning_rate": 0.0, "loss": 1.8881, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3116424700624896.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }