{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.010598834128245893, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010598834128245894, "grad_norm": 0.6350849270820618, "learning_rate": 2e-05, "loss": 5.9436, "step": 1 }, { "epoch": 0.0010598834128245894, "eval_loss": 1.5727260112762451, "eval_runtime": 27.5042, "eval_samples_per_second": 14.471, "eval_steps_per_second": 7.235, "step": 1 }, { "epoch": 0.0021197668256491787, "grad_norm": 0.601396918296814, "learning_rate": 4e-05, "loss": 7.3717, "step": 2 }, { "epoch": 0.003179650238473768, "grad_norm": 0.6531712412834167, "learning_rate": 6e-05, "loss": 6.0098, "step": 3 }, { "epoch": 0.003179650238473768, "eval_loss": 1.5714995861053467, "eval_runtime": 27.5049, "eval_samples_per_second": 14.47, "eval_steps_per_second": 7.235, "step": 3 }, { "epoch": 0.0042395336512983575, "grad_norm": 0.7442638278007507, "learning_rate": 8e-05, "loss": 6.4499, "step": 4 }, { "epoch": 0.005299417064122946, "grad_norm": 0.7729753851890564, "learning_rate": 0.0001, "loss": 6.1611, "step": 5 }, { "epoch": 0.006359300476947536, "grad_norm": 0.7975832223892212, "learning_rate": 0.00012, "loss": 6.1086, "step": 6 }, { "epoch": 0.006359300476947536, "eval_loss": 1.549599051475525, "eval_runtime": 27.5431, "eval_samples_per_second": 14.45, "eval_steps_per_second": 7.225, "step": 6 }, { "epoch": 0.007419183889772125, "grad_norm": 0.9927419424057007, "learning_rate": 0.00014, "loss": 6.2598, "step": 7 }, { "epoch": 0.008479067302596715, "grad_norm": 1.246896743774414, "learning_rate": 0.00016, "loss": 6.0118, "step": 8 }, { "epoch": 0.009538950715421303, "grad_norm": 1.5003572702407837, "learning_rate": 0.00018, "loss": 5.9791, "step": 9 }, { "epoch": 0.009538950715421303, "eval_loss": 1.4221528768539429, "eval_runtime": 27.5518, "eval_samples_per_second": 14.446, "eval_steps_per_second": 7.223, "step": 9 }, { "epoch": 0.010598834128245893, "grad_norm": 1.3482239246368408, "learning_rate": 0.0002, "loss": 5.7713, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3257498484080640.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }