{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.07326007326007326, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002442002442002442, "eval_loss": 1.4053446054458618, "eval_runtime": 25.0149, "eval_samples_per_second": 6.916, "eval_steps_per_second": 3.478, "step": 1 }, { "epoch": 0.007326007326007326, "grad_norm": 1.1817816495895386, "learning_rate": 3e-05, "loss": 5.7243, "step": 3 }, { "epoch": 0.01221001221001221, "eval_loss": 1.3982402086257935, "eval_runtime": 25.0191, "eval_samples_per_second": 6.915, "eval_steps_per_second": 3.477, "step": 5 }, { "epoch": 0.014652014652014652, "grad_norm": 1.2069846391677856, "learning_rate": 6e-05, "loss": 5.4238, "step": 6 }, { "epoch": 0.02197802197802198, "grad_norm": 1.5348583459854126, "learning_rate": 9e-05, "loss": 5.9045, "step": 9 }, { "epoch": 0.02442002442002442, "eval_loss": 1.3449891805648804, "eval_runtime": 25.0177, "eval_samples_per_second": 6.915, "eval_steps_per_second": 3.478, "step": 10 }, { "epoch": 0.029304029304029304, "grad_norm": 1.2581948041915894, "learning_rate": 9.755282581475769e-05, "loss": 5.2718, "step": 12 }, { "epoch": 0.03663003663003663, "grad_norm": 1.3958145380020142, "learning_rate": 8.535533905932738e-05, "loss": 5.481, "step": 15 }, { "epoch": 0.03663003663003663, "eval_loss": 1.274520754814148, "eval_runtime": 25.1149, "eval_samples_per_second": 6.888, "eval_steps_per_second": 3.464, "step": 15 }, { "epoch": 0.04395604395604396, "grad_norm": 1.4559262990951538, "learning_rate": 6.545084971874738e-05, "loss": 4.8546, "step": 18 }, { "epoch": 0.04884004884004884, "eval_loss": 1.2311657667160034, "eval_runtime": 25.1016, "eval_samples_per_second": 6.892, "eval_steps_per_second": 3.466, "step": 20 }, { "epoch": 0.05128205128205128, "grad_norm": 1.7523326873779297, "learning_rate": 4.2178276747988446e-05, "loss": 4.8254, "step": 21 }, { "epoch": 0.05860805860805861, "grad_norm": 1.4057033061981201, "learning_rate": 2.061073738537635e-05, "loss": 5.2476, "step": 24 }, { "epoch": 0.06105006105006105, "eval_loss": 1.2100886106491089, "eval_runtime": 25.1246, "eval_samples_per_second": 6.886, "eval_steps_per_second": 3.463, "step": 25 }, { "epoch": 0.06593406593406594, "grad_norm": 2.7432544231414795, "learning_rate": 5.449673790581611e-06, "loss": 4.6687, "step": 27 }, { "epoch": 0.07326007326007326, "grad_norm": 1.4368641376495361, "learning_rate": 0.0, "loss": 4.8216, "step": 30 }, { "epoch": 0.07326007326007326, "eval_loss": 1.2058435678482056, "eval_runtime": 25.1068, "eval_samples_per_second": 6.891, "eval_steps_per_second": 3.465, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.39606748790784e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }