{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.022714366837024418, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00045428733674048835, "eval_loss": 2.3526980876922607, "eval_runtime": 48.7648, "eval_samples_per_second": 19.01, "eval_steps_per_second": 9.515, "step": 1 }, { "epoch": 0.002271436683702442, "grad_norm": 6.649918079376221, "learning_rate": 5e-05, "loss": 2.2383, "step": 5 }, { "epoch": 0.004542873367404884, "grad_norm": 3.9040491580963135, "learning_rate": 0.0001, "loss": 1.5065, "step": 10 }, { "epoch": 0.004542873367404884, "eval_loss": 0.6507410407066345, "eval_runtime": 48.9057, "eval_samples_per_second": 18.955, "eval_steps_per_second": 9.488, "step": 10 }, { "epoch": 0.0068143100511073255, "grad_norm": 1.8648144006729126, "learning_rate": 9.619397662556435e-05, "loss": 0.3623, "step": 15 }, { "epoch": 0.009085746734809767, "grad_norm": 1.6206068992614746, "learning_rate": 8.535533905932738e-05, "loss": 0.158, "step": 20 }, { "epoch": 0.009085746734809767, "eval_loss": 0.06087474152445793, "eval_runtime": 48.8725, "eval_samples_per_second": 18.968, "eval_steps_per_second": 9.494, "step": 20 }, { "epoch": 0.011357183418512209, "grad_norm": 0.552495002746582, "learning_rate": 6.91341716182545e-05, "loss": 0.0247, "step": 25 }, { "epoch": 0.013628620102214651, "grad_norm": 2.7146968841552734, "learning_rate": 5e-05, "loss": 0.0146, "step": 30 }, { "epoch": 0.013628620102214651, "eval_loss": 0.00583901721984148, "eval_runtime": 49.0134, "eval_samples_per_second": 18.913, "eval_steps_per_second": 9.467, "step": 30 }, { "epoch": 0.01590005678591709, "grad_norm": 0.5495128631591797, "learning_rate": 3.086582838174551e-05, "loss": 0.0018, "step": 35 }, { "epoch": 0.018171493469619535, "grad_norm": 0.01361869927495718, "learning_rate": 1.4644660940672627e-05, "loss": 0.0004, "step": 40 }, { "epoch": 0.018171493469619535, "eval_loss": 0.0021691254805773497, "eval_runtime": 49.1642, "eval_samples_per_second": 18.855, "eval_steps_per_second": 9.438, "step": 40 }, { "epoch": 0.020442930153321975, "grad_norm": 0.020270636305212975, "learning_rate": 3.8060233744356633e-06, "loss": 0.001, "step": 45 }, { "epoch": 0.022714366837024418, "grad_norm": 0.010211742483079433, "learning_rate": 0.0, "loss": 0.0019, "step": 50 }, { "epoch": 0.022714366837024418, "eval_loss": 0.001980047207325697, "eval_runtime": 49.0112, "eval_samples_per_second": 18.914, "eval_steps_per_second": 9.467, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2500486653542400.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }