{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.002319468068656255, "eval_steps": 8, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.731560228854182e-05, "eval_loss": 0.7148739695549011, "eval_runtime": 494.7064, "eval_samples_per_second": 11.009, "eval_steps_per_second": 5.504, "step": 1 }, { "epoch": 0.0002319468068656255, "grad_norm": 4.81842041015625, "learning_rate": 6e-05, "loss": 11.6946, "step": 3 }, { "epoch": 0.000463893613731251, "grad_norm": 6.081270217895508, "learning_rate": 0.00012, "loss": 9.5971, "step": 6 }, { "epoch": 0.0006185248183083346, "eval_loss": 0.7004389762878418, "eval_runtime": 497.7994, "eval_samples_per_second": 10.94, "eval_steps_per_second": 5.47, "step": 8 }, { "epoch": 0.0006958404205968764, "grad_norm": 10.870305061340332, "learning_rate": 0.00018, "loss": 8.6051, "step": 9 }, { "epoch": 0.000927787227462502, "grad_norm": 9.37214183807373, "learning_rate": 0.00019510565162951537, "loss": 7.6237, "step": 12 }, { "epoch": 0.0011597340343281275, "grad_norm": 13.013519287109375, "learning_rate": 0.00017071067811865476, "loss": 6.9112, "step": 15 }, { "epoch": 0.0012370496366166692, "eval_loss": 0.7165130376815796, "eval_runtime": 498.1829, "eval_samples_per_second": 10.932, "eval_steps_per_second": 5.466, "step": 16 }, { "epoch": 0.0013916808411937528, "grad_norm": 63.50639724731445, "learning_rate": 0.00013090169943749476, "loss": 6.4336, "step": 18 }, { "epoch": 0.0016236276480593784, "grad_norm": 4.184946537017822, "learning_rate": 8.435655349597689e-05, "loss": 4.8627, "step": 21 }, { "epoch": 0.001855574454925004, "grad_norm": 5.131363391876221, "learning_rate": 4.12214747707527e-05, "loss": 5.2544, "step": 24 }, { "epoch": 0.001855574454925004, "eval_loss": 0.645858108997345, "eval_runtime": 498.1385, "eval_samples_per_second": 10.933, "eval_steps_per_second": 5.466, "step": 24 }, { "epoch": 0.0020875212617906293, "grad_norm": 4.872713565826416, "learning_rate": 1.0899347581163221e-05, "loss": 5.5263, "step": 27 }, { "epoch": 0.002319468068656255, "grad_norm": 3.9103922843933105, "learning_rate": 0.0, "loss": 4.5022, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5957676102057984.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }