{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0015446615111938452, "eval_steps": 10, "global_step": 39, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.96067054152268e-05, "eval_loss": 0.2887181341648102, "eval_runtime": 1261.1036, "eval_samples_per_second": 8.43, "eval_steps_per_second": 4.215, "step": 1 }, { "epoch": 0.00019803352707613398, "grad_norm": 1.734922170639038, "learning_rate": 5e-05, "loss": 1.1269, "step": 5 }, { "epoch": 0.00039606705415226796, "grad_norm": 1.451378345489502, "learning_rate": 0.0001, "loss": 0.9029, "step": 10 }, { "epoch": 0.00039606705415226796, "eval_loss": 0.2597455680370331, "eval_runtime": 1258.0453, "eval_samples_per_second": 8.45, "eval_steps_per_second": 4.226, "step": 10 }, { "epoch": 0.000594100581228402, "grad_norm": 1.9125475883483887, "learning_rate": 9.619397662556435e-05, "loss": 1.0348, "step": 15 }, { "epoch": 0.0007921341083045359, "grad_norm": 2.3644330501556396, "learning_rate": 8.535533905932738e-05, "loss": 1.0579, "step": 20 }, { "epoch": 0.0007921341083045359, "eval_loss": 0.22462156414985657, "eval_runtime": 1262.7065, "eval_samples_per_second": 8.419, "eval_steps_per_second": 4.21, "step": 20 }, { "epoch": 0.0009901676353806699, "grad_norm": 1.9722180366516113, "learning_rate": 6.91341716182545e-05, "loss": 0.7406, "step": 25 }, { "epoch": 0.001188201162456804, "grad_norm": 1.2997853755950928, "learning_rate": 5e-05, "loss": 0.702, "step": 30 }, { "epoch": 0.001188201162456804, "eval_loss": 0.21118231117725372, "eval_runtime": 1263.6849, "eval_samples_per_second": 8.413, "eval_steps_per_second": 4.207, "step": 30 }, { "epoch": 0.001386234689532938, "grad_norm": 2.2332987785339355, "learning_rate": 3.086582838174551e-05, "loss": 0.9282, "step": 35 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3670824330395648e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }