{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0012733987011333248, "eval_steps": 8, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.244662337111083e-05, "eval_loss": 4.8717498779296875, "eval_runtime": 2826.5415, "eval_samples_per_second": 10.528, "eval_steps_per_second": 2.632, "step": 1 }, { "epoch": 0.00012733987011333248, "grad_norm": 51.041725158691406, "learning_rate": 6e-05, "loss": 28.8506, "step": 3 }, { "epoch": 0.00025467974022666496, "grad_norm": 33.79914093017578, "learning_rate": 0.00012, "loss": 24.9305, "step": 6 }, { "epoch": 0.0003395729869688866, "eval_loss": 2.785485029220581, "eval_runtime": 2833.9295, "eval_samples_per_second": 10.501, "eval_steps_per_second": 2.625, "step": 8 }, { "epoch": 0.00038201961033999744, "grad_norm": 36.487548828125, "learning_rate": 0.00018, "loss": 17.2526, "step": 9 }, { "epoch": 0.0005093594804533299, "grad_norm": 24.10533905029297, "learning_rate": 0.00019510565162951537, "loss": 14.9313, "step": 12 }, { "epoch": 0.0006366993505666624, "grad_norm": 25.62902069091797, "learning_rate": 0.00017071067811865476, "loss": 14.2183, "step": 15 }, { "epoch": 0.0006791459739377732, "eval_loss": 2.3047831058502197, "eval_runtime": 2837.1932, "eval_samples_per_second": 10.489, "eval_steps_per_second": 2.622, "step": 16 }, { "epoch": 0.0007640392206799949, "grad_norm": 18.13753890991211, "learning_rate": 0.00013090169943749476, "loss": 13.969, "step": 18 }, { "epoch": 0.0008913790907933274, "grad_norm": 15.9353666305542, "learning_rate": 8.435655349597689e-05, "loss": 13.4526, "step": 21 }, { "epoch": 0.0010187189609066599, "grad_norm": 16.094314575195312, "learning_rate": 4.12214747707527e-05, "loss": 13.4364, "step": 24 }, { "epoch": 0.0010187189609066599, "eval_loss": 2.2031667232513428, "eval_runtime": 2836.1989, "eval_samples_per_second": 10.493, "eval_steps_per_second": 2.623, "step": 24 }, { "epoch": 0.0011460588310199923, "grad_norm": 18.387222290039062, "learning_rate": 1.0899347581163221e-05, "loss": 12.7279, "step": 27 }, { "epoch": 0.0012733987011333248, "grad_norm": 15.67333698272705, "learning_rate": 0.0, "loss": 13.3088, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.164082744066048e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }