{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 1000, "global_step": 155, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3225806451612903, "grad_norm": 0.40279635787010193, "learning_rate": 0.000125, "loss": 2.2735, "step": 10 }, { "epoch": 0.6451612903225806, "grad_norm": 0.31329983472824097, "learning_rate": 0.00019959162014075553, "loss": 2.1138, "step": 20 }, { "epoch": 0.967741935483871, "grad_norm": 0.2201918661594391, "learning_rate": 0.00019503556665478067, "loss": 2.0106, "step": 30 }, { "epoch": 1.2903225806451613, "grad_norm": 0.27235254645347595, "learning_rate": 0.00018564551148870563, "loss": 1.9603, "step": 40 }, { "epoch": 1.6129032258064515, "grad_norm": 0.2621012330055237, "learning_rate": 0.00017189908153577473, "loss": 2.0729, "step": 50 }, { "epoch": 1.935483870967742, "grad_norm": 0.2580893337726593, "learning_rate": 0.00015449549149872376, "loss": 1.9079, "step": 60 }, { "epoch": 2.258064516129032, "grad_norm": 0.371009886264801, "learning_rate": 0.00013431997820456592, "loss": 1.927, "step": 70 }, { "epoch": 2.5806451612903225, "grad_norm": 0.4627054035663605, "learning_rate": 0.00011239877286961122, "loss": 1.9111, "step": 80 }, { "epoch": 2.903225806451613, "grad_norm": 0.4472900331020355, "learning_rate": 8.984690165878921e-05, "loss": 1.8636, "step": 90 }, { "epoch": 3.225806451612903, "grad_norm": 0.602531373500824, "learning_rate": 6.781146967348284e-05, "loss": 1.8102, "step": 100 }, { "epoch": 3.5483870967741935, "grad_norm": 0.6399003863334656, "learning_rate": 4.7413313238324556e-05, "loss": 1.7998, "step": 110 }, { "epoch": 3.870967741935484, "grad_norm": 0.7003827691078186, "learning_rate": 2.968998835418174e-05, "loss": 1.7756, "step": 120 }, { "epoch": 4.193548387096774, "grad_norm": 0.5751145482063293, "learning_rate": 1.554299522021796e-05, "loss": 1.7402, "step": 130 }, { "epoch": 4.516129032258064, "grad_norm": 0.7806760668754578, "learning_rate": 5.691923259479093e-06, "loss": 1.7489, "step": 140 }, { "epoch": 4.838709677419355, "grad_norm": 0.7050411105155945, "learning_rate": 6.378490697611761e-07, "loss": 1.7256, "step": 150 }, { "epoch": 5.0, "step": 155, "total_flos": 7.90044384249774e+16, "train_loss": 1.902422603484123, "train_runtime": 278.0882, "train_samples_per_second": 17.387, "train_steps_per_second": 0.557 } ], "logging_steps": 10, "max_steps": 155, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.90044384249774e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }