{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 1655, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3, "learning_rate": 2.951807228915663e-05, "loss": 4.4788, "step": 100 }, { "epoch": 0.6, "learning_rate": 4.892545332437878e-05, "loss": 2.4769, "step": 200 }, { "epoch": 0.91, "learning_rate": 4.5567494963062465e-05, "loss": 2.3399, "step": 300 }, { "epoch": 1.21, "learning_rate": 4.220953660174614e-05, "loss": 2.1886, "step": 400 }, { "epoch": 1.51, "learning_rate": 3.885157824042982e-05, "loss": 2.1455, "step": 500 }, { "epoch": 1.81, "learning_rate": 3.54936198791135e-05, "loss": 2.1173, "step": 600 }, { "epoch": 2.11, "learning_rate": 3.213566151779718e-05, "loss": 2.0583, "step": 700 }, { "epoch": 2.42, "learning_rate": 2.8777703156480862e-05, "loss": 1.9753, "step": 800 }, { "epoch": 2.72, "learning_rate": 2.541974479516454e-05, "loss": 1.9841, "step": 900 }, { "epoch": 3.02, "learning_rate": 2.2061786433848224e-05, "loss": 1.963, "step": 1000 }, { "epoch": 3.32, "learning_rate": 1.87038280725319e-05, "loss": 1.8706, "step": 1100 }, { "epoch": 3.63, "learning_rate": 1.534586971121558e-05, "loss": 1.884, "step": 1200 }, { "epoch": 3.93, "learning_rate": 1.198791134989926e-05, "loss": 1.8814, "step": 1300 }, { "epoch": 4.23, "learning_rate": 8.629952988582941e-06, "loss": 1.828, "step": 1400 }, { "epoch": 4.53, "learning_rate": 5.271994627266622e-06, "loss": 1.8145, "step": 1500 }, { "epoch": 4.83, "learning_rate": 1.9140362659503023e-06, "loss": 1.8219, "step": 1600 }, { "epoch": 5.0, "step": 1655, "total_flos": 5.477940459823104e+16, "train_loss": 2.1648097335031746, "train_runtime": 1785.4315, "train_samples_per_second": 118.646, "train_steps_per_second": 0.927 } ], "max_steps": 1655, "num_train_epochs": 5, "total_flos": 5.477940459823104e+16, "trial_name": null, "trial_params": null }