{ "best_metric": 3.2361934185028076, "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_fish_1000samples_-1vocab_original-frozen/checkpoint-5000", "epoch": 144.9225225225225, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 18.12, "learning_rate": 9e-05, "loss": 2.7987, "step": 2500 }, { "epoch": 36.23, "learning_rate": 8e-05, "loss": 2.0754, "step": 5000 }, { "epoch": 36.23, "eval_loss": 3.2361934185028076, "eval_runtime": 515.1551, "eval_samples_per_second": 9.523, "eval_steps_per_second": 4.762, "step": 5000 }, { "epoch": 54.35, "learning_rate": 7e-05, "loss": 1.628, "step": 7500 }, { "epoch": 72.46, "learning_rate": 6e-05, "loss": 1.2791, "step": 10000 }, { "epoch": 72.46, "eval_loss": 4.527117729187012, "eval_runtime": 515.0288, "eval_samples_per_second": 9.526, "eval_steps_per_second": 4.763, "step": 10000 }, { "epoch": 90.58, "learning_rate": 5e-05, "loss": 1.0056, "step": 12500 }, { "epoch": 108.69, "learning_rate": 4e-05, "loss": 0.7878, "step": 15000 }, { "epoch": 108.69, "eval_loss": 5.602742671966553, "eval_runtime": 515.0516, "eval_samples_per_second": 9.525, "eval_steps_per_second": 4.763, "step": 15000 }, { "epoch": 126.81, "learning_rate": 3e-05, "loss": 0.6143, "step": 17500 }, { "epoch": 144.92, "learning_rate": 2e-05, "loss": 0.4749, "step": 20000 }, { "epoch": 144.92, "eval_loss": 6.468493938446045, "eval_runtime": 515.9317, "eval_samples_per_second": 9.509, "eval_steps_per_second": 4.755, "step": 20000 }, { "epoch": 144.92, "step": 20000, "total_flos": 2.9879104118666035e+17, "train_loss": 1.3329723815917969, "train_runtime": 48886.3278, "train_samples_per_second": 4.091, "train_steps_per_second": 0.511 } ], "max_steps": 25000, "num_train_epochs": 182, "total_flos": 2.9879104118666035e+17, "trial_name": null, "trial_params": null }