{ "best_metric": 0.6744102835655212, "best_model_checkpoint": "mobilebert_add_GLUE_Experiment_logit_kd_cola/checkpoint-804", "epoch": 17.0, "global_step": 1139, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.8111, "step": 67 }, { "epoch": 1.0, "eval_loss": 0.6859493851661682, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3711, "eval_samples_per_second": 439.88, "eval_steps_per_second": 3.796, "step": 67 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.7968, "step": 134 }, { "epoch": 2.0, "eval_loss": 0.6865417957305908, "eval_matthews_correlation": 0.0, "eval_runtime": 2.378, "eval_samples_per_second": 438.609, "eval_steps_per_second": 3.785, "step": 134 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.796, "step": 201 }, { "epoch": 3.0, "eval_loss": 0.6834728717803955, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3689, "eval_samples_per_second": 440.286, "eval_steps_per_second": 3.799, "step": 201 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.7938, "step": 268 }, { "epoch": 4.0, "eval_loss": 0.6812810897827148, "eval_matthews_correlation": 0.0, "eval_runtime": 2.5217, "eval_samples_per_second": 413.616, "eval_steps_per_second": 3.569, "step": 268 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.7828, "step": 335 }, { "epoch": 5.0, "eval_loss": 0.6767937541007996, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3679, "eval_samples_per_second": 440.48, "eval_steps_per_second": 3.801, "step": 335 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.7651, "step": 402 }, { "epoch": 6.0, "eval_loss": 0.6749773025512695, "eval_matthews_correlation": 0.0, "eval_runtime": 2.4029, "eval_samples_per_second": 434.065, "eval_steps_per_second": 3.746, "step": 402 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.7594, "step": 469 }, { "epoch": 7.0, "eval_loss": 0.6960383653640747, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3638, "eval_samples_per_second": 441.237, "eval_steps_per_second": 3.807, "step": 469 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.7592, "step": 536 }, { "epoch": 8.0, "eval_loss": 0.6800056099891663, "eval_matthews_correlation": 0.0, "eval_runtime": 2.364, "eval_samples_per_second": 441.196, "eval_steps_per_second": 3.807, "step": 536 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.7463, "step": 603 }, { "epoch": 9.0, "eval_loss": 0.6788876056671143, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3679, "eval_samples_per_second": 440.471, "eval_steps_per_second": 3.801, "step": 603 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.7437, "step": 670 }, { "epoch": 10.0, "eval_loss": 0.6794541478157043, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3741, "eval_samples_per_second": 439.332, "eval_steps_per_second": 3.791, "step": 670 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.7401, "step": 737 }, { "epoch": 11.0, "eval_loss": 0.6745404601097107, "eval_matthews_correlation": -0.007887379670285008, "eval_runtime": 2.3678, "eval_samples_per_second": 440.5, "eval_steps_per_second": 3.801, "step": 737 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.7398, "step": 804 }, { "epoch": 12.0, "eval_loss": 0.6744102835655212, "eval_matthews_correlation": -0.007887379670285008, "eval_runtime": 2.3606, "eval_samples_per_second": 441.829, "eval_steps_per_second": 3.813, "step": 804 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.7328, "step": 871 }, { "epoch": 13.0, "eval_loss": 0.6813325881958008, "eval_matthews_correlation": 0.05873054109498616, "eval_runtime": 2.3703, "eval_samples_per_second": 440.032, "eval_steps_per_second": 3.797, "step": 871 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.7321, "step": 938 }, { "epoch": 14.0, "eval_loss": 0.6881392598152161, "eval_matthews_correlation": 0.07944501641685853, "eval_runtime": 2.366, "eval_samples_per_second": 440.822, "eval_steps_per_second": 3.804, "step": 938 }, { "epoch": 15.0, "learning_rate": 3.5e-05, "loss": 0.7315, "step": 1005 }, { "epoch": 15.0, "eval_loss": 0.6783654689788818, "eval_matthews_correlation": 0.061491648871478045, "eval_runtime": 2.3648, "eval_samples_per_second": 441.052, "eval_steps_per_second": 3.806, "step": 1005 }, { "epoch": 16.0, "learning_rate": 3.4000000000000007e-05, "loss": 0.7295, "step": 1072 }, { "epoch": 16.0, "eval_loss": 0.681597888469696, "eval_matthews_correlation": 0.03846275142815186, "eval_runtime": 2.3627, "eval_samples_per_second": 441.445, "eval_steps_per_second": 3.809, "step": 1072 }, { "epoch": 17.0, "learning_rate": 3.3e-05, "loss": 0.7297, "step": 1139 }, { "epoch": 17.0, "eval_loss": 0.698566734790802, "eval_matthews_correlation": 0.050263791388451516, "eval_runtime": 2.3861, "eval_samples_per_second": 437.111, "eval_steps_per_second": 3.772, "step": 1139 }, { "epoch": 17.0, "step": 1139, "total_flos": 4300008849408000.0, "train_loss": 0.7582107054967436, "train_runtime": 1104.2457, "train_samples_per_second": 387.187, "train_steps_per_second": 3.034 } ], "max_steps": 3350, "num_train_epochs": 50, "total_flos": 4300008849408000.0, "trial_name": null, "trial_params": null }