{ "best_metric": 0.6706946492195129, "best_model_checkpoint": "mobilebert_add_GLUE_Experiment_logit_kd_cola_256/checkpoint-737", "epoch": 16.0, "global_step": 1072, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.8133, "step": 67 }, { "epoch": 1.0, "eval_loss": 0.6864015460014343, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3673, "eval_samples_per_second": 440.581, "eval_steps_per_second": 3.802, "step": 67 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.797, "step": 134 }, { "epoch": 2.0, "eval_loss": 0.6865732669830322, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3603, "eval_samples_per_second": 441.891, "eval_steps_per_second": 3.813, "step": 134 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.7964, "step": 201 }, { "epoch": 3.0, "eval_loss": 0.6832323670387268, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3475, "eval_samples_per_second": 444.297, "eval_steps_per_second": 3.834, "step": 201 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.7948, "step": 268 }, { "epoch": 4.0, "eval_loss": 0.6826822757720947, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3515, "eval_samples_per_second": 443.542, "eval_steps_per_second": 3.827, "step": 268 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.7934, "step": 335 }, { "epoch": 5.0, "eval_loss": 0.6786946058273315, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3636, "eval_samples_per_second": 441.284, "eval_steps_per_second": 3.808, "step": 335 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.7759, "step": 402 }, { "epoch": 6.0, "eval_loss": 0.674231231212616, "eval_matthews_correlation": 0.0, "eval_runtime": 2.4966, "eval_samples_per_second": 417.768, "eval_steps_per_second": 3.605, "step": 402 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.761, "step": 469 }, { "epoch": 7.0, "eval_loss": 0.6777247786521912, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3633, "eval_samples_per_second": 441.324, "eval_steps_per_second": 3.808, "step": 469 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.756, "step": 536 }, { "epoch": 8.0, "eval_loss": 0.6754477024078369, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3686, "eval_samples_per_second": 440.338, "eval_steps_per_second": 3.8, "step": 536 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.7471, "step": 603 }, { "epoch": 9.0, "eval_loss": 0.6772245764732361, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3582, "eval_samples_per_second": 442.285, "eval_steps_per_second": 3.816, "step": 603 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.7457, "step": 670 }, { "epoch": 10.0, "eval_loss": 0.6760045289993286, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3622, "eval_samples_per_second": 441.534, "eval_steps_per_second": 3.81, "step": 670 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.7419, "step": 737 }, { "epoch": 11.0, "eval_loss": 0.6706946492195129, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3314, "eval_samples_per_second": 447.378, "eval_steps_per_second": 3.86, "step": 737 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.741, "step": 804 }, { "epoch": 12.0, "eval_loss": 0.6729392409324646, "eval_matthews_correlation": 0.0, "eval_runtime": 2.3775, "eval_samples_per_second": 438.704, "eval_steps_per_second": 3.786, "step": 804 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.7351, "step": 871 }, { "epoch": 13.0, "eval_loss": 0.6835747957229614, "eval_matthews_correlation": 0.07577320814453954, "eval_runtime": 2.3704, "eval_samples_per_second": 440.011, "eval_steps_per_second": 3.797, "step": 871 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.7349, "step": 938 }, { "epoch": 14.0, "eval_loss": 0.6768582463264465, "eval_matthews_correlation": 0.07951431611081063, "eval_runtime": 2.359, "eval_samples_per_second": 442.143, "eval_steps_per_second": 3.815, "step": 938 }, { "epoch": 15.0, "learning_rate": 3.5e-05, "loss": 0.7357, "step": 1005 }, { "epoch": 15.0, "eval_loss": 0.6715154051780701, "eval_matthews_correlation": 0.03149441738954873, "eval_runtime": 2.3487, "eval_samples_per_second": 444.084, "eval_steps_per_second": 3.832, "step": 1005 }, { "epoch": 16.0, "learning_rate": 3.4000000000000007e-05, "loss": 0.7333, "step": 1072 }, { "epoch": 16.0, "eval_loss": 0.681270956993103, "eval_matthews_correlation": 0.08941665373051183, "eval_runtime": 2.3503, "eval_samples_per_second": 443.777, "eval_steps_per_second": 3.829, "step": 1072 }, { "epoch": 16.0, "step": 1072, "total_flos": 3526461549969408.0, "train_loss": 0.7626726947613617, "train_runtime": 1027.7392, "train_samples_per_second": 416.01, "train_steps_per_second": 3.26 } ], "max_steps": 3350, "num_train_epochs": 50, "total_flos": 3526461549969408.0, "trial_name": null, "trial_params": null }