{ "best_metric": 0.7439953134153485, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-14/checkpoint-1284", "epoch": 6.0, "eval_steps": 500, "global_step": 1284, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.230797290802002, "learning_rate": 0.0004542691429405582, "loss": 0.5244, "step": 214 }, { "epoch": 1.0, "eval_accuracy": 0.7065026362038664, "eval_f1": 0.3634053367217281, "eval_loss": 0.49707961082458496, "eval_mcc": 0.26187368637682734, "eval_precision": 0.6559633027522935, "eval_recall": 0.2513181019332162, "eval_runtime": 3.1642, "eval_samples_per_second": 539.474, "eval_steps_per_second": 17.066, "step": 214 }, { "epoch": 2.0, "grad_norm": 1.7037988901138306, "learning_rate": 0.0004303602406805288, "loss": 0.4856, "step": 428 }, { "epoch": 2.0, "eval_accuracy": 0.7047451669595782, "eval_f1": 0.5019762845849802, "eval_loss": 0.4853743612766266, "eval_mcc": 0.3014395863411175, "eval_precision": 0.5733634311512416, "eval_recall": 0.44639718804920914, "eval_runtime": 3.2868, "eval_samples_per_second": 519.356, "eval_steps_per_second": 16.43, "step": 428 }, { "epoch": 3.0, "grad_norm": 1.7830451726913452, "learning_rate": 0.0004064513384204994, "loss": 0.4758, "step": 642 }, { "epoch": 3.0, "eval_accuracy": 0.7018160515524312, "eval_f1": 0.25909752547307136, "eval_loss": 0.4858837425708771, "eval_mcc": 0.24331468344161114, "eval_precision": 0.7542372881355932, "eval_recall": 0.15641476274165203, "eval_runtime": 3.1648, "eval_samples_per_second": 539.369, "eval_steps_per_second": 17.063, "step": 642 }, { "epoch": 4.0, "grad_norm": 1.6830062866210938, "learning_rate": 0.0003825424361604701, "loss": 0.4724, "step": 856 }, { "epoch": 4.0, "eval_accuracy": 0.7275922671353251, "eval_f1": 0.4015444015444016, "eval_loss": 0.47459593415260315, "eval_mcc": 0.32925006262083517, "eval_precision": 0.75, "eval_recall": 0.2741652021089631, "eval_runtime": 3.7803, "eval_samples_per_second": 451.553, "eval_steps_per_second": 14.285, "step": 856 }, { "epoch": 5.0, "grad_norm": 4.760202407836914, "learning_rate": 0.0003586335339004407, "loss": 0.4614, "step": 1070 }, { "epoch": 5.0, "eval_accuracy": 0.7340363210310487, "eval_f1": 0.4733178654292344, "eval_loss": 0.4721404016017914, "eval_mcc": 0.35044282313920905, "eval_precision": 0.6962457337883959, "eval_recall": 0.3585237258347979, "eval_runtime": 3.1258, "eval_samples_per_second": 546.093, "eval_steps_per_second": 17.275, "step": 1070 }, { "epoch": 6.0, "grad_norm": 2.5421228408813477, "learning_rate": 0.0003347246316404113, "loss": 0.4617, "step": 1284 }, { "epoch": 6.0, "eval_accuracy": 0.7439953134153485, "eval_f1": 0.5346112886048987, "eval_loss": 0.4643152058124542, "eval_mcc": 0.38505007336259955, "eval_precision": 0.6783783783783783, "eval_recall": 0.44112478031634444, "eval_runtime": 3.278, "eval_samples_per_second": 520.745, "eval_steps_per_second": 16.473, "step": 1284 } ], "logging_steps": 500, "max_steps": 4280, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 3148653986640.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.8544594605661776, "learning_rate": 0.0004781780452005876, "num_train_epochs": 20, "temperature": 15 } }