{ "best_metric": 0.7544031311154599, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-6/checkpoint-744", "epoch": 8.0, "eval_steps": 500, "global_step": 744, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.4286664128303528, "learning_rate": 1.945409831472016e-05, "loss": 0.4806, "step": 93 }, { "epoch": 1.0, "eval_accuracy": 0.5, "eval_f1": 0.0, "eval_loss": 0.4703535735607147, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 28.42, "eval_samples_per_second": 35.961, "eval_steps_per_second": 1.126, "step": 93 }, { "epoch": 2.0, "grad_norm": 0.94158935546875, "learning_rate": 1.7022336025380143e-05, "loss": 0.4648, "step": 186 }, { "epoch": 2.0, "eval_accuracy": 0.5176125244618396, "eval_f1": 0.10849909584086799, "eval_loss": 0.4538751244544983, "eval_precision": 0.7142857142857143, "eval_recall": 0.05870841487279843, "eval_runtime": 28.3351, "eval_samples_per_second": 36.068, "eval_steps_per_second": 1.129, "step": 186 }, { "epoch": 3.0, "grad_norm": 1.0048439502716064, "learning_rate": 1.459057373604012e-05, "loss": 0.4406, "step": 279 }, { "epoch": 3.0, "eval_accuracy": 0.5929549902152642, "eval_f1": 0.3677811550151976, "eval_loss": 0.4149659276008606, "eval_precision": 0.8231292517006803, "eval_recall": 0.23679060665362034, "eval_runtime": 28.3925, "eval_samples_per_second": 35.995, "eval_steps_per_second": 1.127, "step": 279 }, { "epoch": 4.0, "grad_norm": 2.1001150608062744, "learning_rate": 1.2158811446700102e-05, "loss": 0.4126, "step": 372 }, { "epoch": 4.0, "eval_accuracy": 0.7025440313111546, "eval_f1": 0.6456876456876457, "eval_loss": 0.39196181297302246, "eval_precision": 0.7982708933717579, "eval_recall": 0.5420743639921722, "eval_runtime": 28.4943, "eval_samples_per_second": 35.867, "eval_steps_per_second": 1.123, "step": 372 }, { "epoch": 5.0, "grad_norm": 1.2996047735214233, "learning_rate": 9.72704915736008e-06, "loss": 0.4021, "step": 465 }, { "epoch": 5.0, "eval_accuracy": 0.735812133072407, "eval_f1": 0.7133757961783439, "eval_loss": 0.3851335346698761, "eval_precision": 0.7795823665893271, "eval_recall": 0.6575342465753424, "eval_runtime": 28.8344, "eval_samples_per_second": 35.444, "eval_steps_per_second": 1.11, "step": 465 }, { "epoch": 6.0, "grad_norm": 1.8318911790847778, "learning_rate": 7.29528686802006e-06, "loss": 0.3976, "step": 558 }, { "epoch": 6.0, "eval_accuracy": 0.7524461839530333, "eval_f1": 0.7394438722966015, "eval_loss": 0.3816056251525879, "eval_precision": 0.7804347826086957, "eval_recall": 0.7025440313111546, "eval_runtime": 28.629, "eval_samples_per_second": 35.698, "eval_steps_per_second": 1.118, "step": 558 }, { "epoch": 7.0, "grad_norm": 1.5164391994476318, "learning_rate": 4.86352457868004e-06, "loss": 0.3934, "step": 651 }, { "epoch": 7.0, "eval_accuracy": 0.7504892367906066, "eval_f1": 0.7357512953367875, "eval_loss": 0.37981584668159485, "eval_precision": 0.7819383259911894, "eval_recall": 0.6947162426614482, "eval_runtime": 29.4404, "eval_samples_per_second": 34.714, "eval_steps_per_second": 1.087, "step": 651 }, { "epoch": 8.0, "grad_norm": 1.8273214101791382, "learning_rate": 2.43176228934002e-06, "loss": 0.3903, "step": 744 }, { "epoch": 8.0, "eval_accuracy": 0.7544031311154599, "eval_f1": 0.7462082912032356, "eval_loss": 0.3790663480758667, "eval_precision": 0.7719665271966527, "eval_recall": 0.7221135029354208, "eval_runtime": 28.4026, "eval_samples_per_second": 35.983, "eval_steps_per_second": 1.127, "step": 744 } ], "logging_steps": 500, "max_steps": 837, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 500, "total_flos": 1885561578240.0, "train_batch_size": 33, "trial_name": null, "trial_params": { "alpha": 0.6122687021783514, "learning_rate": 2.188586060406018e-05, "num_train_epochs": 9, "per_device_train_batch_size": 33, "temperature": 14 } }