{ "best_metric": 0.7750439367311072, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-11/checkpoint-2140", "epoch": 11.0, "eval_steps": 500, "global_step": 2354, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 8.918547630310059, "learning_rate": 0.00018354791279082705, "loss": 0.5995, "step": 214 }, { "epoch": 1.0, "eval_accuracy": 0.7053309900410076, "eval_f1": 0.4944723618090453, "eval_loss": 0.5527746677398682, "eval_mcc": 0.29864842708933803, "eval_precision": 0.5774647887323944, "eval_recall": 0.43233743409490333, "eval_runtime": 3.1509, "eval_samples_per_second": 541.75, "eval_steps_per_second": 17.138, "step": 214 }, { "epoch": 2.0, "grad_norm": 3.0788278579711914, "learning_rate": 0.00016519312151174438, "loss": 0.5413, "step": 428 }, { "epoch": 2.0, "eval_accuracy": 0.7258347978910369, "eval_f1": 0.4845814977973568, "eval_loss": 0.5296629071235657, "eval_mcc": 0.33330907379505725, "eval_precision": 0.6489675516224189, "eval_recall": 0.3866432337434095, "eval_runtime": 3.2724, "eval_samples_per_second": 521.631, "eval_steps_per_second": 16.502, "step": 428 }, { "epoch": 3.0, "grad_norm": 3.197441339492798, "learning_rate": 0.00014683833023266166, "loss": 0.5154, "step": 642 }, { "epoch": 3.0, "eval_accuracy": 0.7258347978910369, "eval_f1": 0.38421052631578945, "eval_loss": 0.5396824479103088, "eval_mcc": 0.32457545244467173, "eval_precision": 0.7643979057591623, "eval_recall": 0.2565905096660808, "eval_runtime": 3.1857, "eval_samples_per_second": 535.83, "eval_steps_per_second": 16.951, "step": 642 }, { "epoch": 4.0, "grad_norm": 3.9606778621673584, "learning_rate": 0.00012848353895357893, "loss": 0.504, "step": 856 }, { "epoch": 4.0, "eval_accuracy": 0.7264206209724663, "eval_f1": 0.3942931258106355, "eval_loss": 0.5155606269836426, "eval_mcc": 0.32574276921975914, "eval_precision": 0.7524752475247525, "eval_recall": 0.2671353251318102, "eval_runtime": 3.3181, "eval_samples_per_second": 514.447, "eval_steps_per_second": 16.274, "step": 856 }, { "epoch": 5.0, "grad_norm": 4.5369954109191895, "learning_rate": 0.00011012874767449624, "loss": 0.4946, "step": 1070 }, { "epoch": 5.0, "eval_accuracy": 0.7504393673110721, "eval_f1": 0.5590062111801243, "eval_loss": 0.5006516575813293, "eval_mcc": 0.4049525635550248, "eval_precision": 0.6801007556675063, "eval_recall": 0.47451669595782076, "eval_runtime": 3.155, "eval_samples_per_second": 541.038, "eval_steps_per_second": 17.115, "step": 1070 }, { "epoch": 6.0, "grad_norm": 5.3806962966918945, "learning_rate": 9.177395639541353e-05, "loss": 0.4829, "step": 1284 }, { "epoch": 6.0, "eval_accuracy": 0.7586408904510837, "eval_f1": 0.603082851637765, "eval_loss": 0.486370325088501, "eval_mcc": 0.43615024061600965, "eval_precision": 0.6673773987206824, "eval_recall": 0.5500878734622144, "eval_runtime": 3.9493, "eval_samples_per_second": 432.226, "eval_steps_per_second": 13.673, "step": 1284 }, { "epoch": 7.0, "grad_norm": 6.009945392608643, "learning_rate": 7.341916511633083e-05, "loss": 0.4704, "step": 1498 }, { "epoch": 7.0, "eval_accuracy": 0.7580550673696543, "eval_f1": 0.5100830367734281, "eval_loss": 0.4908476173877716, "eval_mcc": 0.41865909968733495, "eval_precision": 0.7846715328467153, "eval_recall": 0.37785588752196836, "eval_runtime": 3.1648, "eval_samples_per_second": 539.371, "eval_steps_per_second": 17.063, "step": 1498 }, { "epoch": 8.0, "grad_norm": 4.42345666885376, "learning_rate": 5.506437383724812e-05, "loss": 0.4564, "step": 1712 }, { "epoch": 8.0, "eval_accuracy": 0.7662565905096661, "eval_f1": 0.5973763874873865, "eval_loss": 0.47338271141052246, "eval_mcc": 0.4474691814812865, "eval_precision": 0.7014218009478673, "eval_recall": 0.5202108963093146, "eval_runtime": 3.2515, "eval_samples_per_second": 524.99, "eval_steps_per_second": 16.608, "step": 1712 }, { "epoch": 9.0, "grad_norm": 7.744714260101318, "learning_rate": 3.6709582558165414e-05, "loss": 0.4528, "step": 1926 }, { "epoch": 9.0, "eval_accuracy": 0.7656707674282367, "eval_f1": 0.5975855130784709, "eval_loss": 0.4673592448234558, "eval_mcc": 0.4464084855010257, "eval_precision": 0.6988235294117647, "eval_recall": 0.5219683655536028, "eval_runtime": 3.1615, "eval_samples_per_second": 539.929, "eval_steps_per_second": 17.08, "step": 1926 }, { "epoch": 10.0, "grad_norm": 5.320211887359619, "learning_rate": 1.8354791279082707e-05, "loss": 0.4449, "step": 2140 }, { "epoch": 10.0, "eval_accuracy": 0.7750439367311072, "eval_f1": 0.6335877862595419, "eval_loss": 0.4649231731891632, "eval_mcc": 0.47665988640122975, "eval_precision": 0.6931106471816284, "eval_recall": 0.5834797891036907, "eval_runtime": 3.2751, "eval_samples_per_second": 521.205, "eval_steps_per_second": 16.488, "step": 2140 }, { "epoch": 11.0, "grad_norm": 5.221016883850098, "learning_rate": 0.0, "loss": 0.4371, "step": 2354 }, { "epoch": 11.0, "eval_accuracy": 0.7738722905682484, "eval_f1": 0.6405959031657356, "eval_loss": 0.46391916275024414, "eval_mcc": 0.4782970414232871, "eval_precision": 0.6811881188118812, "eval_recall": 0.6045694200351494, "eval_runtime": 3.1542, "eval_samples_per_second": 541.178, "eval_steps_per_second": 17.12, "step": 2354 } ], "logging_steps": 500, "max_steps": 2354, "num_input_tokens_seen": 0, "num_train_epochs": 11, "save_steps": 500, "total_flos": 5772532308840.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.9896635045471636, "learning_rate": 0.00020190270406990977, "num_train_epochs": 11, "temperature": 11 } }