{ "best_metric": 0.6831683168316832, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-10/checkpoint-1782", "epoch": 6.0, "eval_steps": 500, "global_step": 1782, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.256939649581909, "learning_rate": 0.00020003476406791257, "loss": 0.6792, "step": 297 }, { "epoch": 1.0, "eval_accuracy": 0.5623762376237624, "eval_f1": 0.4199475065616798, "eval_loss": 0.6835970878601074, "eval_mcc": 0.14191809339823447, "eval_precision": 0.6201550387596899, "eval_recall": 0.31746031746031744, "eval_runtime": 0.9302, "eval_samples_per_second": 542.921, "eval_steps_per_second": 17.201, "step": 297 }, { "epoch": 2.0, "grad_norm": 2.075068712234497, "learning_rate": 0.00016002781125433002, "loss": 0.6413, "step": 594 }, { "epoch": 2.0, "eval_accuracy": 0.6475247524752475, "eval_f1": 0.6920415224913495, "eval_loss": 0.6267873644828796, "eval_mcc": 0.3090077890032306, "eval_precision": 0.6134969325153374, "eval_recall": 0.7936507936507936, "eval_runtime": 0.9329, "eval_samples_per_second": 541.318, "eval_steps_per_second": 17.151, "step": 594 }, { "epoch": 3.0, "grad_norm": 2.5431761741638184, "learning_rate": 0.00012002085844074753, "loss": 0.6128, "step": 891 }, { "epoch": 3.0, "eval_accuracy": 0.6376237623762376, "eval_f1": 0.7127158555729984, "eval_loss": 0.6455628275871277, "eval_mcc": 0.3245645841843151, "eval_precision": 0.5896103896103896, "eval_recall": 0.9007936507936508, "eval_runtime": 0.9321, "eval_samples_per_second": 541.772, "eval_steps_per_second": 17.165, "step": 891 }, { "epoch": 4.0, "grad_norm": 1.826803207397461, "learning_rate": 8.001390562716501e-05, "loss": 0.5934, "step": 1188 }, { "epoch": 4.0, "eval_accuracy": 0.6554455445544555, "eval_f1": 0.7156862745098039, "eval_loss": 0.6337190866470337, "eval_mcc": 0.3445175537543676, "eval_precision": 0.6083333333333333, "eval_recall": 0.8690476190476191, "eval_runtime": 0.9303, "eval_samples_per_second": 542.814, "eval_steps_per_second": 17.198, "step": 1188 }, { "epoch": 5.0, "grad_norm": 3.4030377864837646, "learning_rate": 4.0006952813582505e-05, "loss": 0.5801, "step": 1485 }, { "epoch": 5.0, "eval_accuracy": 0.6792079207920793, "eval_f1": 0.7054545454545453, "eval_loss": 0.6095411777496338, "eval_mcc": 0.3647441045625896, "eval_precision": 0.6510067114093959, "eval_recall": 0.7698412698412699, "eval_runtime": 0.9333, "eval_samples_per_second": 541.076, "eval_steps_per_second": 17.143, "step": 1485 }, { "epoch": 6.0, "grad_norm": 3.6899707317352295, "learning_rate": 0.0, "loss": 0.5709, "step": 1782 }, { "epoch": 6.0, "eval_accuracy": 0.6831683168316832, "eval_f1": 0.7090909090909091, "eval_loss": 0.6130138039588928, "eval_mcc": 0.3727967308109112, "eval_precision": 0.6543624161073825, "eval_recall": 0.7738095238095238, "eval_runtime": 0.9378, "eval_samples_per_second": 538.498, "eval_steps_per_second": 17.061, "step": 1782 } ], "logging_steps": 500, "max_steps": 1782, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "total_flos": 4384207595880.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.9802546625572438, "learning_rate": 0.00024004171688149506, "num_train_epochs": 6, "temperature": 49 } }