{ "best_metric": 0.8307240704500979, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-728", "epoch": 8.0, "eval_steps": 500, "global_step": 728, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.826261043548584, "learning_rate": 0.0002239970993716639, "loss": 0.481, "step": 91 }, { "epoch": 1.0, "eval_accuracy": 0.7749510763209393, "eval_f1": 0.7993019197207678, "eval_loss": 0.4282819330692291, "eval_precision": 0.721259842519685, "eval_recall": 0.8962818003913894, "eval_runtime": 28.5148, "eval_samples_per_second": 35.841, "eval_steps_per_second": 1.122, "step": 91 }, { "epoch": 2.0, "grad_norm": 8.367091178894043, "learning_rate": 0.00019599746195020593, "loss": 0.4267, "step": 182 }, { "epoch": 2.0, "eval_accuracy": 0.7896281800391389, "eval_f1": 0.8043676069153776, "eval_loss": 0.4049767255783081, "eval_precision": 0.7517006802721088, "eval_recall": 0.8649706457925636, "eval_runtime": 28.5272, "eval_samples_per_second": 35.825, "eval_steps_per_second": 1.122, "step": 182 }, { "epoch": 3.0, "grad_norm": 3.821913480758667, "learning_rate": 0.00016799782452874793, "loss": 0.4057, "step": 273 }, { "epoch": 3.0, "eval_accuracy": 0.8101761252446184, "eval_f1": 0.8116504854368931, "eval_loss": 0.38922348618507385, "eval_precision": 0.8053949903660886, "eval_recall": 0.8180039138943248, "eval_runtime": 28.5222, "eval_samples_per_second": 35.832, "eval_steps_per_second": 1.122, "step": 273 }, { "epoch": 4.0, "grad_norm": 4.150608539581299, "learning_rate": 0.00013999818710728996, "loss": 0.3899, "step": 364 }, { "epoch": 4.0, "eval_accuracy": 0.8209393346379648, "eval_f1": 0.8221574344023325, "eval_loss": 0.38097265362739563, "eval_precision": 0.8166023166023166, "eval_recall": 0.8277886497064579, "eval_runtime": 27.9162, "eval_samples_per_second": 36.61, "eval_steps_per_second": 1.146, "step": 364 }, { "epoch": 5.0, "grad_norm": 15.217473030090332, "learning_rate": 0.00011199854968583195, "loss": 0.3811, "step": 455 }, { "epoch": 5.0, "eval_accuracy": 0.8209393346379648, "eval_f1": 0.8288119738072964, "eval_loss": 0.38663867115974426, "eval_precision": 0.7939068100358423, "eval_recall": 0.8669275929549902, "eval_runtime": 28.6175, "eval_samples_per_second": 35.712, "eval_steps_per_second": 1.118, "step": 455 }, { "epoch": 6.0, "grad_norm": 2.0855538845062256, "learning_rate": 8.399891226437396e-05, "loss": 0.3782, "step": 546 }, { "epoch": 6.0, "eval_accuracy": 0.8111545988258317, "eval_f1": 0.8305531167690957, "eval_loss": 0.39196252822875977, "eval_precision": 0.7531847133757962, "eval_recall": 0.9256360078277887, "eval_runtime": 28.4313, "eval_samples_per_second": 35.946, "eval_steps_per_second": 1.126, "step": 546 }, { "epoch": 7.0, "grad_norm": 4.913670063018799, "learning_rate": 5.5999274842915974e-05, "loss": 0.3707, "step": 637 }, { "epoch": 7.0, "eval_accuracy": 0.8170254403131115, "eval_f1": 0.8310749774164408, "eval_loss": 0.38759666681289673, "eval_precision": 0.7718120805369127, "eval_recall": 0.9001956947162426, "eval_runtime": 28.3099, "eval_samples_per_second": 36.1, "eval_steps_per_second": 1.13, "step": 637 }, { "epoch": 8.0, "grad_norm": 8.997469902038574, "learning_rate": 2.7999637421457987e-05, "loss": 0.3696, "step": 728 }, { "epoch": 8.0, "eval_accuracy": 0.8307240704500979, "eval_f1": 0.8422971741112123, "eval_loss": 0.3802284896373749, "eval_precision": 0.78839590443686, "eval_recall": 0.9041095890410958, "eval_runtime": 28.5564, "eval_samples_per_second": 35.789, "eval_steps_per_second": 1.121, "step": 728 } ], "logging_steps": 500, "max_steps": 819, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 500, "total_flos": 1885561578240.0, "train_batch_size": 34, "trial_name": null, "trial_params": { "alpha": 0.7088780913019314, "learning_rate": 0.0002519967367931219, "num_train_epochs": 9, "per_device_train_batch_size": 34, "temperature": 19 } }