{ "best_metric": 0.5564356435643565, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-8/checkpoint-2079", "epoch": 7.0, "eval_steps": 500, "global_step": 2079, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.24534238874912262, "learning_rate": 0.0002919829387820306, "loss": 0.3074, "step": 297 }, { "epoch": 1.0, "eval_accuracy": 0.5148514851485149, "eval_f1": 0.16382252559726962, "eval_loss": 0.30149543285369873, "eval_precision": 0.5853658536585366, "eval_recall": 0.09523809523809523, "eval_runtime": 14.7271, "eval_samples_per_second": 34.291, "eval_steps_per_second": 1.086, "step": 297 }, { "epoch": 2.0, "grad_norm": 0.5045191645622253, "learning_rate": 0.00025027109038459764, "loss": 0.3009, "step": 594 }, { "epoch": 2.0, "eval_accuracy": 0.5168316831683168, "eval_f1": 0.1643835616438356, "eval_loss": 0.2990603744983673, "eval_precision": 0.6, "eval_recall": 0.09523809523809523, "eval_runtime": 15.7792, "eval_samples_per_second": 32.004, "eval_steps_per_second": 1.014, "step": 594 }, { "epoch": 3.0, "grad_norm": 0.6056041121482849, "learning_rate": 0.00020855924198716472, "loss": 0.2956, "step": 891 }, { "epoch": 3.0, "eval_accuracy": 0.5207920792079208, "eval_f1": 0.17687074829931973, "eval_loss": 0.2930928170681, "eval_precision": 0.6190476190476191, "eval_recall": 0.10317460317460317, "eval_runtime": 14.6946, "eval_samples_per_second": 34.366, "eval_steps_per_second": 1.089, "step": 891 }, { "epoch": 4.0, "grad_norm": 0.4732127785682678, "learning_rate": 0.00016684739358973177, "loss": 0.2915, "step": 1188 }, { "epoch": 4.0, "eval_accuracy": 0.5207920792079208, "eval_f1": 0.16551724137931034, "eval_loss": 0.2868767976760864, "eval_precision": 0.631578947368421, "eval_recall": 0.09523809523809523, "eval_runtime": 14.9872, "eval_samples_per_second": 33.695, "eval_steps_per_second": 1.068, "step": 1188 }, { "epoch": 5.0, "grad_norm": 0.4796178936958313, "learning_rate": 0.00012513554519229882, "loss": 0.2881, "step": 1485 }, { "epoch": 5.0, "eval_accuracy": 0.5188118811881188, "eval_f1": 0.15916955017301038, "eval_loss": 0.2868916988372803, "eval_precision": 0.6216216216216216, "eval_recall": 0.09126984126984126, "eval_runtime": 14.7705, "eval_samples_per_second": 34.19, "eval_steps_per_second": 1.083, "step": 1485 }, { "epoch": 6.0, "grad_norm": 0.5560742616653442, "learning_rate": 8.342369679486588e-05, "loss": 0.2859, "step": 1782 }, { "epoch": 6.0, "eval_accuracy": 0.5544554455445545, "eval_f1": 0.25742574257425743, "eval_loss": 0.2852015793323517, "eval_precision": 0.7647058823529411, "eval_recall": 0.15476190476190477, "eval_runtime": 15.1062, "eval_samples_per_second": 33.43, "eval_steps_per_second": 1.059, "step": 1782 }, { "epoch": 7.0, "grad_norm": 0.5270741581916809, "learning_rate": 4.171184839743294e-05, "loss": 0.2835, "step": 2079 }, { "epoch": 7.0, "eval_accuracy": 0.5564356435643565, "eval_f1": 0.28205128205128205, "eval_loss": 0.28437066078186035, "eval_precision": 0.7333333333333333, "eval_recall": 0.1746031746031746, "eval_runtime": 14.6907, "eval_samples_per_second": 34.375, "eval_steps_per_second": 1.089, "step": 2079 } ], "logging_steps": 500, "max_steps": 2376, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "total_flos": 5444902981980.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.37248229271478195, "learning_rate": 0.00033369478717946353, "num_train_epochs": 8, "per_device_train_batch_size": 32, "temperature": 11 } }