{ "best_metric": 0.541095890410959, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-402", "epoch": 10.0, "eval_steps": 500, "global_step": 670, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.22246789932250977, "learning_rate": 1.5679734473172616e-05, "loss": 0.2458, "step": 67 }, { "epoch": 1.0, "eval_accuracy": 0.5, "eval_f1": 0.0, "eval_loss": 0.1889893114566803, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 28.3905, "eval_samples_per_second": 35.998, "eval_steps_per_second": 1.127, "step": 67 }, { "epoch": 2.0, "grad_norm": 0.2654605507850647, "learning_rate": 1.3937541753931215e-05, "loss": 0.1861, "step": 134 }, { "epoch": 2.0, "eval_accuracy": 0.5, "eval_f1": 0.0, "eval_loss": 0.17826829850673676, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 29.9866, "eval_samples_per_second": 34.082, "eval_steps_per_second": 1.067, "step": 134 }, { "epoch": 3.0, "grad_norm": 0.32397571206092834, "learning_rate": 1.2195349034689811e-05, "loss": 0.1759, "step": 201 }, { "epoch": 3.0, "eval_accuracy": 0.5107632093933464, "eval_f1": 0.07063197026022304, "eval_loss": 0.16691070795059204, "eval_precision": 0.7037037037037037, "eval_recall": 0.03718199608610567, "eval_runtime": 28.5351, "eval_samples_per_second": 35.816, "eval_steps_per_second": 1.121, "step": 201 }, { "epoch": 4.0, "grad_norm": 0.6136437058448792, "learning_rate": 1.045315631544841e-05, "loss": 0.168, "step": 268 }, { "epoch": 4.0, "eval_accuracy": 0.5362035225048923, "eval_f1": 0.18556701030927833, "eval_loss": 0.160459965467453, "eval_precision": 0.7605633802816901, "eval_recall": 0.10567514677103718, "eval_runtime": 28.6402, "eval_samples_per_second": 35.684, "eval_steps_per_second": 1.117, "step": 268 }, { "epoch": 5.0, "grad_norm": 0.3216162919998169, "learning_rate": 8.710963596207009e-06, "loss": 0.1647, "step": 335 }, { "epoch": 5.0, "eval_accuracy": 0.5401174168297456, "eval_f1": 0.20068027210884357, "eval_loss": 0.1576094627380371, "eval_precision": 0.7662337662337663, "eval_recall": 0.11545988258317025, "eval_runtime": 28.3765, "eval_samples_per_second": 36.016, "eval_steps_per_second": 1.128, "step": 335 }, { "epoch": 6.0, "grad_norm": 0.45282211899757385, "learning_rate": 6.968770876965607e-06, "loss": 0.1625, "step": 402 }, { "epoch": 6.0, "eval_accuracy": 0.541095890410959, "eval_f1": 0.20373514431239387, "eval_loss": 0.15556302666664124, "eval_precision": 0.7692307692307693, "eval_recall": 0.11741682974559686, "eval_runtime": 28.8112, "eval_samples_per_second": 35.472, "eval_steps_per_second": 1.111, "step": 402 }, { "epoch": 7.0, "grad_norm": 0.3418940007686615, "learning_rate": 5.226578157724205e-06, "loss": 0.1611, "step": 469 }, { "epoch": 7.0, "eval_accuracy": 0.541095890410959, "eval_f1": 0.20373514431239387, "eval_loss": 0.15463578701019287, "eval_precision": 0.7692307692307693, "eval_recall": 0.11741682974559686, "eval_runtime": 28.8835, "eval_samples_per_second": 35.384, "eval_steps_per_second": 1.108, "step": 469 }, { "epoch": 8.0, "grad_norm": 0.3419853448867798, "learning_rate": 3.4843854384828036e-06, "loss": 0.1596, "step": 536 }, { "epoch": 8.0, "eval_accuracy": 0.541095890410959, "eval_f1": 0.20373514431239387, "eval_loss": 0.15391579270362854, "eval_precision": 0.7692307692307693, "eval_recall": 0.11741682974559686, "eval_runtime": 28.6467, "eval_samples_per_second": 35.676, "eval_steps_per_second": 1.117, "step": 536 }, { "epoch": 9.0, "grad_norm": 0.3018151521682739, "learning_rate": 1.7421927192414018e-06, "loss": 0.1589, "step": 603 }, { "epoch": 9.0, "eval_accuracy": 0.541095890410959, "eval_f1": 0.20373514431239387, "eval_loss": 0.15353241562843323, "eval_precision": 0.7692307692307693, "eval_recall": 0.11741682974559686, "eval_runtime": 28.7082, "eval_samples_per_second": 35.6, "eval_steps_per_second": 1.115, "step": 603 }, { "epoch": 10.0, "grad_norm": 0.4547845721244812, "learning_rate": 0.0, "loss": 0.1592, "step": 670 }, { "epoch": 10.0, "eval_accuracy": 0.541095890410959, "eval_f1": 0.20373514431239387, "eval_loss": 0.15339058637619019, "eval_precision": 0.7692307692307693, "eval_recall": 0.11741682974559686, "eval_runtime": 28.3962, "eval_samples_per_second": 35.991, "eval_steps_per_second": 1.127, "step": 670 } ], "logging_steps": 500, "max_steps": 670, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2356951972800.0, "train_batch_size": 46, "trial_name": null, "trial_params": { "alpha": 0.19075569878013487, "learning_rate": 1.7421927192414017e-05, "num_train_epochs": 10, "per_device_train_batch_size": 46, "temperature": 24 } }