{ "best_metric": 0.7211350293542075, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-4596", "epoch": 7.0, "eval_steps": 500, "global_step": 5362, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.795396089553833, "learning_rate": 2.0226645656833037e-05, "loss": 0.3457, "step": 766 }, { "epoch": 1.0, "eval_accuracy": 0.5870841487279843, "eval_f1": 0.352760736196319, "eval_loss": 0.3146859109401703, "eval_precision": 0.8156028368794326, "eval_recall": 0.22504892367906065, "eval_runtime": 132.813, "eval_samples_per_second": 7.695, "eval_steps_per_second": 1.928, "step": 766 }, { "epoch": 2.0, "grad_norm": 7.475469589233398, "learning_rate": 1.6855538047360867e-05, "loss": 0.3142, "step": 1532 }, { "epoch": 2.0, "eval_accuracy": 0.5978473581213307, "eval_f1": 0.3763277693474962, "eval_loss": 0.3066306710243225, "eval_precision": 0.8378378378378378, "eval_recall": 0.24266144814090018, "eval_runtime": 132.6826, "eval_samples_per_second": 7.703, "eval_steps_per_second": 1.929, "step": 1532 }, { "epoch": 3.0, "grad_norm": 3.5962014198303223, "learning_rate": 1.3484430437888691e-05, "loss": 0.3086, "step": 2298 }, { "epoch": 3.0, "eval_accuracy": 0.6438356164383562, "eval_f1": 0.5172413793103448, "eval_loss": 0.30034366250038147, "eval_precision": 0.8024691358024691, "eval_recall": 0.3816046966731898, "eval_runtime": 133.6957, "eval_samples_per_second": 7.644, "eval_steps_per_second": 1.915, "step": 2298 }, { "epoch": 4.0, "grad_norm": 5.574785232543945, "learning_rate": 1.0113322828416519e-05, "loss": 0.3062, "step": 3064 }, { "epoch": 4.0, "eval_accuracy": 0.6663405088062623, "eval_f1": 0.5742821473158551, "eval_loss": 0.29888635873794556, "eval_precision": 0.7931034482758621, "eval_recall": 0.4500978473581213, "eval_runtime": 131.8312, "eval_samples_per_second": 7.752, "eval_steps_per_second": 1.942, "step": 3064 }, { "epoch": 5.0, "grad_norm": 5.6824212074279785, "learning_rate": 6.7422152189443455e-06, "loss": 0.3046, "step": 3830 }, { "epoch": 5.0, "eval_accuracy": 0.7084148727984344, "eval_f1": 0.6526806526806528, "eval_loss": 0.29940325021743774, "eval_precision": 0.8069164265129684, "eval_recall": 0.547945205479452, "eval_runtime": 132.0554, "eval_samples_per_second": 7.739, "eval_steps_per_second": 1.939, "step": 3830 }, { "epoch": 6.0, "grad_norm": 6.931075572967529, "learning_rate": 3.3711076094721728e-06, "loss": 0.3016, "step": 4596 }, { "epoch": 6.0, "eval_accuracy": 0.7211350293542075, "eval_f1": 0.6772366930917327, "eval_loss": 0.29910269379615784, "eval_precision": 0.803763440860215, "eval_recall": 0.5851272015655578, "eval_runtime": 132.5392, "eval_samples_per_second": 7.711, "eval_steps_per_second": 1.932, "step": 4596 }, { "epoch": 7.0, "grad_norm": 2.585300922393799, "learning_rate": 0.0, "loss": 0.3018, "step": 5362 }, { "epoch": 7.0, "eval_accuracy": 0.6947162426614482, "eval_f1": 0.625, "eval_loss": 0.29733312129974365, "eval_precision": 0.8099688473520249, "eval_recall": 0.5088062622309197, "eval_runtime": 133.4769, "eval_samples_per_second": 7.657, "eval_steps_per_second": 1.918, "step": 5362 } ], "logging_steps": 500, "max_steps": 5362, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "total_flos": 1649866380960.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "alpha": 0.43980036163769065, "learning_rate": 2.359775326630521e-05, "num_train_epochs": 7, "temperature": 17 } }