{ "best_metric": 0.8346379647749511, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-20/checkpoint-480", "epoch": 5.0, "eval_steps": 500, "global_step": 480, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.3483004570007324, "learning_rate": 0.00013855533928571864, "loss": 0.5685, "step": 96 }, { "epoch": 1.0, "eval_accuracy": 0.7671232876712328, "eval_f1": 0.8019966722129783, "eval_loss": 0.477300226688385, "eval_precision": 0.6975397973950795, "eval_recall": 0.9432485322896281, "eval_runtime": 25.843, "eval_samples_per_second": 39.546, "eval_steps_per_second": 1.238, "step": 96 }, { "epoch": 2.0, "grad_norm": 2.558192729949951, "learning_rate": 0.00011546278273809888, "loss": 0.4736, "step": 192 }, { "epoch": 2.0, "eval_accuracy": 0.8072407045009785, "eval_f1": 0.8255093002657219, "eval_loss": 0.43477192521095276, "eval_precision": 0.7540453074433657, "eval_recall": 0.9119373776908023, "eval_runtime": 25.942, "eval_samples_per_second": 39.396, "eval_steps_per_second": 1.234, "step": 192 }, { "epoch": 3.0, "grad_norm": 3.4191761016845703, "learning_rate": 9.237022619047909e-05, "loss": 0.4377, "step": 288 }, { "epoch": 3.0, "eval_accuracy": 0.8258317025440313, "eval_f1": 0.8381818181818183, "eval_loss": 0.4183538258075714, "eval_precision": 0.7826825127334465, "eval_recall": 0.9021526418786693, "eval_runtime": 25.665, "eval_samples_per_second": 39.821, "eval_steps_per_second": 1.247, "step": 288 }, { "epoch": 4.0, "grad_norm": 7.199775218963623, "learning_rate": 6.927766964285932e-05, "loss": 0.4231, "step": 384 }, { "epoch": 4.0, "eval_accuracy": 0.8199608610567515, "eval_f1": 0.8257575757575758, "eval_loss": 0.4184337258338928, "eval_precision": 0.8, "eval_recall": 0.8532289628180039, "eval_runtime": 25.0602, "eval_samples_per_second": 40.782, "eval_steps_per_second": 1.277, "step": 384 }, { "epoch": 5.0, "grad_norm": 2.4152629375457764, "learning_rate": 4.6185113095239546e-05, "loss": 0.4131, "step": 480 }, { "epoch": 5.0, "eval_accuracy": 0.8346379647749511, "eval_f1": 0.8467815049864007, "eval_loss": 0.4067833125591278, "eval_precision": 0.7888513513513513, "eval_recall": 0.913894324853229, "eval_runtime": 24.9745, "eval_samples_per_second": 40.922, "eval_steps_per_second": 1.281, "step": 480 } ], "logging_steps": 500, "max_steps": 672, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "total_flos": 1178475986400.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.8907547983808969, "learning_rate": 0.00016164789583333842, "num_train_epochs": 7, "temperature": 2 } }