{ "best_metric": 0.8346379647749511, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-20/checkpoint-480", "epoch": 7.0, "eval_steps": 500, "global_step": 672, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.3483004570007324, "learning_rate": 0.00013855533928571864, "loss": 0.5685, "step": 96 }, { "epoch": 1.0, "eval_accuracy": 0.7671232876712328, "eval_f1": 0.8019966722129783, "eval_loss": 0.477300226688385, "eval_precision": 0.6975397973950795, "eval_recall": 0.9432485322896281, "eval_runtime": 25.843, "eval_samples_per_second": 39.546, "eval_steps_per_second": 1.238, "step": 96 }, { "epoch": 2.0, "grad_norm": 2.558192729949951, "learning_rate": 0.00011546278273809888, "loss": 0.4736, "step": 192 }, { "epoch": 2.0, "eval_accuracy": 0.8072407045009785, "eval_f1": 0.8255093002657219, "eval_loss": 0.43477192521095276, "eval_precision": 0.7540453074433657, "eval_recall": 0.9119373776908023, "eval_runtime": 25.942, "eval_samples_per_second": 39.396, "eval_steps_per_second": 1.234, "step": 192 }, { "epoch": 3.0, "grad_norm": 3.4191761016845703, "learning_rate": 9.237022619047909e-05, "loss": 0.4377, "step": 288 }, { "epoch": 3.0, "eval_accuracy": 0.8258317025440313, "eval_f1": 0.8381818181818183, "eval_loss": 0.4183538258075714, "eval_precision": 0.7826825127334465, "eval_recall": 0.9021526418786693, "eval_runtime": 25.665, "eval_samples_per_second": 39.821, "eval_steps_per_second": 1.247, "step": 288 }, { "epoch": 4.0, "grad_norm": 7.199775218963623, "learning_rate": 6.927766964285932e-05, "loss": 0.4231, "step": 384 }, { "epoch": 4.0, "eval_accuracy": 0.8199608610567515, "eval_f1": 0.8257575757575758, "eval_loss": 0.4184337258338928, "eval_precision": 0.8, "eval_recall": 0.8532289628180039, "eval_runtime": 25.0602, "eval_samples_per_second": 40.782, "eval_steps_per_second": 1.277, "step": 384 }, { "epoch": 5.0, "grad_norm": 2.4152629375457764, "learning_rate": 4.6185113095239546e-05, "loss": 0.4131, "step": 480 }, { "epoch": 5.0, "eval_accuracy": 0.8346379647749511, "eval_f1": 0.8467815049864007, "eval_loss": 0.4067833125591278, "eval_precision": 0.7888513513513513, "eval_recall": 0.913894324853229, "eval_runtime": 24.9745, "eval_samples_per_second": 40.922, "eval_steps_per_second": 1.281, "step": 480 }, { "epoch": 6.0, "grad_norm": 3.8709616661071777, "learning_rate": 2.3092556547619773e-05, "loss": 0.4025, "step": 576 }, { "epoch": 6.0, "eval_accuracy": 0.8307240704500979, "eval_f1": 0.8442844284428443, "eval_loss": 0.4088129699230194, "eval_precision": 0.7816666666666666, "eval_recall": 0.9178082191780822, "eval_runtime": 25.2849, "eval_samples_per_second": 40.419, "eval_steps_per_second": 1.266, "step": 576 }, { "epoch": 7.0, "grad_norm": 3.1289896965026855, "learning_rate": 0.0, "loss": 0.398, "step": 672 }, { "epoch": 7.0, "eval_accuracy": 0.8287671232876712, "eval_f1": 0.839596700274977, "eval_loss": 0.40621256828308105, "eval_precision": 0.7896551724137931, "eval_recall": 0.8962818003913894, "eval_runtime": 25.662, "eval_samples_per_second": 39.825, "eval_steps_per_second": 1.247, "step": 672 } ], "logging_steps": 500, "max_steps": 672, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "total_flos": 1649866380960.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.8907547983808969, "learning_rate": 0.00016164789583333842, "num_train_epochs": 7, "temperature": 2 } }