{ "best_metric": 0.8395303326810176, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-19/checkpoint-960", "epoch": 10.0, "eval_steps": 500, "global_step": 960, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.265742301940918, "learning_rate": 0.00016552182978108178, "loss": 0.5708, "step": 96 }, { "epoch": 1.0, "eval_accuracy": 0.7592954990215264, "eval_f1": 0.7980295566502462, "eval_loss": 0.4895075857639313, "eval_precision": 0.6874115983026874, "eval_recall": 0.9510763209393346, "eval_runtime": 24.8356, "eval_samples_per_second": 41.151, "eval_steps_per_second": 1.288, "step": 96 }, { "epoch": 2.0, "grad_norm": 4.506702899932861, "learning_rate": 0.00014713051536096157, "loss": 0.4657, "step": 192 }, { "epoch": 2.0, "eval_accuracy": 0.812133072407045, "eval_f1": 0.8318739054290717, "eval_loss": 0.42986905574798584, "eval_precision": 0.7527733755942948, "eval_recall": 0.9295499021526419, "eval_runtime": 25.1401, "eval_samples_per_second": 40.652, "eval_steps_per_second": 1.273, "step": 192 }, { "epoch": 3.0, "grad_norm": 2.952416181564331, "learning_rate": 0.00012873920094084136, "loss": 0.4303, "step": 288 }, { "epoch": 3.0, "eval_accuracy": 0.8209393346379648, "eval_f1": 0.8300835654596102, "eval_loss": 0.42889997363090515, "eval_precision": 0.7897526501766784, "eval_recall": 0.8747553816046967, "eval_runtime": 25.3264, "eval_samples_per_second": 40.353, "eval_steps_per_second": 1.264, "step": 288 }, { "epoch": 4.0, "grad_norm": 7.971947193145752, "learning_rate": 0.00011034788652072118, "loss": 0.4148, "step": 384 }, { "epoch": 4.0, "eval_accuracy": 0.815068493150685, "eval_f1": 0.8156097560975609, "eval_loss": 0.4303808808326721, "eval_precision": 0.8132295719844358, "eval_recall": 0.8180039138943248, "eval_runtime": 25.4322, "eval_samples_per_second": 40.185, "eval_steps_per_second": 1.258, "step": 384 }, { "epoch": 5.0, "grad_norm": 1.9225187301635742, "learning_rate": 9.195657210060098e-05, "loss": 0.4076, "step": 480 }, { "epoch": 5.0, "eval_accuracy": 0.8277886497064579, "eval_f1": 0.8414414414414414, "eval_loss": 0.40675535798072815, "eval_precision": 0.7796327212020033, "eval_recall": 0.913894324853229, "eval_runtime": 25.6271, "eval_samples_per_second": 39.88, "eval_steps_per_second": 1.249, "step": 480 }, { "epoch": 6.0, "grad_norm": 4.180634498596191, "learning_rate": 7.356525768048079e-05, "loss": 0.4009, "step": 576 }, { "epoch": 6.0, "eval_accuracy": 0.8238747553816047, "eval_f1": 0.8432055749128919, "eval_loss": 0.4179611802101135, "eval_precision": 0.7598116169544741, "eval_recall": 0.9471624266144814, "eval_runtime": 25.4907, "eval_samples_per_second": 40.093, "eval_steps_per_second": 1.255, "step": 576 }, { "epoch": 7.0, "grad_norm": 3.1518468856811523, "learning_rate": 5.517394326036059e-05, "loss": 0.3956, "step": 672 }, { "epoch": 7.0, "eval_accuracy": 0.8268101761252447, "eval_f1": 0.8456843940714908, "eval_loss": 0.412363737821579, "eval_precision": 0.7625786163522013, "eval_recall": 0.949119373776908, "eval_runtime": 25.9899, "eval_samples_per_second": 39.323, "eval_steps_per_second": 1.231, "step": 672 }, { "epoch": 8.0, "grad_norm": 3.885427236557007, "learning_rate": 3.678262884024039e-05, "loss": 0.3914, "step": 768 }, { "epoch": 8.0, "eval_accuracy": 0.8287671232876712, "eval_f1": 0.8466257668711656, "eval_loss": 0.4102531671524048, "eval_precision": 0.7666666666666667, "eval_recall": 0.9452054794520548, "eval_runtime": 25.478, "eval_samples_per_second": 40.113, "eval_steps_per_second": 1.256, "step": 768 }, { "epoch": 9.0, "grad_norm": 2.2045633792877197, "learning_rate": 1.8391314420120196e-05, "loss": 0.3868, "step": 864 }, { "epoch": 9.0, "eval_accuracy": 0.8365949119373777, "eval_f1": 0.8460829493087557, "eval_loss": 0.4009145498275757, "eval_precision": 0.7996515679442509, "eval_recall": 0.898238747553816, "eval_runtime": 24.6734, "eval_samples_per_second": 41.421, "eval_steps_per_second": 1.297, "step": 864 }, { "epoch": 10.0, "grad_norm": 4.398197650909424, "learning_rate": 0.0, "loss": 0.3889, "step": 960 }, { "epoch": 10.0, "eval_accuracy": 0.8395303326810176, "eval_f1": 0.8509090909090908, "eval_loss": 0.3997047245502472, "eval_precision": 0.7945670628183361, "eval_recall": 0.9158512720156555, "eval_runtime": 25.0131, "eval_samples_per_second": 40.859, "eval_steps_per_second": 1.279, "step": 960 } ], "logging_steps": 500, "max_steps": 960, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2356951972800.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.8822091648395545, "learning_rate": 0.00018391314420120196, "num_train_epochs": 10, "temperature": 8 } }