{ "best_metric": 0.541095890410959, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-96", "epoch": 5.0, "eval_steps": 500, "global_step": 480, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.4191925525665283, "learning_rate": 0.00022738674133918383, "loss": 0.098, "step": 96 }, { "epoch": 1.0, "eval_accuracy": 0.541095890410959, "eval_f1": 0.20642978003384094, "eval_loss": 0.08223184198141098, "eval_precision": 0.7625, "eval_recall": 0.11937377690802348, "eval_runtime": 28.0826, "eval_samples_per_second": 36.393, "eval_steps_per_second": 1.139, "step": 96 }, { "epoch": 2.0, "grad_norm": 0.35244929790496826, "learning_rate": 0.00017054005600438787, "loss": 0.0833, "step": 192 }, { "epoch": 2.0, "eval_accuracy": 0.5371819960861057, "eval_f1": 0.19145299145299144, "eval_loss": 0.07941487431526184, "eval_precision": 0.7567567567567568, "eval_recall": 0.1095890410958904, "eval_runtime": 28.0455, "eval_samples_per_second": 36.441, "eval_steps_per_second": 1.141, "step": 192 }, { "epoch": 3.0, "grad_norm": 0.2497691810131073, "learning_rate": 0.00011369337066959191, "loss": 0.0813, "step": 288 }, { "epoch": 3.0, "eval_accuracy": 0.5342465753424658, "eval_f1": 0.18213058419243985, "eval_loss": 0.07839526236057281, "eval_precision": 0.7464788732394366, "eval_recall": 0.10371819960861056, "eval_runtime": 28.0524, "eval_samples_per_second": 36.432, "eval_steps_per_second": 1.141, "step": 288 }, { "epoch": 4.0, "grad_norm": 0.2048983871936798, "learning_rate": 5.684668533479596e-05, "loss": 0.0805, "step": 384 }, { "epoch": 4.0, "eval_accuracy": 0.5332681017612525, "eval_f1": 0.17331022530329288, "eval_loss": 0.07847526669502258, "eval_precision": 0.7575757575757576, "eval_recall": 0.09784735812133072, "eval_runtime": 28.003, "eval_samples_per_second": 36.496, "eval_steps_per_second": 1.143, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.3579064607620239, "learning_rate": 0.0, "loss": 0.0801, "step": 480 }, { "epoch": 5.0, "eval_accuracy": 0.538160469667319, "eval_f1": 0.1945392491467577, "eval_loss": 0.07800330221652985, "eval_precision": 0.76, "eval_recall": 0.11154598825831702, "eval_runtime": 27.9144, "eval_samples_per_second": 36.612, "eval_steps_per_second": 1.146, "step": 480 } ], "logging_steps": 500, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 1178475986400.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.09292461434001364, "learning_rate": 0.0002842334266739798, "num_train_epochs": 5, "temperature": 2 } }