{ "best_metric": 0.837573385518591, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-25/checkpoint-768", "epoch": 10.0, "eval_steps": 500, "global_step": 960, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.8755850791931152, "learning_rate": 0.00020053170483737637, "loss": 0.4801, "step": 96 }, { "epoch": 1.0, "eval_accuracy": 0.7818003913894325, "eval_f1": 0.8092386655260906, "eval_loss": 0.42955365777015686, "eval_precision": 0.7188449848024316, "eval_recall": 0.9256360078277887, "eval_runtime": 31.124, "eval_samples_per_second": 32.836, "eval_steps_per_second": 1.028, "step": 96 }, { "epoch": 2.0, "grad_norm": 2.4842429161071777, "learning_rate": 0.0001782504042998901, "loss": 0.4182, "step": 192 }, { "epoch": 2.0, "eval_accuracy": 0.8091976516634051, "eval_f1": 0.8232094288304623, "eval_loss": 0.3939443826675415, "eval_precision": 0.7668918918918919, "eval_recall": 0.8884540117416829, "eval_runtime": 32.508, "eval_samples_per_second": 31.438, "eval_steps_per_second": 0.984, "step": 192 }, { "epoch": 3.0, "grad_norm": 2.5258917808532715, "learning_rate": 0.00015596910376240383, "loss": 0.3965, "step": 288 }, { "epoch": 3.0, "eval_accuracy": 0.815068493150685, "eval_f1": 0.8325952170062001, "eval_loss": 0.39436766505241394, "eval_precision": 0.7605177993527508, "eval_recall": 0.9197651663405088, "eval_runtime": 29.4789, "eval_samples_per_second": 34.669, "eval_steps_per_second": 1.086, "step": 288 }, { "epoch": 4.0, "grad_norm": 3.0768847465515137, "learning_rate": 0.00013368780322491758, "loss": 0.3841, "step": 384 }, { "epoch": 4.0, "eval_accuracy": 0.8140900195694716, "eval_f1": 0.8137254901960785, "eval_loss": 0.38306960463523865, "eval_precision": 0.8153241650294696, "eval_recall": 0.812133072407045, "eval_runtime": 32.8502, "eval_samples_per_second": 31.111, "eval_steps_per_second": 0.974, "step": 384 }, { "epoch": 5.0, "grad_norm": 1.1314367055892944, "learning_rate": 0.00011140650268743131, "loss": 0.3779, "step": 480 }, { "epoch": 5.0, "eval_accuracy": 0.8326810176125244, "eval_f1": 0.8423963133640553, "eval_loss": 0.37976235151290894, "eval_precision": 0.7961672473867596, "eval_recall": 0.8943248532289628, "eval_runtime": 29.0986, "eval_samples_per_second": 35.122, "eval_steps_per_second": 1.1, "step": 480 }, { "epoch": 6.0, "grad_norm": 2.076519012451172, "learning_rate": 8.912520214994506e-05, "loss": 0.3741, "step": 576 }, { "epoch": 6.0, "eval_accuracy": 0.824853228962818, "eval_f1": 0.8397493285586393, "eval_loss": 0.3835048973560333, "eval_precision": 0.7739273927392739, "eval_recall": 0.9178082191780822, "eval_runtime": 28.8483, "eval_samples_per_second": 35.427, "eval_steps_per_second": 1.109, "step": 576 }, { "epoch": 7.0, "grad_norm": 1.4347150325775146, "learning_rate": 6.684390161245879e-05, "loss": 0.3699, "step": 672 }, { "epoch": 7.0, "eval_accuracy": 0.8356164383561644, "eval_f1": 0.8469945355191257, "eval_loss": 0.3747766315937042, "eval_precision": 0.7921635434412265, "eval_recall": 0.9099804305283757, "eval_runtime": 28.9227, "eval_samples_per_second": 35.336, "eval_steps_per_second": 1.106, "step": 672 }, { "epoch": 8.0, "grad_norm": 2.361301898956299, "learning_rate": 4.456260107497253e-05, "loss": 0.3677, "step": 768 }, { "epoch": 8.0, "eval_accuracy": 0.837573385518591, "eval_f1": 0.8482632541133455, "eval_loss": 0.3733108639717102, "eval_precision": 0.7958833619210978, "eval_recall": 0.9080234833659491, "eval_runtime": 29.9904, "eval_samples_per_second": 34.078, "eval_steps_per_second": 1.067, "step": 768 }, { "epoch": 9.0, "grad_norm": 1.602822184562683, "learning_rate": 2.2281300537486264e-05, "loss": 0.3622, "step": 864 }, { "epoch": 9.0, "eval_accuracy": 0.8277886497064579, "eval_f1": 0.8345864661654135, "eval_loss": 0.37449803948402405, "eval_precision": 0.8028933092224232, "eval_recall": 0.8688845401174168, "eval_runtime": 28.7959, "eval_samples_per_second": 35.491, "eval_steps_per_second": 1.111, "step": 864 }, { "epoch": 10.0, "grad_norm": 2.441206455230713, "learning_rate": 0.0, "loss": 0.3632, "step": 960 }, { "epoch": 10.0, "eval_accuracy": 0.824853228962818, "eval_f1": 0.831924882629108, "eval_loss": 0.3725528120994568, "eval_precision": 0.7996389891696751, "eval_recall": 0.8669275929549902, "eval_runtime": 29.1177, "eval_samples_per_second": 35.099, "eval_steps_per_second": 1.099, "step": 960 } ], "logging_steps": 500, "max_steps": 960, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2356951972800.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.6872599698926333, "learning_rate": 0.00022281300537486262, "num_train_epochs": 10, "temperature": 15 } }