{ "best_metric": 0.8414872798434442, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-27/checkpoint-768", "epoch": 8.0, "eval_steps": 500, "global_step": 768, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.020627737045288, "learning_rate": 0.00020460634625664984, "loss": 0.494, "step": 96 }, { "epoch": 1.0, "eval_accuracy": 0.7886497064579256, "eval_f1": 0.8032786885245903, "eval_loss": 0.4278793931007385, "eval_precision": 0.7512776831345827, "eval_recall": 0.863013698630137, "eval_runtime": 24.9337, "eval_samples_per_second": 40.989, "eval_steps_per_second": 1.283, "step": 96 }, { "epoch": 2.0, "grad_norm": 2.554121732711792, "learning_rate": 0.00017903055297456864, "loss": 0.4258, "step": 192 }, { "epoch": 2.0, "eval_accuracy": 0.8062622309197651, "eval_f1": 0.8241563055062168, "eval_loss": 0.40374234318733215, "eval_precision": 0.7544715447154472, "eval_recall": 0.9080234833659491, "eval_runtime": 25.4225, "eval_samples_per_second": 40.201, "eval_steps_per_second": 1.259, "step": 192 }, { "epoch": 3.0, "grad_norm": 3.225475311279297, "learning_rate": 0.00015345475969248738, "loss": 0.405, "step": 288 }, { "epoch": 3.0, "eval_accuracy": 0.8277886497064579, "eval_f1": 0.84, "eval_loss": 0.39554914832115173, "eval_precision": 0.7843803056027164, "eval_recall": 0.9041095890410958, "eval_runtime": 25.5434, "eval_samples_per_second": 40.01, "eval_steps_per_second": 1.253, "step": 288 }, { "epoch": 4.0, "grad_norm": 3.6178488731384277, "learning_rate": 0.00012787896641040618, "loss": 0.3903, "step": 384 }, { "epoch": 4.0, "eval_accuracy": 0.821917808219178, "eval_f1": 0.8222656250000001, "eval_loss": 0.38986799120903015, "eval_precision": 0.8206627680311891, "eval_recall": 0.8238747553816047, "eval_runtime": 25.4279, "eval_samples_per_second": 40.192, "eval_steps_per_second": 1.258, "step": 384 }, { "epoch": 5.0, "grad_norm": 1.1577227115631104, "learning_rate": 0.00010230317312832492, "loss": 0.3846, "step": 480 }, { "epoch": 5.0, "eval_accuracy": 0.8228962818003914, "eval_f1": 0.8309990662931839, "eval_loss": 0.3861733376979828, "eval_precision": 0.7946428571428571, "eval_recall": 0.8708414872798435, "eval_runtime": 25.7302, "eval_samples_per_second": 39.72, "eval_steps_per_second": 1.244, "step": 480 }, { "epoch": 6.0, "grad_norm": 2.0598785877227783, "learning_rate": 7.672737984624369e-05, "loss": 0.3784, "step": 576 }, { "epoch": 6.0, "eval_accuracy": 0.8317025440313112, "eval_f1": 0.8472468916518652, "eval_loss": 0.3895849585533142, "eval_precision": 0.775609756097561, "eval_recall": 0.9334637964774951, "eval_runtime": 25.139, "eval_samples_per_second": 40.654, "eval_steps_per_second": 1.273, "step": 576 }, { "epoch": 7.0, "grad_norm": 1.7429569959640503, "learning_rate": 5.115158656416246e-05, "loss": 0.3756, "step": 672 }, { "epoch": 7.0, "eval_accuracy": 0.8405088062622309, "eval_f1": 0.8530207394048693, "eval_loss": 0.3818517029285431, "eval_precision": 0.7909698996655519, "eval_recall": 0.9256360078277887, "eval_runtime": 25.2278, "eval_samples_per_second": 40.511, "eval_steps_per_second": 1.268, "step": 672 }, { "epoch": 8.0, "grad_norm": 2.883507013320923, "learning_rate": 2.557579328208123e-05, "loss": 0.3725, "step": 768 }, { "epoch": 8.0, "eval_accuracy": 0.8414872798434442, "eval_f1": 0.8527272727272727, "eval_loss": 0.38016170263290405, "eval_precision": 0.7962648556876061, "eval_recall": 0.9178082191780822, "eval_runtime": 24.8536, "eval_samples_per_second": 41.121, "eval_steps_per_second": 1.288, "step": 768 } ], "logging_steps": 500, "max_steps": 864, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 500, "total_flos": 1885561578240.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.718256826807231, "learning_rate": 0.0002301821395387311, "num_train_epochs": 9, "temperature": 6 } }