{ "best_metric": 0.7955477445811365, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-16/checkpoint-3210", "epoch": 15.0, "eval_steps": 500, "global_step": 3210, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 7.164041519165039, "learning_rate": 0.0004107110800529193, "loss": 0.5976, "step": 214 }, { "epoch": 1.0, "eval_accuracy": 0.7182190978324546, "eval_f1": 0.49208025343189016, "eval_loss": 0.5573095679283142, "eval_mcc": 0.32024447377810733, "eval_precision": 0.6164021164021164, "eval_recall": 0.4094903339191564, "eval_runtime": 3.1533, "eval_samples_per_second": 541.343, "eval_steps_per_second": 17.125, "step": 214 }, { "epoch": 2.0, "grad_norm": 2.810741424560547, "learning_rate": 0.00038909470741855514, "loss": 0.535, "step": 428 }, { "epoch": 2.0, "eval_accuracy": 0.7299355594610427, "eval_f1": 0.5719591457753018, "eval_loss": 0.5309674143791199, "eval_mcc": 0.3769093274057024, "eval_precision": 0.6062992125984252, "eval_recall": 0.5413005272407733, "eval_runtime": 3.2455, "eval_samples_per_second": 525.953, "eval_steps_per_second": 16.638, "step": 428 }, { "epoch": 3.0, "grad_norm": 2.5110363960266113, "learning_rate": 0.00036747833478419095, "loss": 0.51, "step": 642 }, { "epoch": 3.0, "eval_accuracy": 0.7527826596367897, "eval_f1": 0.5290178571428571, "eval_loss": 0.5084466934204102, "eval_mcc": 0.4042060476032326, "eval_precision": 0.7247706422018348, "eval_recall": 0.4165202108963093, "eval_runtime": 3.1369, "eval_samples_per_second": 544.167, "eval_steps_per_second": 17.214, "step": 642 }, { "epoch": 4.0, "grad_norm": 2.1826329231262207, "learning_rate": 0.0003458619621498268, "loss": 0.4995, "step": 856 }, { "epoch": 4.0, "eval_accuracy": 0.7451669595782073, "eval_f1": 0.49241540256709454, "eval_loss": 0.5046694278717041, "eval_mcc": 0.38160728386386483, "eval_precision": 0.7326388888888888, "eval_recall": 0.37082601054481545, "eval_runtime": 3.3202, "eval_samples_per_second": 514.123, "eval_steps_per_second": 16.264, "step": 856 }, { "epoch": 5.0, "grad_norm": 6.0804123878479, "learning_rate": 0.0003242455895154626, "loss": 0.4853, "step": 1070 }, { "epoch": 5.0, "eval_accuracy": 0.7574692442882249, "eval_f1": 0.6393728222996515, "eval_loss": 0.4947313964366913, "eval_mcc": 0.45673224487908876, "eval_precision": 0.6338514680483592, "eval_recall": 0.6449912126537786, "eval_runtime": 3.1788, "eval_samples_per_second": 536.989, "eval_steps_per_second": 16.987, "step": 1070 }, { "epoch": 6.0, "grad_norm": 2.1514229774475098, "learning_rate": 0.0003026292168810984, "loss": 0.4724, "step": 1284 }, { "epoch": 6.0, "eval_accuracy": 0.7650849443468073, "eval_f1": 0.6598812553011025, "eval_loss": 0.477894127368927, "eval_mcc": 0.4814730312998762, "eval_precision": 0.6377049180327868, "eval_recall": 0.6836555360281195, "eval_runtime": 3.2099, "eval_samples_per_second": 531.785, "eval_steps_per_second": 16.823, "step": 1284 }, { "epoch": 7.0, "grad_norm": 5.599643230438232, "learning_rate": 0.0002810128442467343, "loss": 0.46, "step": 1498 }, { "epoch": 7.0, "eval_accuracy": 0.773286467486819, "eval_f1": 0.5798045602605862, "eval_loss": 0.47905173897743225, "eval_mcc": 0.45971678817188244, "eval_precision": 0.7585227272727273, "eval_recall": 0.46924428822495606, "eval_runtime": 3.1625, "eval_samples_per_second": 539.761, "eval_steps_per_second": 17.075, "step": 1498 }, { "epoch": 8.0, "grad_norm": 3.2664639949798584, "learning_rate": 0.0002593964716123701, "loss": 0.4428, "step": 1712 }, { "epoch": 8.0, "eval_accuracy": 0.7885178676039836, "eval_f1": 0.6386386386386386, "eval_loss": 0.4652605950832367, "eval_mcc": 0.5028818263800968, "eval_precision": 0.7418604651162791, "eval_recall": 0.5606326889279437, "eval_runtime": 3.2551, "eval_samples_per_second": 524.404, "eval_steps_per_second": 16.589, "step": 1712 }, { "epoch": 9.0, "grad_norm": 5.06269645690918, "learning_rate": 0.00023778009897800593, "loss": 0.4442, "step": 1926 }, { "epoch": 9.0, "eval_accuracy": 0.7773872290568249, "eval_f1": 0.6008403361344538, "eval_loss": 0.47534701228141785, "eval_mcc": 0.4716667178372095, "eval_precision": 0.7467362924281984, "eval_recall": 0.5026362038664324, "eval_runtime": 3.1553, "eval_samples_per_second": 541.002, "eval_steps_per_second": 17.114, "step": 1926 }, { "epoch": 10.0, "grad_norm": 5.0700459480285645, "learning_rate": 0.00021616372634364174, "loss": 0.4307, "step": 2140 }, { "epoch": 10.0, "eval_accuracy": 0.7838312829525483, "eval_f1": 0.6666666666666666, "eval_loss": 0.47036686539649963, "eval_mcc": 0.5073397319926002, "eval_precision": 0.6858736059479554, "eval_recall": 0.648506151142355, "eval_runtime": 3.3854, "eval_samples_per_second": 504.231, "eval_steps_per_second": 15.951, "step": 2140 }, { "epoch": 11.0, "grad_norm": 6.991399765014648, "learning_rate": 0.00019454735370927757, "loss": 0.4243, "step": 2354 }, { "epoch": 11.0, "eval_accuracy": 0.7867603983596954, "eval_f1": 0.636, "eval_loss": 0.4695989191532135, "eval_mcc": 0.49868088180967785, "eval_precision": 0.7378190255220418, "eval_recall": 0.5588752196836555, "eval_runtime": 3.1405, "eval_samples_per_second": 543.539, "eval_steps_per_second": 17.195, "step": 2354 }, { "epoch": 12.0, "grad_norm": 5.208732604980469, "learning_rate": 0.0001729309810749134, "loss": 0.4198, "step": 2568 }, { "epoch": 12.0, "eval_accuracy": 0.7896895137668424, "eval_f1": 0.666046511627907, "eval_loss": 0.45460793375968933, "eval_mcc": 0.5152127188733497, "eval_precision": 0.7075098814229249, "eval_recall": 0.6291739894551845, "eval_runtime": 3.8567, "eval_samples_per_second": 442.612, "eval_steps_per_second": 14.002, "step": 2568 }, { "epoch": 13.0, "grad_norm": 16.56888771057129, "learning_rate": 0.0001513146084405492, "loss": 0.4199, "step": 2782 }, { "epoch": 13.0, "eval_accuracy": 0.7908611599297012, "eval_f1": 0.6641580432737535, "eval_loss": 0.46026891469955444, "eval_mcc": 0.5161067927608803, "eval_precision": 0.7145748987854251, "eval_recall": 0.6203866432337434, "eval_runtime": 3.1632, "eval_samples_per_second": 539.639, "eval_steps_per_second": 17.071, "step": 2782 }, { "epoch": 14.0, "grad_norm": 6.45969820022583, "learning_rate": 0.00012969823580618505, "loss": 0.4135, "step": 2996 }, { "epoch": 14.0, "eval_accuracy": 0.789103690685413, "eval_f1": 0.6622889305816135, "eval_loss": 0.4559510350227356, "eval_mcc": 0.5124487501229456, "eval_precision": 0.710261569416499, "eval_recall": 0.6203866432337434, "eval_runtime": 3.2281, "eval_samples_per_second": 528.794, "eval_steps_per_second": 16.728, "step": 2996 }, { "epoch": 15.0, "grad_norm": 1.7556620836257935, "learning_rate": 0.00010808186317182087, "loss": 0.4066, "step": 3210 }, { "epoch": 15.0, "eval_accuracy": 0.7955477445811365, "eval_f1": 0.6666666666666667, "eval_loss": 0.4586973190307617, "eval_mcc": 0.5249372343364972, "eval_precision": 0.7301255230125523, "eval_recall": 0.6133567662565905, "eval_runtime": 3.1701, "eval_samples_per_second": 538.473, "eval_steps_per_second": 17.034, "step": 3210 } ], "logging_steps": 500, "max_steps": 4280, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7871634966600.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.9950627760257441, "learning_rate": 0.0004323274526872835, "num_train_epochs": 20, "temperature": 49 } }