{ "best_metric": 0.7885178676039836, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-13/checkpoint-3210", "epoch": 19.0, "eval_steps": 500, "global_step": 4066, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 8.763052940368652, "learning_rate": 0.00036729100192842157, "loss": 0.5966, "step": 214 }, { "epoch": 1.0, "eval_accuracy": 0.7211482132396017, "eval_f1": 0.5132924335378324, "eval_loss": 0.5418887734413147, "eval_mcc": 0.33384466816371045, "eval_precision": 0.6136919315403423, "eval_recall": 0.44112478031634444, "eval_runtime": 3.1521, "eval_samples_per_second": 541.55, "eval_steps_per_second": 17.132, "step": 214 }, { "epoch": 2.0, "grad_norm": 3.084683895111084, "learning_rate": 0.00034688594626573147, "loss": 0.5299, "step": 428 }, { "epoch": 2.0, "eval_accuracy": 0.7404803749267721, "eval_f1": 0.5179542981501633, "eval_loss": 0.5285571813583374, "eval_mcc": 0.37347577729616044, "eval_precision": 0.68, "eval_recall": 0.4182776801405975, "eval_runtime": 3.8839, "eval_samples_per_second": 439.512, "eval_steps_per_second": 13.904, "step": 428 }, { "epoch": 3.0, "grad_norm": 1.8376232385635376, "learning_rate": 0.0003264808906030414, "loss": 0.5041, "step": 642 }, { "epoch": 3.0, "eval_accuracy": 0.7475102519039251, "eval_f1": 0.4800965018094088, "eval_loss": 0.5334233045578003, "eval_mcc": 0.3885026303183633, "eval_precision": 0.7653846153846153, "eval_recall": 0.34973637961335674, "eval_runtime": 3.1807, "eval_samples_per_second": 536.674, "eval_steps_per_second": 16.977, "step": 642 }, { "epoch": 4.0, "grad_norm": 2.807384490966797, "learning_rate": 0.00030607583494035134, "loss": 0.4955, "step": 856 }, { "epoch": 4.0, "eval_accuracy": 0.7398945518453427, "eval_f1": 0.4422110552763819, "eval_loss": 0.5175681710243225, "eval_mcc": 0.36720426049390004, "eval_precision": 0.775330396475771, "eval_recall": 0.3093145869947276, "eval_runtime": 3.9525, "eval_samples_per_second": 431.878, "eval_steps_per_second": 13.662, "step": 856 }, { "epoch": 5.0, "grad_norm": 7.096841335296631, "learning_rate": 0.0002856707792776612, "loss": 0.4841, "step": 1070 }, { "epoch": 5.0, "eval_accuracy": 0.7516110134739309, "eval_f1": 0.6052141527001862, "eval_loss": 0.49192342162132263, "eval_mcc": 0.4265647238499904, "eval_precision": 0.6435643564356436, "eval_recall": 0.5711775043936731, "eval_runtime": 3.1288, "eval_samples_per_second": 545.582, "eval_steps_per_second": 17.259, "step": 1070 }, { "epoch": 6.0, "grad_norm": 4.960063457489014, "learning_rate": 0.00026526572361497115, "loss": 0.4738, "step": 1284 }, { "epoch": 6.0, "eval_accuracy": 0.7621558289396603, "eval_f1": 0.621268656716418, "eval_loss": 0.4805011451244354, "eval_mcc": 0.4506812006793125, "eval_precision": 0.6620278330019881, "eval_recall": 0.5852372583479789, "eval_runtime": 3.2097, "eval_samples_per_second": 531.827, "eval_steps_per_second": 16.824, "step": 1284 }, { "epoch": 7.0, "grad_norm": 6.75669002532959, "learning_rate": 0.000244860667952281, "loss": 0.4601, "step": 1498 }, { "epoch": 7.0, "eval_accuracy": 0.7586408904510837, "eval_f1": 0.5264367816091953, "eval_loss": 0.4859382212162018, "eval_mcc": 0.41956224318934715, "eval_precision": 0.760797342192691, "eval_recall": 0.4024604569420035, "eval_runtime": 3.1647, "eval_samples_per_second": 539.388, "eval_steps_per_second": 17.063, "step": 1498 }, { "epoch": 8.0, "grad_norm": 5.769745349884033, "learning_rate": 0.00022445561228959097, "loss": 0.4483, "step": 1712 }, { "epoch": 8.0, "eval_accuracy": 0.7785588752196837, "eval_f1": 0.6337209302325582, "eval_loss": 0.466266006231308, "eval_mcc": 0.4826297358701252, "eval_precision": 0.7062634989200864, "eval_recall": 0.5746924428822495, "eval_runtime": 3.8302, "eval_samples_per_second": 445.67, "eval_steps_per_second": 14.099, "step": 1712 }, { "epoch": 9.0, "grad_norm": 7.356119632720947, "learning_rate": 0.00020405055662690085, "loss": 0.4408, "step": 1926 }, { "epoch": 9.0, "eval_accuracy": 0.7809021675454013, "eval_f1": 0.6191446028513238, "eval_loss": 0.46509596705436707, "eval_mcc": 0.48266230635542773, "eval_precision": 0.7360774818401937, "eval_recall": 0.5342706502636204, "eval_runtime": 3.1727, "eval_samples_per_second": 538.023, "eval_steps_per_second": 17.02, "step": 1926 }, { "epoch": 10.0, "grad_norm": 3.752133369445801, "learning_rate": 0.00018364550096421078, "loss": 0.4322, "step": 2140 }, { "epoch": 10.0, "eval_accuracy": 0.7809021675454013, "eval_f1": 0.661231884057971, "eval_loss": 0.46195492148399353, "eval_mcc": 0.5000717628957632, "eval_precision": 0.6822429906542056, "eval_recall": 0.6414762741652021, "eval_runtime": 3.329, "eval_samples_per_second": 512.771, "eval_steps_per_second": 16.221, "step": 2140 }, { "epoch": 11.0, "grad_norm": 2.258147954940796, "learning_rate": 0.0001632404453015207, "loss": 0.4251, "step": 2354 }, { "epoch": 11.0, "eval_accuracy": 0.7756297598125366, "eval_f1": 0.6135216952573159, "eval_loss": 0.46607503294944763, "eval_mcc": 0.4705148045618678, "eval_precision": 0.7203791469194313, "eval_recall": 0.5342706502636204, "eval_runtime": 3.1529, "eval_samples_per_second": 541.404, "eval_steps_per_second": 17.127, "step": 2354 }, { "epoch": 12.0, "grad_norm": 3.4573206901550293, "learning_rate": 0.0001428353896388306, "loss": 0.417, "step": 2568 }, { "epoch": 12.0, "eval_accuracy": 0.7832454598711189, "eval_f1": 0.6777003484320557, "eval_loss": 0.4633403718471527, "eval_mcc": 0.5144799999787437, "eval_precision": 0.6718480138169257, "eval_recall": 0.6836555360281195, "eval_runtime": 3.2478, "eval_samples_per_second": 525.584, "eval_steps_per_second": 16.627, "step": 2568 }, { "epoch": 13.0, "grad_norm": 16.82988166809082, "learning_rate": 0.0001224303339761405, "loss": 0.4122, "step": 2782 }, { "epoch": 13.0, "eval_accuracy": 0.7855887521968365, "eval_f1": 0.6623616236162362, "eval_loss": 0.47581344842910767, "eval_mcc": 0.5072003861458727, "eval_precision": 0.6970873786407767, "eval_recall": 0.6309314586994728, "eval_runtime": 3.1465, "eval_samples_per_second": 542.51, "eval_steps_per_second": 17.162, "step": 2782 }, { "epoch": 14.0, "grad_norm": 14.468791961669922, "learning_rate": 0.00010202527831345042, "loss": 0.4053, "step": 2996 }, { "epoch": 14.0, "eval_accuracy": 0.7814879906268307, "eval_f1": 0.6642664266426643, "eval_loss": 0.464487224817276, "eval_mcc": 0.5027717230279147, "eval_precision": 0.6808118081180812, "eval_recall": 0.648506151142355, "eval_runtime": 3.3781, "eval_samples_per_second": 505.31, "eval_steps_per_second": 15.985, "step": 2996 }, { "epoch": 15.0, "grad_norm": 3.5750694274902344, "learning_rate": 8.162022265076034e-05, "loss": 0.3972, "step": 3210 }, { "epoch": 15.0, "eval_accuracy": 0.7885178676039836, "eval_f1": 0.6762331838565022, "eval_loss": 0.4647216796875, "eval_mcc": 0.5195513014296814, "eval_precision": 0.6904761904761905, "eval_recall": 0.6625659050966608, "eval_runtime": 3.1634, "eval_samples_per_second": 539.617, "eval_steps_per_second": 17.07, "step": 3210 }, { "epoch": 16.0, "grad_norm": 5.849695205688477, "learning_rate": 6.121516698807025e-05, "loss": 0.3948, "step": 3424 }, { "epoch": 16.0, "eval_accuracy": 0.7703573520796719, "eval_f1": 0.6711409395973155, "eval_loss": 0.47917693853378296, "eval_mcc": 0.49648044887604803, "eval_precision": 0.6420545746388443, "eval_recall": 0.70298769771529, "eval_runtime": 3.3554, "eval_samples_per_second": 508.739, "eval_steps_per_second": 16.094, "step": 3424 }, { "epoch": 17.0, "grad_norm": 13.20305061340332, "learning_rate": 4.081011132538017e-05, "loss": 0.3958, "step": 3638 }, { "epoch": 17.0, "eval_accuracy": 0.7850029291154071, "eval_f1": 0.6760812003530451, "eval_loss": 0.47589311003685, "eval_mcc": 0.5152027838894037, "eval_precision": 0.6790780141843972, "eval_recall": 0.6731107205623902, "eval_runtime": 3.1566, "eval_samples_per_second": 540.765, "eval_steps_per_second": 17.107, "step": 3638 }, { "epoch": 18.0, "grad_norm": 4.045512676239014, "learning_rate": 2.0405055662690086e-05, "loss": 0.3873, "step": 3852 }, { "epoch": 18.0, "eval_accuracy": 0.7867603983596954, "eval_f1": 0.6738351254480287, "eval_loss": 0.47346317768096924, "eval_mcc": 0.5157490976144277, "eval_precision": 0.6873857404021938, "eval_recall": 0.6608084358523726, "eval_runtime": 3.8927, "eval_samples_per_second": 438.512, "eval_steps_per_second": 13.872, "step": 3852 }, { "epoch": 19.0, "grad_norm": 5.878767967224121, "learning_rate": 0.0, "loss": 0.3795, "step": 4066 }, { "epoch": 19.0, "eval_accuracy": 0.7832454598711189, "eval_f1": 0.6760070052539404, "eval_loss": 0.47538015246391296, "eval_mcc": 0.5131650023345729, "eval_precision": 0.6736474694589878, "eval_recall": 0.6783831282952548, "eval_runtime": 3.1451, "eval_samples_per_second": 542.748, "eval_steps_per_second": 17.17, "step": 4066 } ], "logging_steps": 500, "max_steps": 4066, "num_input_tokens_seen": 0, "num_train_epochs": 19, "save_steps": 500, "total_flos": 9970737624360.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.9979252549744665, "learning_rate": 0.00038769605759111166, "num_train_epochs": 19, "temperature": 10 } }