|
{ |
|
"best_metric": 0.7885178676039836, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-13/checkpoint-3210", |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 3210, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 8.763052940368652, |
|
"learning_rate": 0.00036729100192842157, |
|
"loss": 0.5966, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7211482132396017, |
|
"eval_f1": 0.5132924335378324, |
|
"eval_loss": 0.5418887734413147, |
|
"eval_mcc": 0.33384466816371045, |
|
"eval_precision": 0.6136919315403423, |
|
"eval_recall": 0.44112478031634444, |
|
"eval_runtime": 3.1521, |
|
"eval_samples_per_second": 541.55, |
|
"eval_steps_per_second": 17.132, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.084683895111084, |
|
"learning_rate": 0.00034688594626573147, |
|
"loss": 0.5299, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7404803749267721, |
|
"eval_f1": 0.5179542981501633, |
|
"eval_loss": 0.5285571813583374, |
|
"eval_mcc": 0.37347577729616044, |
|
"eval_precision": 0.68, |
|
"eval_recall": 0.4182776801405975, |
|
"eval_runtime": 3.8839, |
|
"eval_samples_per_second": 439.512, |
|
"eval_steps_per_second": 13.904, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.8376232385635376, |
|
"learning_rate": 0.0003264808906030414, |
|
"loss": 0.5041, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7475102519039251, |
|
"eval_f1": 0.4800965018094088, |
|
"eval_loss": 0.5334233045578003, |
|
"eval_mcc": 0.3885026303183633, |
|
"eval_precision": 0.7653846153846153, |
|
"eval_recall": 0.34973637961335674, |
|
"eval_runtime": 3.1807, |
|
"eval_samples_per_second": 536.674, |
|
"eval_steps_per_second": 16.977, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.807384490966797, |
|
"learning_rate": 0.00030607583494035134, |
|
"loss": 0.4955, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7398945518453427, |
|
"eval_f1": 0.4422110552763819, |
|
"eval_loss": 0.5175681710243225, |
|
"eval_mcc": 0.36720426049390004, |
|
"eval_precision": 0.775330396475771, |
|
"eval_recall": 0.3093145869947276, |
|
"eval_runtime": 3.9525, |
|
"eval_samples_per_second": 431.878, |
|
"eval_steps_per_second": 13.662, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 7.096841335296631, |
|
"learning_rate": 0.0002856707792776612, |
|
"loss": 0.4841, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7516110134739309, |
|
"eval_f1": 0.6052141527001862, |
|
"eval_loss": 0.49192342162132263, |
|
"eval_mcc": 0.4265647238499904, |
|
"eval_precision": 0.6435643564356436, |
|
"eval_recall": 0.5711775043936731, |
|
"eval_runtime": 3.1288, |
|
"eval_samples_per_second": 545.582, |
|
"eval_steps_per_second": 17.259, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 4.960063457489014, |
|
"learning_rate": 0.00026526572361497115, |
|
"loss": 0.4738, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7621558289396603, |
|
"eval_f1": 0.621268656716418, |
|
"eval_loss": 0.4805011451244354, |
|
"eval_mcc": 0.4506812006793125, |
|
"eval_precision": 0.6620278330019881, |
|
"eval_recall": 0.5852372583479789, |
|
"eval_runtime": 3.2097, |
|
"eval_samples_per_second": 531.827, |
|
"eval_steps_per_second": 16.824, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 6.75669002532959, |
|
"learning_rate": 0.000244860667952281, |
|
"loss": 0.4601, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7586408904510837, |
|
"eval_f1": 0.5264367816091953, |
|
"eval_loss": 0.4859382212162018, |
|
"eval_mcc": 0.41956224318934715, |
|
"eval_precision": 0.760797342192691, |
|
"eval_recall": 0.4024604569420035, |
|
"eval_runtime": 3.1647, |
|
"eval_samples_per_second": 539.388, |
|
"eval_steps_per_second": 17.063, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 5.769745349884033, |
|
"learning_rate": 0.00022445561228959097, |
|
"loss": 0.4483, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7785588752196837, |
|
"eval_f1": 0.6337209302325582, |
|
"eval_loss": 0.466266006231308, |
|
"eval_mcc": 0.4826297358701252, |
|
"eval_precision": 0.7062634989200864, |
|
"eval_recall": 0.5746924428822495, |
|
"eval_runtime": 3.8302, |
|
"eval_samples_per_second": 445.67, |
|
"eval_steps_per_second": 14.099, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 7.356119632720947, |
|
"learning_rate": 0.00020405055662690085, |
|
"loss": 0.4408, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7809021675454013, |
|
"eval_f1": 0.6191446028513238, |
|
"eval_loss": 0.46509596705436707, |
|
"eval_mcc": 0.48266230635542773, |
|
"eval_precision": 0.7360774818401937, |
|
"eval_recall": 0.5342706502636204, |
|
"eval_runtime": 3.1727, |
|
"eval_samples_per_second": 538.023, |
|
"eval_steps_per_second": 17.02, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.752133369445801, |
|
"learning_rate": 0.00018364550096421078, |
|
"loss": 0.4322, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7809021675454013, |
|
"eval_f1": 0.661231884057971, |
|
"eval_loss": 0.46195492148399353, |
|
"eval_mcc": 0.5000717628957632, |
|
"eval_precision": 0.6822429906542056, |
|
"eval_recall": 0.6414762741652021, |
|
"eval_runtime": 3.329, |
|
"eval_samples_per_second": 512.771, |
|
"eval_steps_per_second": 16.221, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.258147954940796, |
|
"learning_rate": 0.0001632404453015207, |
|
"loss": 0.4251, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7756297598125366, |
|
"eval_f1": 0.6135216952573159, |
|
"eval_loss": 0.46607503294944763, |
|
"eval_mcc": 0.4705148045618678, |
|
"eval_precision": 0.7203791469194313, |
|
"eval_recall": 0.5342706502636204, |
|
"eval_runtime": 3.1529, |
|
"eval_samples_per_second": 541.404, |
|
"eval_steps_per_second": 17.127, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 3.4573206901550293, |
|
"learning_rate": 0.0001428353896388306, |
|
"loss": 0.417, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7832454598711189, |
|
"eval_f1": 0.6777003484320557, |
|
"eval_loss": 0.4633403718471527, |
|
"eval_mcc": 0.5144799999787437, |
|
"eval_precision": 0.6718480138169257, |
|
"eval_recall": 0.6836555360281195, |
|
"eval_runtime": 3.2478, |
|
"eval_samples_per_second": 525.584, |
|
"eval_steps_per_second": 16.627, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 16.82988166809082, |
|
"learning_rate": 0.0001224303339761405, |
|
"loss": 0.4122, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7855887521968365, |
|
"eval_f1": 0.6623616236162362, |
|
"eval_loss": 0.47581344842910767, |
|
"eval_mcc": 0.5072003861458727, |
|
"eval_precision": 0.6970873786407767, |
|
"eval_recall": 0.6309314586994728, |
|
"eval_runtime": 3.1465, |
|
"eval_samples_per_second": 542.51, |
|
"eval_steps_per_second": 17.162, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 14.468791961669922, |
|
"learning_rate": 0.00010202527831345042, |
|
"loss": 0.4053, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7814879906268307, |
|
"eval_f1": 0.6642664266426643, |
|
"eval_loss": 0.464487224817276, |
|
"eval_mcc": 0.5027717230279147, |
|
"eval_precision": 0.6808118081180812, |
|
"eval_recall": 0.648506151142355, |
|
"eval_runtime": 3.3781, |
|
"eval_samples_per_second": 505.31, |
|
"eval_steps_per_second": 15.985, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 3.5750694274902344, |
|
"learning_rate": 8.162022265076034e-05, |
|
"loss": 0.3972, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7885178676039836, |
|
"eval_f1": 0.6762331838565022, |
|
"eval_loss": 0.4647216796875, |
|
"eval_mcc": 0.5195513014296814, |
|
"eval_precision": 0.6904761904761905, |
|
"eval_recall": 0.6625659050966608, |
|
"eval_runtime": 3.1634, |
|
"eval_samples_per_second": 539.617, |
|
"eval_steps_per_second": 17.07, |
|
"step": 3210 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4066, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 19, |
|
"save_steps": 500, |
|
"total_flos": 7871634966600.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.9979252549744665, |
|
"learning_rate": 0.00038769605759111166, |
|
"num_train_epochs": 19, |
|
"temperature": 10 |
|
} |
|
} |
|
|