|
{ |
|
"best_metric": 0.6666666666666666, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-214", |
|
"epoch": 14.0, |
|
"eval_steps": 500, |
|
"global_step": 2996, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.3824027478694916, |
|
"learning_rate": 5.531898354978819e-05, |
|
"loss": 0.0835, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.06421080976724625, |
|
"eval_mcc": 0.0, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 3.1927, |
|
"eval_samples_per_second": 534.663, |
|
"eval_steps_per_second": 16.914, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.4931301176548004, |
|
"learning_rate": 5.10636771228814e-05, |
|
"loss": 0.066, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.006980802792321117, |
|
"eval_loss": 0.06297493726015091, |
|
"eval_mcc": 0.01713474628469157, |
|
"eval_precision": 0.5, |
|
"eval_recall": 0.0035149384885764497, |
|
"eval_runtime": 3.3168, |
|
"eval_samples_per_second": 514.655, |
|
"eval_steps_per_second": 16.281, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.38297247886657715, |
|
"learning_rate": 4.6808370695974625e-05, |
|
"loss": 0.0648, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.013864818024263433, |
|
"eval_loss": 0.06352359801530838, |
|
"eval_mcc": 0.024260699053001704, |
|
"eval_precision": 0.5, |
|
"eval_recall": 0.007029876977152899, |
|
"eval_runtime": 3.1502, |
|
"eval_samples_per_second": 541.864, |
|
"eval_steps_per_second": 17.142, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.4884311258792877, |
|
"learning_rate": 4.255306426906784e-05, |
|
"loss": 0.0642, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.017271157167530225, |
|
"eval_loss": 0.06221030279994011, |
|
"eval_mcc": 0.027140265094376777, |
|
"eval_precision": 0.5, |
|
"eval_recall": 0.008787346221441126, |
|
"eval_runtime": 3.7762, |
|
"eval_samples_per_second": 452.044, |
|
"eval_steps_per_second": 14.3, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.7101069688796997, |
|
"learning_rate": 3.829775784216106e-05, |
|
"loss": 0.064, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.017271157167530225, |
|
"eval_loss": 0.06226570904254913, |
|
"eval_mcc": 0.027140265094376777, |
|
"eval_precision": 0.5, |
|
"eval_recall": 0.008787346221441126, |
|
"eval_runtime": 3.1334, |
|
"eval_samples_per_second": 544.769, |
|
"eval_steps_per_second": 17.233, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.42137953639030457, |
|
"learning_rate": 3.404245141525427e-05, |
|
"loss": 0.0637, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6649091974223784, |
|
"eval_f1": 0.02389078498293515, |
|
"eval_loss": 0.06161979213356972, |
|
"eval_mcc": 0.016686958293742785, |
|
"eval_precision": 0.4117647058823529, |
|
"eval_recall": 0.012302284710017574, |
|
"eval_runtime": 3.2462, |
|
"eval_samples_per_second": 525.852, |
|
"eval_steps_per_second": 16.635, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.4732053279876709, |
|
"learning_rate": 2.9787144988347488e-05, |
|
"loss": 0.0634, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.664323374340949, |
|
"eval_f1": 0.020512820512820513, |
|
"eval_loss": 0.061539050191640854, |
|
"eval_mcc": 0.008597718124511362, |
|
"eval_precision": 0.375, |
|
"eval_recall": 0.01054481546572935, |
|
"eval_runtime": 3.1722, |
|
"eval_samples_per_second": 538.12, |
|
"eval_steps_per_second": 17.023, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.4758211374282837, |
|
"learning_rate": 2.55318385614407e-05, |
|
"loss": 0.0631, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6654950205038078, |
|
"eval_f1": 0.017211703958691912, |
|
"eval_loss": 0.061484288424253464, |
|
"eval_mcc": 0.01487410293271824, |
|
"eval_precision": 0.4166666666666667, |
|
"eval_recall": 0.008787346221441126, |
|
"eval_runtime": 3.1952, |
|
"eval_samples_per_second": 534.244, |
|
"eval_steps_per_second": 16.901, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.3838660418987274, |
|
"learning_rate": 2.127653213453392e-05, |
|
"loss": 0.0632, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6660808435852372, |
|
"eval_f1": 0.01724137931034483, |
|
"eval_loss": 0.06174994260072708, |
|
"eval_mcc": 0.020707884164064556, |
|
"eval_precision": 0.45454545454545453, |
|
"eval_recall": 0.008787346221441126, |
|
"eval_runtime": 3.1409, |
|
"eval_samples_per_second": 543.466, |
|
"eval_steps_per_second": 17.192, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.3443503677845001, |
|
"learning_rate": 1.7021225707627134e-05, |
|
"loss": 0.0629, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.664323374340949, |
|
"eval_f1": 0.017152658662092625, |
|
"eval_loss": 0.061242878437042236, |
|
"eval_mcc": 0.004592958330124466, |
|
"eval_precision": 0.35714285714285715, |
|
"eval_recall": 0.008787346221441126, |
|
"eval_runtime": 3.1964, |
|
"eval_samples_per_second": 534.034, |
|
"eval_steps_per_second": 16.894, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.31307530403137207, |
|
"learning_rate": 1.276591928072035e-05, |
|
"loss": 0.0628, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6654950205038078, |
|
"eval_f1": 0.017211703958691912, |
|
"eval_loss": 0.061483997851610184, |
|
"eval_mcc": 0.01487410293271824, |
|
"eval_precision": 0.4166666666666667, |
|
"eval_recall": 0.008787346221441126, |
|
"eval_runtime": 3.1572, |
|
"eval_samples_per_second": 540.674, |
|
"eval_steps_per_second": 17.104, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.26839011907577515, |
|
"learning_rate": 8.510612853813567e-06, |
|
"loss": 0.0628, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6654950205038078, |
|
"eval_f1": 0.017211703958691912, |
|
"eval_loss": 0.06137599050998688, |
|
"eval_mcc": 0.01487410293271824, |
|
"eval_precision": 0.4166666666666667, |
|
"eval_recall": 0.008787346221441126, |
|
"eval_runtime": 3.1832, |
|
"eval_samples_per_second": 536.247, |
|
"eval_steps_per_second": 16.964, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.8179745674133301, |
|
"learning_rate": 4.2553064269067835e-06, |
|
"loss": 0.0626, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6654950205038078, |
|
"eval_f1": 0.017211703958691912, |
|
"eval_loss": 0.06123984605073929, |
|
"eval_mcc": 0.01487410293271824, |
|
"eval_precision": 0.4166666666666667, |
|
"eval_recall": 0.008787346221441126, |
|
"eval_runtime": 3.1468, |
|
"eval_samples_per_second": 542.461, |
|
"eval_steps_per_second": 17.16, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.34526437520980835, |
|
"learning_rate": 0.0, |
|
"loss": 0.0624, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6654950205038078, |
|
"eval_f1": 0.017211703958691912, |
|
"eval_loss": 0.061105918139219284, |
|
"eval_mcc": 0.01487410293271824, |
|
"eval_precision": 0.4166666666666667, |
|
"eval_recall": 0.008787346221441126, |
|
"eval_runtime": 3.9414, |
|
"eval_samples_per_second": 433.098, |
|
"eval_steps_per_second": 13.701, |
|
"step": 2996 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2996, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 14, |
|
"save_steps": 500, |
|
"total_flos": 7346859302160.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.08651897585698409, |
|
"learning_rate": 5.9574289976694975e-05, |
|
"num_train_epochs": 14, |
|
"temperature": 43 |
|
} |
|
} |
|
|