xuancoblab2023's picture
Training in progress, epoch 1
c2e428b verified
raw
history blame
5.3 kB
{
"best_metric": 0.8414872798434442,
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-27/checkpoint-768",
"epoch": 9.0,
"eval_steps": 500,
"global_step": 864,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.020627737045288,
"learning_rate": 0.00020460634625664984,
"loss": 0.494,
"step": 96
},
{
"epoch": 1.0,
"eval_accuracy": 0.7886497064579256,
"eval_f1": 0.8032786885245903,
"eval_loss": 0.4278793931007385,
"eval_precision": 0.7512776831345827,
"eval_recall": 0.863013698630137,
"eval_runtime": 24.9337,
"eval_samples_per_second": 40.989,
"eval_steps_per_second": 1.283,
"step": 96
},
{
"epoch": 2.0,
"grad_norm": 2.554121732711792,
"learning_rate": 0.00017903055297456864,
"loss": 0.4258,
"step": 192
},
{
"epoch": 2.0,
"eval_accuracy": 0.8062622309197651,
"eval_f1": 0.8241563055062168,
"eval_loss": 0.40374234318733215,
"eval_precision": 0.7544715447154472,
"eval_recall": 0.9080234833659491,
"eval_runtime": 25.4225,
"eval_samples_per_second": 40.201,
"eval_steps_per_second": 1.259,
"step": 192
},
{
"epoch": 3.0,
"grad_norm": 3.225475311279297,
"learning_rate": 0.00015345475969248738,
"loss": 0.405,
"step": 288
},
{
"epoch": 3.0,
"eval_accuracy": 0.8277886497064579,
"eval_f1": 0.84,
"eval_loss": 0.39554914832115173,
"eval_precision": 0.7843803056027164,
"eval_recall": 0.9041095890410958,
"eval_runtime": 25.5434,
"eval_samples_per_second": 40.01,
"eval_steps_per_second": 1.253,
"step": 288
},
{
"epoch": 4.0,
"grad_norm": 3.6178488731384277,
"learning_rate": 0.00012787896641040618,
"loss": 0.3903,
"step": 384
},
{
"epoch": 4.0,
"eval_accuracy": 0.821917808219178,
"eval_f1": 0.8222656250000001,
"eval_loss": 0.38986799120903015,
"eval_precision": 0.8206627680311891,
"eval_recall": 0.8238747553816047,
"eval_runtime": 25.4279,
"eval_samples_per_second": 40.192,
"eval_steps_per_second": 1.258,
"step": 384
},
{
"epoch": 5.0,
"grad_norm": 1.1577227115631104,
"learning_rate": 0.00010230317312832492,
"loss": 0.3846,
"step": 480
},
{
"epoch": 5.0,
"eval_accuracy": 0.8228962818003914,
"eval_f1": 0.8309990662931839,
"eval_loss": 0.3861733376979828,
"eval_precision": 0.7946428571428571,
"eval_recall": 0.8708414872798435,
"eval_runtime": 25.7302,
"eval_samples_per_second": 39.72,
"eval_steps_per_second": 1.244,
"step": 480
},
{
"epoch": 6.0,
"grad_norm": 2.0598785877227783,
"learning_rate": 7.672737984624369e-05,
"loss": 0.3784,
"step": 576
},
{
"epoch": 6.0,
"eval_accuracy": 0.8317025440313112,
"eval_f1": 0.8472468916518652,
"eval_loss": 0.3895849585533142,
"eval_precision": 0.775609756097561,
"eval_recall": 0.9334637964774951,
"eval_runtime": 25.139,
"eval_samples_per_second": 40.654,
"eval_steps_per_second": 1.273,
"step": 576
},
{
"epoch": 7.0,
"grad_norm": 1.7429569959640503,
"learning_rate": 5.115158656416246e-05,
"loss": 0.3756,
"step": 672
},
{
"epoch": 7.0,
"eval_accuracy": 0.8405088062622309,
"eval_f1": 0.8530207394048693,
"eval_loss": 0.3818517029285431,
"eval_precision": 0.7909698996655519,
"eval_recall": 0.9256360078277887,
"eval_runtime": 25.2278,
"eval_samples_per_second": 40.511,
"eval_steps_per_second": 1.268,
"step": 672
},
{
"epoch": 8.0,
"grad_norm": 2.883507013320923,
"learning_rate": 2.557579328208123e-05,
"loss": 0.3725,
"step": 768
},
{
"epoch": 8.0,
"eval_accuracy": 0.8414872798434442,
"eval_f1": 0.8527272727272727,
"eval_loss": 0.38016170263290405,
"eval_precision": 0.7962648556876061,
"eval_recall": 0.9178082191780822,
"eval_runtime": 24.8536,
"eval_samples_per_second": 41.121,
"eval_steps_per_second": 1.288,
"step": 768
},
{
"epoch": 9.0,
"grad_norm": 2.0533533096313477,
"learning_rate": 0.0,
"loss": 0.3665,
"step": 864
},
{
"epoch": 9.0,
"eval_accuracy": 0.837573385518591,
"eval_f1": 0.8471454880294659,
"eval_loss": 0.38007956743240356,
"eval_precision": 0.8,
"eval_recall": 0.9001956947162426,
"eval_runtime": 25.3933,
"eval_samples_per_second": 40.247,
"eval_steps_per_second": 1.26,
"step": 864
}
],
"logging_steps": 500,
"max_steps": 864,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 500,
"total_flos": 2121256775520.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": {
"alpha": 0.718256826807231,
"learning_rate": 0.0002301821395387311,
"num_train_epochs": 9,
"temperature": 6
}
}