xuancoblab2023's picture
Training in progress, epoch 1
46b71dd verified
raw
history blame
5.86 kB
{
"best_metric": 0.8414872798434442,
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-14/checkpoint-960",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 960,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 2.5439341068267822,
"learning_rate": 0.00025636357557273684,
"loss": 0.5365,
"step": 96
},
{
"epoch": 1.0,
"eval_accuracy": 0.7935420743639922,
"eval_f1": 0.8087035358114234,
"eval_loss": 0.4461754560470581,
"eval_precision": 0.7533783783783784,
"eval_recall": 0.87279843444227,
"eval_runtime": 25.9122,
"eval_samples_per_second": 39.441,
"eval_steps_per_second": 1.235,
"step": 96
},
{
"epoch": 2.0,
"grad_norm": 4.693763732910156,
"learning_rate": 0.00022787873384243272,
"loss": 0.4548,
"step": 192
},
{
"epoch": 2.0,
"eval_accuracy": 0.8003913894324853,
"eval_f1": 0.8216783216783217,
"eval_loss": 0.43167614936828613,
"eval_precision": 0.7424960505529226,
"eval_recall": 0.9197651663405088,
"eval_runtime": 25.6332,
"eval_samples_per_second": 39.87,
"eval_steps_per_second": 1.248,
"step": 192
},
{
"epoch": 3.0,
"grad_norm": 3.1225955486297607,
"learning_rate": 0.00019939389211212863,
"loss": 0.4276,
"step": 288
},
{
"epoch": 3.0,
"eval_accuracy": 0.812133072407045,
"eval_f1": 0.8282647584973166,
"eval_loss": 0.4346017837524414,
"eval_precision": 0.7627677100494233,
"eval_recall": 0.9060665362035225,
"eval_runtime": 25.7026,
"eval_samples_per_second": 39.762,
"eval_steps_per_second": 1.245,
"step": 288
},
{
"epoch": 4.0,
"grad_norm": 2.9123752117156982,
"learning_rate": 0.00017090905038182454,
"loss": 0.4092,
"step": 384
},
{
"epoch": 4.0,
"eval_accuracy": 0.8228962818003914,
"eval_f1": 0.8297271872060207,
"eval_loss": 0.3994919955730438,
"eval_precision": 0.7989130434782609,
"eval_recall": 0.863013698630137,
"eval_runtime": 25.5493,
"eval_samples_per_second": 40.001,
"eval_steps_per_second": 1.252,
"step": 384
},
{
"epoch": 5.0,
"grad_norm": 2.3602194786071777,
"learning_rate": 0.00014242420865152045,
"loss": 0.3965,
"step": 480
},
{
"epoch": 5.0,
"eval_accuracy": 0.824853228962818,
"eval_f1": 0.8306527909176915,
"eval_loss": 0.407778263092041,
"eval_precision": 0.8040293040293041,
"eval_recall": 0.8590998043052838,
"eval_runtime": 25.3461,
"eval_samples_per_second": 40.322,
"eval_steps_per_second": 1.263,
"step": 480
},
{
"epoch": 6.0,
"grad_norm": 2.538515567779541,
"learning_rate": 0.00011393936692121636,
"loss": 0.3968,
"step": 576
},
{
"epoch": 6.0,
"eval_accuracy": 0.8317025440313112,
"eval_f1": 0.8477876106194692,
"eval_loss": 0.4041251838207245,
"eval_precision": 0.7738287560581584,
"eval_recall": 0.9373776908023483,
"eval_runtime": 25.09,
"eval_samples_per_second": 40.733,
"eval_steps_per_second": 1.275,
"step": 576
},
{
"epoch": 7.0,
"grad_norm": 2.204845428466797,
"learning_rate": 8.545452519091227e-05,
"loss": 0.3854,
"step": 672
},
{
"epoch": 7.0,
"eval_accuracy": 0.8365949119373777,
"eval_f1": 0.8499550763701708,
"eval_loss": 0.39838555455207825,
"eval_precision": 0.7857142857142857,
"eval_recall": 0.9256360078277887,
"eval_runtime": 24.9237,
"eval_samples_per_second": 41.005,
"eval_steps_per_second": 1.284,
"step": 672
},
{
"epoch": 8.0,
"grad_norm": 2.975243091583252,
"learning_rate": 5.696968346060818e-05,
"loss": 0.3772,
"step": 768
},
{
"epoch": 8.0,
"eval_accuracy": 0.837573385518591,
"eval_f1": 0.8520499108734402,
"eval_loss": 0.39499443769454956,
"eval_precision": 0.7823240589198036,
"eval_recall": 0.9354207436399217,
"eval_runtime": 24.9105,
"eval_samples_per_second": 41.027,
"eval_steps_per_second": 1.285,
"step": 768
},
{
"epoch": 9.0,
"grad_norm": 3.0168142318725586,
"learning_rate": 2.848484173030409e-05,
"loss": 0.372,
"step": 864
},
{
"epoch": 9.0,
"eval_accuracy": 0.8405088062622309,
"eval_f1": 0.8500459981600736,
"eval_loss": 0.3911450505256653,
"eval_precision": 0.8020833333333334,
"eval_recall": 0.9041095890410958,
"eval_runtime": 24.8944,
"eval_samples_per_second": 41.053,
"eval_steps_per_second": 1.285,
"step": 864
},
{
"epoch": 10.0,
"grad_norm": 3.654553174972534,
"learning_rate": 0.0,
"loss": 0.3739,
"step": 960
},
{
"epoch": 10.0,
"eval_accuracy": 0.8414872798434442,
"eval_f1": 0.852994555353902,
"eval_loss": 0.39094072580337524,
"eval_precision": 0.7952622673434856,
"eval_recall": 0.9197651663405088,
"eval_runtime": 24.9774,
"eval_samples_per_second": 40.917,
"eval_steps_per_second": 1.281,
"step": 960
}
],
"logging_steps": 500,
"max_steps": 960,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2356951972800.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": {
"alpha": 0.8320855687119669,
"learning_rate": 0.0002848484173030409,
"num_train_epochs": 10,
"temperature": 8
}
}