xuancoblab2023's picture
Training in progress, epoch 1
2c2b859 verified
raw
history blame
4.86 kB
{
"best_metric": 0.5567514677103719,
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-704",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 704,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.4811249077320099,
"learning_rate": 6.084357840038042e-05,
"loss": 0.24,
"step": 88
},
{
"epoch": 1.0,
"eval_accuracy": 0.5430528375733855,
"eval_f1": 0.2098138747884941,
"eval_loss": 0.21115726232528687,
"eval_precision": 0.775,
"eval_recall": 0.12133072407045009,
"eval_runtime": 31.2118,
"eval_samples_per_second": 32.744,
"eval_steps_per_second": 1.025,
"step": 88
},
{
"epoch": 2.0,
"grad_norm": 0.9260004758834839,
"learning_rate": 5.408318080033815e-05,
"loss": 0.2091,
"step": 176
},
{
"epoch": 2.0,
"eval_accuracy": 0.541095890410959,
"eval_f1": 0.20642978003384094,
"eval_loss": 0.1988278329372406,
"eval_precision": 0.7625,
"eval_recall": 0.11937377690802348,
"eval_runtime": 30.8172,
"eval_samples_per_second": 33.163,
"eval_steps_per_second": 1.038,
"step": 176
},
{
"epoch": 3.0,
"grad_norm": 0.5467262864112854,
"learning_rate": 4.732278320029588e-05,
"loss": 0.2019,
"step": 264
},
{
"epoch": 3.0,
"eval_accuracy": 0.5420743639921722,
"eval_f1": 0.20945945945945946,
"eval_loss": 0.1968851089477539,
"eval_precision": 0.7654320987654321,
"eval_recall": 0.12133072407045009,
"eval_runtime": 31.5996,
"eval_samples_per_second": 32.342,
"eval_steps_per_second": 1.013,
"step": 264
},
{
"epoch": 4.0,
"grad_norm": 0.3741915822029114,
"learning_rate": 4.0562385600253614e-05,
"loss": 0.2013,
"step": 352
},
{
"epoch": 4.0,
"eval_accuracy": 0.5391389432485323,
"eval_f1": 0.19761499148211242,
"eval_loss": 0.19684568047523499,
"eval_precision": 0.7631578947368421,
"eval_recall": 0.11350293542074363,
"eval_runtime": 30.3074,
"eval_samples_per_second": 33.721,
"eval_steps_per_second": 1.056,
"step": 352
},
{
"epoch": 5.0,
"grad_norm": 0.39911171793937683,
"learning_rate": 3.3801988000211345e-05,
"loss": 0.2002,
"step": 440
},
{
"epoch": 5.0,
"eval_accuracy": 0.547945205479452,
"eval_f1": 0.23255813953488372,
"eval_loss": 0.1965479999780655,
"eval_precision": 0.7692307692307693,
"eval_recall": 0.136986301369863,
"eval_runtime": 29.369,
"eval_samples_per_second": 34.799,
"eval_steps_per_second": 1.09,
"step": 440
},
{
"epoch": 6.0,
"grad_norm": 1.0663460493087769,
"learning_rate": 2.7041590400169076e-05,
"loss": 0.1998,
"step": 528
},
{
"epoch": 6.0,
"eval_accuracy": 0.5557729941291585,
"eval_f1": 0.26058631921824105,
"eval_loss": 0.19687490165233612,
"eval_precision": 0.7766990291262136,
"eval_recall": 0.15655577299412915,
"eval_runtime": 29.649,
"eval_samples_per_second": 34.47,
"eval_steps_per_second": 1.079,
"step": 528
},
{
"epoch": 7.0,
"grad_norm": 0.4529750347137451,
"learning_rate": 2.0281192800126807e-05,
"loss": 0.1992,
"step": 616
},
{
"epoch": 7.0,
"eval_accuracy": 0.5499021526418787,
"eval_f1": 0.24092409240924093,
"eval_loss": 0.19784095883369446,
"eval_precision": 0.7684210526315789,
"eval_recall": 0.14285714285714285,
"eval_runtime": 29.7832,
"eval_samples_per_second": 34.315,
"eval_steps_per_second": 1.074,
"step": 616
},
{
"epoch": 8.0,
"grad_norm": 0.8743975162506104,
"learning_rate": 1.3520795200084538e-05,
"loss": 0.1989,
"step": 704
},
{
"epoch": 8.0,
"eval_accuracy": 0.5567514677103719,
"eval_f1": 0.2634146341463414,
"eval_loss": 0.19613005220890045,
"eval_precision": 0.7788461538461539,
"eval_recall": 0.15851272015655576,
"eval_runtime": 28.6336,
"eval_samples_per_second": 35.692,
"eval_steps_per_second": 1.118,
"step": 704
}
],
"logging_steps": 500,
"max_steps": 880,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1885561578240.0,
"train_batch_size": 35,
"trial_name": null,
"trial_params": {
"alpha": 0.2634542499188758,
"learning_rate": 6.760397600042269e-05,
"num_train_epochs": 10,
"per_device_train_batch_size": 35,
"temperature": 2
}
}