xuancoblab2023's picture
Training in progress, epoch 1
c6afd6e verified
raw
history blame
4.81 kB
{
"best_metric": 0.7544031311154599,
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-6/checkpoint-744",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 744,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.4286664128303528,
"learning_rate": 1.945409831472016e-05,
"loss": 0.4806,
"step": 93
},
{
"epoch": 1.0,
"eval_accuracy": 0.5,
"eval_f1": 0.0,
"eval_loss": 0.4703535735607147,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 28.42,
"eval_samples_per_second": 35.961,
"eval_steps_per_second": 1.126,
"step": 93
},
{
"epoch": 2.0,
"grad_norm": 0.94158935546875,
"learning_rate": 1.7022336025380143e-05,
"loss": 0.4648,
"step": 186
},
{
"epoch": 2.0,
"eval_accuracy": 0.5176125244618396,
"eval_f1": 0.10849909584086799,
"eval_loss": 0.4538751244544983,
"eval_precision": 0.7142857142857143,
"eval_recall": 0.05870841487279843,
"eval_runtime": 28.3351,
"eval_samples_per_second": 36.068,
"eval_steps_per_second": 1.129,
"step": 186
},
{
"epoch": 3.0,
"grad_norm": 1.0048439502716064,
"learning_rate": 1.459057373604012e-05,
"loss": 0.4406,
"step": 279
},
{
"epoch": 3.0,
"eval_accuracy": 0.5929549902152642,
"eval_f1": 0.3677811550151976,
"eval_loss": 0.4149659276008606,
"eval_precision": 0.8231292517006803,
"eval_recall": 0.23679060665362034,
"eval_runtime": 28.3925,
"eval_samples_per_second": 35.995,
"eval_steps_per_second": 1.127,
"step": 279
},
{
"epoch": 4.0,
"grad_norm": 2.1001150608062744,
"learning_rate": 1.2158811446700102e-05,
"loss": 0.4126,
"step": 372
},
{
"epoch": 4.0,
"eval_accuracy": 0.7025440313111546,
"eval_f1": 0.6456876456876457,
"eval_loss": 0.39196181297302246,
"eval_precision": 0.7982708933717579,
"eval_recall": 0.5420743639921722,
"eval_runtime": 28.4943,
"eval_samples_per_second": 35.867,
"eval_steps_per_second": 1.123,
"step": 372
},
{
"epoch": 5.0,
"grad_norm": 1.2996047735214233,
"learning_rate": 9.72704915736008e-06,
"loss": 0.4021,
"step": 465
},
{
"epoch": 5.0,
"eval_accuracy": 0.735812133072407,
"eval_f1": 0.7133757961783439,
"eval_loss": 0.3851335346698761,
"eval_precision": 0.7795823665893271,
"eval_recall": 0.6575342465753424,
"eval_runtime": 28.8344,
"eval_samples_per_second": 35.444,
"eval_steps_per_second": 1.11,
"step": 465
},
{
"epoch": 6.0,
"grad_norm": 1.8318911790847778,
"learning_rate": 7.29528686802006e-06,
"loss": 0.3976,
"step": 558
},
{
"epoch": 6.0,
"eval_accuracy": 0.7524461839530333,
"eval_f1": 0.7394438722966015,
"eval_loss": 0.3816056251525879,
"eval_precision": 0.7804347826086957,
"eval_recall": 0.7025440313111546,
"eval_runtime": 28.629,
"eval_samples_per_second": 35.698,
"eval_steps_per_second": 1.118,
"step": 558
},
{
"epoch": 7.0,
"grad_norm": 1.5164391994476318,
"learning_rate": 4.86352457868004e-06,
"loss": 0.3934,
"step": 651
},
{
"epoch": 7.0,
"eval_accuracy": 0.7504892367906066,
"eval_f1": 0.7357512953367875,
"eval_loss": 0.37981584668159485,
"eval_precision": 0.7819383259911894,
"eval_recall": 0.6947162426614482,
"eval_runtime": 29.4404,
"eval_samples_per_second": 34.714,
"eval_steps_per_second": 1.087,
"step": 651
},
{
"epoch": 8.0,
"grad_norm": 1.8273214101791382,
"learning_rate": 2.43176228934002e-06,
"loss": 0.3903,
"step": 744
},
{
"epoch": 8.0,
"eval_accuracy": 0.7544031311154599,
"eval_f1": 0.7462082912032356,
"eval_loss": 0.3790663480758667,
"eval_precision": 0.7719665271966527,
"eval_recall": 0.7221135029354208,
"eval_runtime": 28.4026,
"eval_samples_per_second": 35.983,
"eval_steps_per_second": 1.127,
"step": 744
}
],
"logging_steps": 500,
"max_steps": 837,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 500,
"total_flos": 1885561578240.0,
"train_batch_size": 33,
"trial_name": null,
"trial_params": {
"alpha": 0.6122687021783514,
"learning_rate": 2.188586060406018e-05,
"num_train_epochs": 9,
"per_device_train_batch_size": 33,
"temperature": 14
}
}