xuancoblab2023's picture
Training in progress, epoch 1
59c3b50 verified
raw
history blame
4.5 kB
{
"best_metric": 0.7662745098039215,
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-21/checkpoint-800",
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1120,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.839683175086975,
"learning_rate": 0.0004864196945990802,
"loss": 0.5129,
"step": 160
},
{
"epoch": 1.0,
"eval_accuracy": 0.6666666666666666,
"eval_f1": 0.0,
"eval_loss": 0.5049448609352112,
"eval_mcc": 0.0,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 1.8683,
"eval_samples_per_second": 682.449,
"eval_steps_per_second": 21.41,
"step": 160
},
{
"epoch": 2.0,
"grad_norm": 1.1274871826171875,
"learning_rate": 0.0004053497454992336,
"loss": 0.4794,
"step": 320
},
{
"epoch": 2.0,
"eval_accuracy": 0.7050980392156863,
"eval_f1": 0.26848249027237353,
"eval_loss": 0.45544198155403137,
"eval_mcc": 0.2568204849073838,
"eval_precision": 0.7752808988764045,
"eval_recall": 0.1623529411764706,
"eval_runtime": 2.1369,
"eval_samples_per_second": 596.661,
"eval_steps_per_second": 18.719,
"step": 320
},
{
"epoch": 3.0,
"grad_norm": 1.5591638088226318,
"learning_rate": 0.00032427979639938683,
"loss": 0.4627,
"step": 480
},
{
"epoch": 3.0,
"eval_accuracy": 0.7333333333333333,
"eval_f1": 0.470404984423676,
"eval_loss": 0.44455307722091675,
"eval_mcc": 0.34827660665065835,
"eval_precision": 0.695852534562212,
"eval_recall": 0.3552941176470588,
"eval_runtime": 1.8876,
"eval_samples_per_second": 675.477,
"eval_steps_per_second": 21.191,
"step": 480
},
{
"epoch": 4.0,
"grad_norm": 0.6579329371452332,
"learning_rate": 0.0002432098472995401,
"loss": 0.4529,
"step": 640
},
{
"epoch": 4.0,
"eval_accuracy": 0.7239215686274509,
"eval_f1": 0.38461538461538464,
"eval_loss": 0.4378909468650818,
"eval_mcc": 0.31777730290612743,
"eval_precision": 0.7482993197278912,
"eval_recall": 0.25882352941176473,
"eval_runtime": 1.8818,
"eval_samples_per_second": 677.548,
"eval_steps_per_second": 21.256,
"step": 640
},
{
"epoch": 5.0,
"grad_norm": 1.6731197834014893,
"learning_rate": 0.00016213989819969341,
"loss": 0.4444,
"step": 800
},
{
"epoch": 5.0,
"eval_accuracy": 0.7662745098039215,
"eval_f1": 0.5906593406593407,
"eval_loss": 0.4348558187484741,
"eval_mcc": 0.4456117783291848,
"eval_precision": 0.7095709570957096,
"eval_recall": 0.5058823529411764,
"eval_runtime": 1.8669,
"eval_samples_per_second": 682.95,
"eval_steps_per_second": 21.426,
"step": 800
},
{
"epoch": 6.0,
"grad_norm": 1.8448941707611084,
"learning_rate": 8.106994909984671e-05,
"loss": 0.4415,
"step": 960
},
{
"epoch": 6.0,
"eval_accuracy": 0.7623529411764706,
"eval_f1": 0.5738396624472574,
"eval_loss": 0.4331771433353424,
"eval_mcc": 0.4334324670996623,
"eval_precision": 0.7132867132867133,
"eval_recall": 0.48,
"eval_runtime": 1.8638,
"eval_samples_per_second": 684.078,
"eval_steps_per_second": 21.461,
"step": 960
},
{
"epoch": 7.0,
"grad_norm": 1.8716546297073364,
"learning_rate": 0.0,
"loss": 0.436,
"step": 1120
},
{
"epoch": 7.0,
"eval_accuracy": 0.7654901960784314,
"eval_f1": 0.5672937771345876,
"eval_loss": 0.4309416115283966,
"eval_mcc": 0.4394955402199731,
"eval_precision": 0.7368421052631579,
"eval_recall": 0.4611764705882353,
"eval_runtime": 1.8706,
"eval_samples_per_second": 681.61,
"eval_steps_per_second": 21.384,
"step": 1120
}
],
"logging_steps": 500,
"max_steps": 1120,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 500,
"total_flos": 2042288065440.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": {
"alpha": 0.7976867614913634,
"learning_rate": 0.000567489643698927,
"num_train_epochs": 7,
"temperature": 7
}
}