xuancoblab2023's picture
Training in progress, epoch 1
296672d verified
raw
history blame
4.6 kB
{
"best_metric": 0.8238747553816047,
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-14/checkpoint-1152",
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1344,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 2.0995702743530273,
"learning_rate": 0.00024019499178328863,
"loss": 0.4158,
"step": 192
},
{
"epoch": 1.0,
"eval_accuracy": 0.7318982387475538,
"eval_f1": 0.7078891257995735,
"eval_loss": 0.38118404150009155,
"eval_mcc": 0.4701927470442256,
"eval_precision": 0.7775175644028103,
"eval_recall": 0.649706457925636,
"eval_runtime": 66.3838,
"eval_samples_per_second": 15.395,
"eval_steps_per_second": 0.482,
"step": 192
},
{
"epoch": 2.0,
"grad_norm": 4.5503435134887695,
"learning_rate": 0.00020016249315274053,
"loss": 0.3871,
"step": 384
},
{
"epoch": 2.0,
"eval_accuracy": 0.7798434442270059,
"eval_f1": 0.7619047619047619,
"eval_loss": 0.3572150468826294,
"eval_mcc": 0.566151295376701,
"eval_precision": 0.8294930875576036,
"eval_recall": 0.7045009784735812,
"eval_runtime": 66.9799,
"eval_samples_per_second": 15.258,
"eval_steps_per_second": 0.478,
"step": 384
},
{
"epoch": 3.0,
"grad_norm": 2.7390501499176025,
"learning_rate": 0.00016012999452219242,
"loss": 0.3651,
"step": 576
},
{
"epoch": 3.0,
"eval_accuracy": 0.8072407045009785,
"eval_f1": 0.8047571853320119,
"eval_loss": 0.36672884225845337,
"eval_mcc": 0.6146803546891556,
"eval_precision": 0.8152610441767069,
"eval_recall": 0.7945205479452054,
"eval_runtime": 66.7321,
"eval_samples_per_second": 15.315,
"eval_steps_per_second": 0.48,
"step": 576
},
{
"epoch": 4.0,
"grad_norm": 2.3112893104553223,
"learning_rate": 0.00012009749589164432,
"loss": 0.3601,
"step": 768
},
{
"epoch": 4.0,
"eval_accuracy": 0.8131115459882583,
"eval_f1": 0.811451135241856,
"eval_loss": 0.35847175121307373,
"eval_mcc": 0.626320242099439,
"eval_precision": 0.8187250996015937,
"eval_recall": 0.8043052837573386,
"eval_runtime": 66.7668,
"eval_samples_per_second": 15.307,
"eval_steps_per_second": 0.479,
"step": 768
},
{
"epoch": 5.0,
"grad_norm": 4.440983295440674,
"learning_rate": 8.006499726109621e-05,
"loss": 0.3558,
"step": 960
},
{
"epoch": 5.0,
"eval_accuracy": 0.812133072407045,
"eval_f1": 0.8095238095238095,
"eval_loss": 0.36063674092292786,
"eval_mcc": 0.6245005669570931,
"eval_precision": 0.8209255533199196,
"eval_recall": 0.7984344422700587,
"eval_runtime": 66.686,
"eval_samples_per_second": 15.326,
"eval_steps_per_second": 0.48,
"step": 960
},
{
"epoch": 6.0,
"grad_norm": 2.304095506668091,
"learning_rate": 4.0032498630548105e-05,
"loss": 0.349,
"step": 1152
},
{
"epoch": 6.0,
"eval_accuracy": 0.8238747553816047,
"eval_f1": 0.8311444652908067,
"eval_loss": 0.3584487736225128,
"eval_mcc": 0.6501642144850927,
"eval_precision": 0.7981981981981981,
"eval_recall": 0.8669275929549902,
"eval_runtime": 66.5615,
"eval_samples_per_second": 15.354,
"eval_steps_per_second": 0.481,
"step": 1152
},
{
"epoch": 7.0,
"grad_norm": 1.643546223640442,
"learning_rate": 0.0,
"loss": 0.3446,
"step": 1344
},
{
"epoch": 7.0,
"eval_accuracy": 0.8189823874755382,
"eval_f1": 0.8198636806231744,
"eval_loss": 0.3578638434410095,
"eval_mcc": 0.637995316854455,
"eval_precision": 0.8158914728682171,
"eval_recall": 0.8238747553816047,
"eval_runtime": 68.6894,
"eval_samples_per_second": 14.879,
"eval_steps_per_second": 0.466,
"step": 1344
}
],
"logging_steps": 500,
"max_steps": 1344,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 500,
"total_flos": 1649866380960.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": {
"alpha": 0.6115137465963567,
"learning_rate": 0.00028022749041383674,
"num_train_epochs": 7,
"per_device_train_batch_size": 16,
"temperature": 33
}
}