xuancoblab2023's picture
Training in progress, epoch 2
f25ade1 verified
{
"best_metric": 0.6874704863843853,
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-3176",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 3176,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.0594383478164673,
"learning_rate": 8.902447569795288e-05,
"loss": 0.5725,
"step": 1588
},
{
"epoch": 1.0,
"eval_accuracy": 0.6804659216118369,
"eval_f1": 0.3575949367088608,
"eval_loss": 0.5597381591796875,
"eval_mcc": 0.19612047304332003,
"eval_precision": 0.5419664268585132,
"eval_recall": 0.2668240850059032,
"eval_runtime": 23.4624,
"eval_samples_per_second": 541.547,
"eval_steps_per_second": 16.963,
"step": 1588
},
{
"epoch": 2.0,
"grad_norm": 1.540066123008728,
"learning_rate": 7.789641623570877e-05,
"loss": 0.5555,
"step": 3176
},
{
"epoch": 2.0,
"eval_accuracy": 0.6874704863843853,
"eval_f1": 0.2603836841124977,
"eval_loss": 0.5536655783653259,
"eval_mcc": 0.18799631789309962,
"eval_precision": 0.6164021164021164,
"eval_recall": 0.16505312868949232,
"eval_runtime": 23.2494,
"eval_samples_per_second": 546.509,
"eval_steps_per_second": 17.119,
"step": 3176
}
],
"logging_steps": 500,
"max_steps": 14292,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 500,
"total_flos": 7817788152600.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": {
"alpha": 0.9165014388792823,
"learning_rate": 0.00010015253516019699,
"num_train_epochs": 9,
"temperature": 13
}
}