xuancoblab2023's picture
Training in progress, epoch 3
b237307 verified
raw
history blame
2.9 kB
{
"best_metric": 0.7521968365553603,
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-38/checkpoint-642",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 856,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.823625087738037,
"learning_rate": 0.00022026694526910956,
"loss": 0.4997,
"step": 214
},
{
"epoch": 1.0,
"eval_accuracy": 0.7129466900995899,
"eval_f1": 0.40243902439024387,
"eval_loss": 0.4626123011112213,
"eval_mcc": 0.2854021761857005,
"eval_precision": 0.6573705179282868,
"eval_recall": 0.28998242530755713,
"eval_runtime": 3.1456,
"eval_samples_per_second": 542.661,
"eval_steps_per_second": 17.167,
"step": 214
},
{
"epoch": 2.0,
"grad_norm": 1.7993404865264893,
"learning_rate": 0.0001888002388020939,
"loss": 0.4542,
"step": 428
},
{
"epoch": 2.0,
"eval_accuracy": 0.7398945518453427,
"eval_f1": 0.47641509433962265,
"eval_loss": 0.4441252648830414,
"eval_mcc": 0.366324992569054,
"eval_precision": 0.7240143369175627,
"eval_recall": 0.35500878734622143,
"eval_runtime": 3.7126,
"eval_samples_per_second": 459.782,
"eval_steps_per_second": 14.545,
"step": 428
},
{
"epoch": 3.0,
"grad_norm": 1.832775354385376,
"learning_rate": 0.00015733353233507826,
"loss": 0.4371,
"step": 642
},
{
"epoch": 3.0,
"eval_accuracy": 0.7521968365553603,
"eval_f1": 0.5052631578947367,
"eval_loss": 0.44420263171195984,
"eval_mcc": 0.4015258565021214,
"eval_precision": 0.7552447552447552,
"eval_recall": 0.37961335676625657,
"eval_runtime": 3.1594,
"eval_samples_per_second": 540.296,
"eval_steps_per_second": 17.092,
"step": 642
},
{
"epoch": 4.0,
"grad_norm": 2.8890116214752197,
"learning_rate": 0.0001258668258680626,
"loss": 0.4321,
"step": 856
},
{
"epoch": 4.0,
"eval_accuracy": 0.7398945518453427,
"eval_f1": 0.43367346938775514,
"eval_loss": 0.4393432140350342,
"eval_mcc": 0.3683014058572899,
"eval_precision": 0.7906976744186046,
"eval_recall": 0.29876977152899825,
"eval_runtime": 3.8868,
"eval_samples_per_second": 439.178,
"eval_steps_per_second": 13.893,
"step": 856
}
],
"logging_steps": 500,
"max_steps": 1712,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 500,
"total_flos": 2099102657760.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": {
"alpha": 0.806613569484766,
"learning_rate": 0.0002517336517361252,
"num_train_epochs": 8,
"temperature": 7
}
}