|
{ |
|
"best_metric": 0.7439953134153485, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-14/checkpoint-1284", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 1498, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.230797290802002, |
|
"learning_rate": 0.0004542691429405582, |
|
"loss": 0.5244, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7065026362038664, |
|
"eval_f1": 0.3634053367217281, |
|
"eval_loss": 0.49707961082458496, |
|
"eval_mcc": 0.26187368637682734, |
|
"eval_precision": 0.6559633027522935, |
|
"eval_recall": 0.2513181019332162, |
|
"eval_runtime": 3.1642, |
|
"eval_samples_per_second": 539.474, |
|
"eval_steps_per_second": 17.066, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.7037988901138306, |
|
"learning_rate": 0.0004303602406805288, |
|
"loss": 0.4856, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7047451669595782, |
|
"eval_f1": 0.5019762845849802, |
|
"eval_loss": 0.4853743612766266, |
|
"eval_mcc": 0.3014395863411175, |
|
"eval_precision": 0.5733634311512416, |
|
"eval_recall": 0.44639718804920914, |
|
"eval_runtime": 3.2868, |
|
"eval_samples_per_second": 519.356, |
|
"eval_steps_per_second": 16.43, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.7830451726913452, |
|
"learning_rate": 0.0004064513384204994, |
|
"loss": 0.4758, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7018160515524312, |
|
"eval_f1": 0.25909752547307136, |
|
"eval_loss": 0.4858837425708771, |
|
"eval_mcc": 0.24331468344161114, |
|
"eval_precision": 0.7542372881355932, |
|
"eval_recall": 0.15641476274165203, |
|
"eval_runtime": 3.1648, |
|
"eval_samples_per_second": 539.369, |
|
"eval_steps_per_second": 17.063, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.6830062866210938, |
|
"learning_rate": 0.0003825424361604701, |
|
"loss": 0.4724, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7275922671353251, |
|
"eval_f1": 0.4015444015444016, |
|
"eval_loss": 0.47459593415260315, |
|
"eval_mcc": 0.32925006262083517, |
|
"eval_precision": 0.75, |
|
"eval_recall": 0.2741652021089631, |
|
"eval_runtime": 3.7803, |
|
"eval_samples_per_second": 451.553, |
|
"eval_steps_per_second": 14.285, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 4.760202407836914, |
|
"learning_rate": 0.0003586335339004407, |
|
"loss": 0.4614, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7340363210310487, |
|
"eval_f1": 0.4733178654292344, |
|
"eval_loss": 0.4721404016017914, |
|
"eval_mcc": 0.35044282313920905, |
|
"eval_precision": 0.6962457337883959, |
|
"eval_recall": 0.3585237258347979, |
|
"eval_runtime": 3.1258, |
|
"eval_samples_per_second": 546.093, |
|
"eval_steps_per_second": 17.275, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.5421228408813477, |
|
"learning_rate": 0.0003347246316404113, |
|
"loss": 0.4617, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7439953134153485, |
|
"eval_f1": 0.5346112886048987, |
|
"eval_loss": 0.4643152058124542, |
|
"eval_mcc": 0.38505007336259955, |
|
"eval_precision": 0.6783783783783783, |
|
"eval_recall": 0.44112478031634444, |
|
"eval_runtime": 3.278, |
|
"eval_samples_per_second": 520.745, |
|
"eval_steps_per_second": 16.473, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 3.2574965953826904, |
|
"learning_rate": 0.0003108157293803819, |
|
"loss": 0.4555, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7416520210896309, |
|
"eval_f1": 0.4562268803945746, |
|
"eval_loss": 0.46802204847335815, |
|
"eval_mcc": 0.3717088012670463, |
|
"eval_precision": 0.7644628099173554, |
|
"eval_recall": 0.3251318101933216, |
|
"eval_runtime": 3.1628, |
|
"eval_samples_per_second": 539.712, |
|
"eval_steps_per_second": 17.073, |
|
"step": 1498 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4280, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 3673429651080.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.8544594605661776, |
|
"learning_rate": 0.0004781780452005876, |
|
"num_train_epochs": 20, |
|
"temperature": 15 |
|
} |
|
} |
|
|