|
{ |
|
"best_metric": 0.8317025440313112, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-41/checkpoint-864", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 864, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.4870496988296509, |
|
"learning_rate": 0.00011097537892965708, |
|
"loss": 0.5464, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7749510763209393, |
|
"eval_f1": 0.8017241379310346, |
|
"eval_loss": 0.4673503041267395, |
|
"eval_precision": 0.7164869029275809, |
|
"eval_recall": 0.9099804305283757, |
|
"eval_runtime": 28.2108, |
|
"eval_samples_per_second": 36.227, |
|
"eval_steps_per_second": 1.134, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.9109100103378296, |
|
"learning_rate": 9.710345656344995e-05, |
|
"loss": 0.4617, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8082191780821918, |
|
"eval_f1": 0.8262411347517731, |
|
"eval_loss": 0.43051373958587646, |
|
"eval_precision": 0.7552674230145867, |
|
"eval_recall": 0.9119373776908023, |
|
"eval_runtime": 28.0456, |
|
"eval_samples_per_second": 36.441, |
|
"eval_steps_per_second": 1.141, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.547697067260742, |
|
"learning_rate": 8.32315341972428e-05, |
|
"loss": 0.4341, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8228962818003914, |
|
"eval_f1": 0.8337924701561066, |
|
"eval_loss": 0.41541436314582825, |
|
"eval_precision": 0.7854671280276817, |
|
"eval_recall": 0.8884540117416829, |
|
"eval_runtime": 28.3605, |
|
"eval_samples_per_second": 36.036, |
|
"eval_steps_per_second": 1.128, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 5.3498454093933105, |
|
"learning_rate": 6.935961183103568e-05, |
|
"loss": 0.4236, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8170254403131115, |
|
"eval_f1": 0.8203650336215178, |
|
"eval_loss": 0.4138866066932678, |
|
"eval_precision": 0.8056603773584906, |
|
"eval_recall": 0.8356164383561644, |
|
"eval_runtime": 28.1596, |
|
"eval_samples_per_second": 36.293, |
|
"eval_steps_per_second": 1.136, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.9074805974960327, |
|
"learning_rate": 5.548768946482854e-05, |
|
"loss": 0.4117, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8287671232876712, |
|
"eval_f1": 0.8427672955974842, |
|
"eval_loss": 0.40257665514945984, |
|
"eval_precision": 0.7790697674418605, |
|
"eval_recall": 0.9178082191780822, |
|
"eval_runtime": 28.5426, |
|
"eval_samples_per_second": 35.806, |
|
"eval_steps_per_second": 1.121, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.1893153190612793, |
|
"learning_rate": 4.16157670986214e-05, |
|
"loss": 0.4039, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.824853228962818, |
|
"eval_f1": 0.8385933273219117, |
|
"eval_loss": 0.4039454162120819, |
|
"eval_precision": 0.7775919732441472, |
|
"eval_recall": 0.9099804305283757, |
|
"eval_runtime": 28.1064, |
|
"eval_samples_per_second": 36.362, |
|
"eval_steps_per_second": 1.139, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.368473768234253, |
|
"learning_rate": 2.774384473241427e-05, |
|
"loss": 0.3968, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8307240704500979, |
|
"eval_f1": 0.8464951197870452, |
|
"eval_loss": 0.4061815142631531, |
|
"eval_precision": 0.7743506493506493, |
|
"eval_recall": 0.9334637964774951, |
|
"eval_runtime": 28.8284, |
|
"eval_samples_per_second": 35.451, |
|
"eval_steps_per_second": 1.11, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.229116916656494, |
|
"learning_rate": 1.3871922366207134e-05, |
|
"loss": 0.3973, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8307240704500979, |
|
"eval_f1": 0.8434389140271493, |
|
"eval_loss": 0.3992971181869507, |
|
"eval_precision": 0.7845117845117845, |
|
"eval_recall": 0.9119373776908023, |
|
"eval_runtime": 28.7392, |
|
"eval_samples_per_second": 35.561, |
|
"eval_steps_per_second": 1.113, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 2.1530864238739014, |
|
"learning_rate": 0.0, |
|
"loss": 0.392, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8317025440313112, |
|
"eval_f1": 0.8407407407407408, |
|
"eval_loss": 0.39753690361976624, |
|
"eval_precision": 0.7978910369068541, |
|
"eval_recall": 0.8884540117416829, |
|
"eval_runtime": 28.2952, |
|
"eval_samples_per_second": 36.119, |
|
"eval_steps_per_second": 1.131, |
|
"step": 864 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 864, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 2121256775520.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.8078288065121237, |
|
"learning_rate": 0.0001248473012958642, |
|
"num_train_epochs": 9, |
|
"temperature": 4 |
|
} |
|
} |
|
|