|
{ |
|
"best_metric": 0.837573385518591, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-25/checkpoint-768", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 768, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.8755850791931152, |
|
"learning_rate": 0.00020053170483737637, |
|
"loss": 0.4801, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7818003913894325, |
|
"eval_f1": 0.8092386655260906, |
|
"eval_loss": 0.42955365777015686, |
|
"eval_precision": 0.7188449848024316, |
|
"eval_recall": 0.9256360078277887, |
|
"eval_runtime": 31.124, |
|
"eval_samples_per_second": 32.836, |
|
"eval_steps_per_second": 1.028, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.4842429161071777, |
|
"learning_rate": 0.0001782504042998901, |
|
"loss": 0.4182, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8091976516634051, |
|
"eval_f1": 0.8232094288304623, |
|
"eval_loss": 0.3939443826675415, |
|
"eval_precision": 0.7668918918918919, |
|
"eval_recall": 0.8884540117416829, |
|
"eval_runtime": 32.508, |
|
"eval_samples_per_second": 31.438, |
|
"eval_steps_per_second": 0.984, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.5258917808532715, |
|
"learning_rate": 0.00015596910376240383, |
|
"loss": 0.3965, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.815068493150685, |
|
"eval_f1": 0.8325952170062001, |
|
"eval_loss": 0.39436766505241394, |
|
"eval_precision": 0.7605177993527508, |
|
"eval_recall": 0.9197651663405088, |
|
"eval_runtime": 29.4789, |
|
"eval_samples_per_second": 34.669, |
|
"eval_steps_per_second": 1.086, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.0768847465515137, |
|
"learning_rate": 0.00013368780322491758, |
|
"loss": 0.3841, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8140900195694716, |
|
"eval_f1": 0.8137254901960785, |
|
"eval_loss": 0.38306960463523865, |
|
"eval_precision": 0.8153241650294696, |
|
"eval_recall": 0.812133072407045, |
|
"eval_runtime": 32.8502, |
|
"eval_samples_per_second": 31.111, |
|
"eval_steps_per_second": 0.974, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.1314367055892944, |
|
"learning_rate": 0.00011140650268743131, |
|
"loss": 0.3779, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8326810176125244, |
|
"eval_f1": 0.8423963133640553, |
|
"eval_loss": 0.37976235151290894, |
|
"eval_precision": 0.7961672473867596, |
|
"eval_recall": 0.8943248532289628, |
|
"eval_runtime": 29.0986, |
|
"eval_samples_per_second": 35.122, |
|
"eval_steps_per_second": 1.1, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.076519012451172, |
|
"learning_rate": 8.912520214994506e-05, |
|
"loss": 0.3741, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.824853228962818, |
|
"eval_f1": 0.8397493285586393, |
|
"eval_loss": 0.3835048973560333, |
|
"eval_precision": 0.7739273927392739, |
|
"eval_recall": 0.9178082191780822, |
|
"eval_runtime": 28.8483, |
|
"eval_samples_per_second": 35.427, |
|
"eval_steps_per_second": 1.109, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.4347150325775146, |
|
"learning_rate": 6.684390161245879e-05, |
|
"loss": 0.3699, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8356164383561644, |
|
"eval_f1": 0.8469945355191257, |
|
"eval_loss": 0.3747766315937042, |
|
"eval_precision": 0.7921635434412265, |
|
"eval_recall": 0.9099804305283757, |
|
"eval_runtime": 28.9227, |
|
"eval_samples_per_second": 35.336, |
|
"eval_steps_per_second": 1.106, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.361301898956299, |
|
"learning_rate": 4.456260107497253e-05, |
|
"loss": 0.3677, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.837573385518591, |
|
"eval_f1": 0.8482632541133455, |
|
"eval_loss": 0.3733108639717102, |
|
"eval_precision": 0.7958833619210978, |
|
"eval_recall": 0.9080234833659491, |
|
"eval_runtime": 29.9904, |
|
"eval_samples_per_second": 34.078, |
|
"eval_steps_per_second": 1.067, |
|
"step": 768 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 960, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1885561578240.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.6872599698926333, |
|
"learning_rate": 0.00022281300537486262, |
|
"num_train_epochs": 10, |
|
"temperature": 15 |
|
} |
|
} |
|
|