|
{ |
|
"best_metric": 0.8405088062622309, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-819", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 819, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.826261043548584, |
|
"learning_rate": 0.0002239970993716639, |
|
"loss": 0.481, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7749510763209393, |
|
"eval_f1": 0.7993019197207678, |
|
"eval_loss": 0.4282819330692291, |
|
"eval_precision": 0.721259842519685, |
|
"eval_recall": 0.8962818003913894, |
|
"eval_runtime": 28.5148, |
|
"eval_samples_per_second": 35.841, |
|
"eval_steps_per_second": 1.122, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 8.367091178894043, |
|
"learning_rate": 0.00019599746195020593, |
|
"loss": 0.4267, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7896281800391389, |
|
"eval_f1": 0.8043676069153776, |
|
"eval_loss": 0.4049767255783081, |
|
"eval_precision": 0.7517006802721088, |
|
"eval_recall": 0.8649706457925636, |
|
"eval_runtime": 28.5272, |
|
"eval_samples_per_second": 35.825, |
|
"eval_steps_per_second": 1.122, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.821913480758667, |
|
"learning_rate": 0.00016799782452874793, |
|
"loss": 0.4057, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8101761252446184, |
|
"eval_f1": 0.8116504854368931, |
|
"eval_loss": 0.38922348618507385, |
|
"eval_precision": 0.8053949903660886, |
|
"eval_recall": 0.8180039138943248, |
|
"eval_runtime": 28.5222, |
|
"eval_samples_per_second": 35.832, |
|
"eval_steps_per_second": 1.122, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.150608539581299, |
|
"learning_rate": 0.00013999818710728996, |
|
"loss": 0.3899, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8209393346379648, |
|
"eval_f1": 0.8221574344023325, |
|
"eval_loss": 0.38097265362739563, |
|
"eval_precision": 0.8166023166023166, |
|
"eval_recall": 0.8277886497064579, |
|
"eval_runtime": 27.9162, |
|
"eval_samples_per_second": 36.61, |
|
"eval_steps_per_second": 1.146, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 15.217473030090332, |
|
"learning_rate": 0.00011199854968583195, |
|
"loss": 0.3811, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8209393346379648, |
|
"eval_f1": 0.8288119738072964, |
|
"eval_loss": 0.38663867115974426, |
|
"eval_precision": 0.7939068100358423, |
|
"eval_recall": 0.8669275929549902, |
|
"eval_runtime": 28.6175, |
|
"eval_samples_per_second": 35.712, |
|
"eval_steps_per_second": 1.118, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.0855538845062256, |
|
"learning_rate": 8.399891226437396e-05, |
|
"loss": 0.3782, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8111545988258317, |
|
"eval_f1": 0.8305531167690957, |
|
"eval_loss": 0.39196252822875977, |
|
"eval_precision": 0.7531847133757962, |
|
"eval_recall": 0.9256360078277887, |
|
"eval_runtime": 28.4313, |
|
"eval_samples_per_second": 35.946, |
|
"eval_steps_per_second": 1.126, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 4.913670063018799, |
|
"learning_rate": 5.5999274842915974e-05, |
|
"loss": 0.3707, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8170254403131115, |
|
"eval_f1": 0.8310749774164408, |
|
"eval_loss": 0.38759666681289673, |
|
"eval_precision": 0.7718120805369127, |
|
"eval_recall": 0.9001956947162426, |
|
"eval_runtime": 28.3099, |
|
"eval_samples_per_second": 36.1, |
|
"eval_steps_per_second": 1.13, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 8.997469902038574, |
|
"learning_rate": 2.7999637421457987e-05, |
|
"loss": 0.3696, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8307240704500979, |
|
"eval_f1": 0.8422971741112123, |
|
"eval_loss": 0.3802284896373749, |
|
"eval_precision": 0.78839590443686, |
|
"eval_recall": 0.9041095890410958, |
|
"eval_runtime": 28.5564, |
|
"eval_samples_per_second": 35.789, |
|
"eval_steps_per_second": 1.121, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 7.643197536468506, |
|
"learning_rate": 0.0, |
|
"loss": 0.3628, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8405088062622309, |
|
"eval_f1": 0.8480894687791241, |
|
"eval_loss": 0.37590980529785156, |
|
"eval_precision": 0.8096085409252669, |
|
"eval_recall": 0.8904109589041096, |
|
"eval_runtime": 28.5328, |
|
"eval_samples_per_second": 35.818, |
|
"eval_steps_per_second": 1.122, |
|
"step": 819 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 819, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 2121256775520.0, |
|
"train_batch_size": 34, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.7088780913019314, |
|
"learning_rate": 0.0002519967367931219, |
|
"num_train_epochs": 9, |
|
"per_device_train_batch_size": 34, |
|
"temperature": 19 |
|
} |
|
} |
|
|