|
{ |
|
"best_metric": 0.7089108910891089, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-28/checkpoint-1782", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 2079, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.3590941429138184, |
|
"learning_rate": 0.00033268893750406807, |
|
"loss": 0.6415, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5861386138613861, |
|
"eval_f1": 0.473551637279597, |
|
"eval_loss": 0.6401379108428955, |
|
"eval_mcc": 0.1894629867942258, |
|
"eval_precision": 0.6482758620689655, |
|
"eval_recall": 0.373015873015873, |
|
"eval_runtime": 0.9371, |
|
"eval_samples_per_second": 538.913, |
|
"eval_steps_per_second": 17.074, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.476292848587036, |
|
"learning_rate": 0.00028516194643205835, |
|
"loss": 0.613, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6514851485148515, |
|
"eval_f1": 0.68, |
|
"eval_loss": 0.5918833017349243, |
|
"eval_mcc": 0.30837572082433856, |
|
"eval_precision": 0.62751677852349, |
|
"eval_recall": 0.7420634920634921, |
|
"eval_runtime": 0.9445, |
|
"eval_samples_per_second": 534.7, |
|
"eval_steps_per_second": 16.941, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.6527726650238037, |
|
"learning_rate": 0.0002376349553600486, |
|
"loss": 0.5836, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6594059405940594, |
|
"eval_f1": 0.693950177935943, |
|
"eval_loss": 0.5902799367904663, |
|
"eval_mcc": 0.3278781101029775, |
|
"eval_precision": 0.6290322580645161, |
|
"eval_recall": 0.7738095238095238, |
|
"eval_runtime": 0.9292, |
|
"eval_samples_per_second": 543.474, |
|
"eval_steps_per_second": 17.219, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.4662635326385498, |
|
"learning_rate": 0.0001901079642880389, |
|
"loss": 0.5692, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6613861386138614, |
|
"eval_f1": 0.6994727592267135, |
|
"eval_loss": 0.5874853730201721, |
|
"eval_mcc": 0.33437205674458564, |
|
"eval_precision": 0.6277602523659306, |
|
"eval_recall": 0.7896825396825397, |
|
"eval_runtime": 0.9276, |
|
"eval_samples_per_second": 544.403, |
|
"eval_steps_per_second": 17.248, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.2937889099121094, |
|
"learning_rate": 0.00014258097321602918, |
|
"loss": 0.5565, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6712871287128713, |
|
"eval_f1": 0.6844106463878327, |
|
"eval_loss": 0.5748322010040283, |
|
"eval_mcc": 0.34399282923991964, |
|
"eval_precision": 0.656934306569343, |
|
"eval_recall": 0.7142857142857143, |
|
"eval_runtime": 0.9341, |
|
"eval_samples_per_second": 540.619, |
|
"eval_steps_per_second": 17.129, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.6975440979003906, |
|
"learning_rate": 9.505398214401945e-05, |
|
"loss": 0.5445, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7089108910891089, |
|
"eval_f1": 0.7379679144385027, |
|
"eval_loss": 0.5586543083190918, |
|
"eval_mcc": 0.4291472704190268, |
|
"eval_precision": 0.6699029126213593, |
|
"eval_recall": 0.8214285714285714, |
|
"eval_runtime": 0.932, |
|
"eval_samples_per_second": 541.831, |
|
"eval_steps_per_second": 17.167, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 3.01635479927063, |
|
"learning_rate": 4.752699107200972e-05, |
|
"loss": 0.5314, |
|
"step": 2079 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7069306930693069, |
|
"eval_f1": 0.7279411764705882, |
|
"eval_loss": 0.5484515428543091, |
|
"eval_mcc": 0.419334756687555, |
|
"eval_precision": 0.678082191780822, |
|
"eval_recall": 0.7857142857142857, |
|
"eval_runtime": 0.937, |
|
"eval_samples_per_second": 538.933, |
|
"eval_steps_per_second": 17.075, |
|
"step": 2079 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2376, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"total_flos": 5114908861860.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.9092941270097592, |
|
"learning_rate": 0.0003802159285760778, |
|
"num_train_epochs": 8, |
|
"temperature": 11 |
|
} |
|
} |
|
|