|
{ |
|
"best_metric": 0.8414872798434442, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-27/checkpoint-768", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 864, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.020627737045288, |
|
"learning_rate": 0.00020460634625664984, |
|
"loss": 0.494, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7886497064579256, |
|
"eval_f1": 0.8032786885245903, |
|
"eval_loss": 0.4278793931007385, |
|
"eval_precision": 0.7512776831345827, |
|
"eval_recall": 0.863013698630137, |
|
"eval_runtime": 24.9337, |
|
"eval_samples_per_second": 40.989, |
|
"eval_steps_per_second": 1.283, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.554121732711792, |
|
"learning_rate": 0.00017903055297456864, |
|
"loss": 0.4258, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8062622309197651, |
|
"eval_f1": 0.8241563055062168, |
|
"eval_loss": 0.40374234318733215, |
|
"eval_precision": 0.7544715447154472, |
|
"eval_recall": 0.9080234833659491, |
|
"eval_runtime": 25.4225, |
|
"eval_samples_per_second": 40.201, |
|
"eval_steps_per_second": 1.259, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.225475311279297, |
|
"learning_rate": 0.00015345475969248738, |
|
"loss": 0.405, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8277886497064579, |
|
"eval_f1": 0.84, |
|
"eval_loss": 0.39554914832115173, |
|
"eval_precision": 0.7843803056027164, |
|
"eval_recall": 0.9041095890410958, |
|
"eval_runtime": 25.5434, |
|
"eval_samples_per_second": 40.01, |
|
"eval_steps_per_second": 1.253, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.6178488731384277, |
|
"learning_rate": 0.00012787896641040618, |
|
"loss": 0.3903, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.821917808219178, |
|
"eval_f1": 0.8222656250000001, |
|
"eval_loss": 0.38986799120903015, |
|
"eval_precision": 0.8206627680311891, |
|
"eval_recall": 0.8238747553816047, |
|
"eval_runtime": 25.4279, |
|
"eval_samples_per_second": 40.192, |
|
"eval_steps_per_second": 1.258, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.1577227115631104, |
|
"learning_rate": 0.00010230317312832492, |
|
"loss": 0.3846, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8228962818003914, |
|
"eval_f1": 0.8309990662931839, |
|
"eval_loss": 0.3861733376979828, |
|
"eval_precision": 0.7946428571428571, |
|
"eval_recall": 0.8708414872798435, |
|
"eval_runtime": 25.7302, |
|
"eval_samples_per_second": 39.72, |
|
"eval_steps_per_second": 1.244, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.0598785877227783, |
|
"learning_rate": 7.672737984624369e-05, |
|
"loss": 0.3784, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8317025440313112, |
|
"eval_f1": 0.8472468916518652, |
|
"eval_loss": 0.3895849585533142, |
|
"eval_precision": 0.775609756097561, |
|
"eval_recall": 0.9334637964774951, |
|
"eval_runtime": 25.139, |
|
"eval_samples_per_second": 40.654, |
|
"eval_steps_per_second": 1.273, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.7429569959640503, |
|
"learning_rate": 5.115158656416246e-05, |
|
"loss": 0.3756, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8405088062622309, |
|
"eval_f1": 0.8530207394048693, |
|
"eval_loss": 0.3818517029285431, |
|
"eval_precision": 0.7909698996655519, |
|
"eval_recall": 0.9256360078277887, |
|
"eval_runtime": 25.2278, |
|
"eval_samples_per_second": 40.511, |
|
"eval_steps_per_second": 1.268, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.883507013320923, |
|
"learning_rate": 2.557579328208123e-05, |
|
"loss": 0.3725, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8414872798434442, |
|
"eval_f1": 0.8527272727272727, |
|
"eval_loss": 0.38016170263290405, |
|
"eval_precision": 0.7962648556876061, |
|
"eval_recall": 0.9178082191780822, |
|
"eval_runtime": 24.8536, |
|
"eval_samples_per_second": 41.121, |
|
"eval_steps_per_second": 1.288, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 2.0533533096313477, |
|
"learning_rate": 0.0, |
|
"loss": 0.3665, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.837573385518591, |
|
"eval_f1": 0.8471454880294659, |
|
"eval_loss": 0.38007956743240356, |
|
"eval_precision": 0.8, |
|
"eval_recall": 0.9001956947162426, |
|
"eval_runtime": 25.3933, |
|
"eval_samples_per_second": 40.247, |
|
"eval_steps_per_second": 1.26, |
|
"step": 864 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 864, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 2121256775520.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.718256826807231, |
|
"learning_rate": 0.0002301821395387311, |
|
"num_train_epochs": 9, |
|
"temperature": 6 |
|
} |
|
} |
|
|