|
{ |
|
"best_metric": 0.5564356435643565, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-8/checkpoint-2079", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 2079, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.24534238874912262, |
|
"learning_rate": 0.0002919829387820306, |
|
"loss": 0.3074, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5148514851485149, |
|
"eval_f1": 0.16382252559726962, |
|
"eval_loss": 0.30149543285369873, |
|
"eval_precision": 0.5853658536585366, |
|
"eval_recall": 0.09523809523809523, |
|
"eval_runtime": 14.7271, |
|
"eval_samples_per_second": 34.291, |
|
"eval_steps_per_second": 1.086, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.5045191645622253, |
|
"learning_rate": 0.00025027109038459764, |
|
"loss": 0.3009, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5168316831683168, |
|
"eval_f1": 0.1643835616438356, |
|
"eval_loss": 0.2990603744983673, |
|
"eval_precision": 0.6, |
|
"eval_recall": 0.09523809523809523, |
|
"eval_runtime": 15.7792, |
|
"eval_samples_per_second": 32.004, |
|
"eval_steps_per_second": 1.014, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.6056041121482849, |
|
"learning_rate": 0.00020855924198716472, |
|
"loss": 0.2956, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5207920792079208, |
|
"eval_f1": 0.17687074829931973, |
|
"eval_loss": 0.2930928170681, |
|
"eval_precision": 0.6190476190476191, |
|
"eval_recall": 0.10317460317460317, |
|
"eval_runtime": 14.6946, |
|
"eval_samples_per_second": 34.366, |
|
"eval_steps_per_second": 1.089, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.4732127785682678, |
|
"learning_rate": 0.00016684739358973177, |
|
"loss": 0.2915, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5207920792079208, |
|
"eval_f1": 0.16551724137931034, |
|
"eval_loss": 0.2868767976760864, |
|
"eval_precision": 0.631578947368421, |
|
"eval_recall": 0.09523809523809523, |
|
"eval_runtime": 14.9872, |
|
"eval_samples_per_second": 33.695, |
|
"eval_steps_per_second": 1.068, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.4796178936958313, |
|
"learning_rate": 0.00012513554519229882, |
|
"loss": 0.2881, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5188118811881188, |
|
"eval_f1": 0.15916955017301038, |
|
"eval_loss": 0.2868916988372803, |
|
"eval_precision": 0.6216216216216216, |
|
"eval_recall": 0.09126984126984126, |
|
"eval_runtime": 14.7705, |
|
"eval_samples_per_second": 34.19, |
|
"eval_steps_per_second": 1.083, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.5560742616653442, |
|
"learning_rate": 8.342369679486588e-05, |
|
"loss": 0.2859, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5544554455445545, |
|
"eval_f1": 0.25742574257425743, |
|
"eval_loss": 0.2852015793323517, |
|
"eval_precision": 0.7647058823529411, |
|
"eval_recall": 0.15476190476190477, |
|
"eval_runtime": 15.1062, |
|
"eval_samples_per_second": 33.43, |
|
"eval_steps_per_second": 1.059, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.5270741581916809, |
|
"learning_rate": 4.171184839743294e-05, |
|
"loss": 0.2835, |
|
"step": 2079 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5564356435643565, |
|
"eval_f1": 0.28205128205128205, |
|
"eval_loss": 0.28437066078186035, |
|
"eval_precision": 0.7333333333333333, |
|
"eval_recall": 0.1746031746031746, |
|
"eval_runtime": 14.6907, |
|
"eval_samples_per_second": 34.375, |
|
"eval_steps_per_second": 1.089, |
|
"step": 2079 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2376, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"total_flos": 5444902981980.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.37248229271478195, |
|
"learning_rate": 0.00033369478717946353, |
|
"num_train_epochs": 8, |
|
"per_device_train_batch_size": 32, |
|
"temperature": 11 |
|
} |
|
} |
|
|