|
{ |
|
"best_metric": 0.6604696673189824, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-486", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 729, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.29800447821617126, |
|
"learning_rate": 2.6647930950340397e-05, |
|
"loss": 0.3887, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.3728242516517639, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 30.0921, |
|
"eval_samples_per_second": 33.962, |
|
"eval_steps_per_second": 1.063, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.0294488668441772, |
|
"learning_rate": 2.331693958154785e-05, |
|
"loss": 0.3659, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5371819960861057, |
|
"eval_f1": 0.19145299145299144, |
|
"eval_loss": 0.35036981105804443, |
|
"eval_precision": 0.7567567567567568, |
|
"eval_recall": 0.1095890410958904, |
|
"eval_runtime": 30.0254, |
|
"eval_samples_per_second": 34.038, |
|
"eval_steps_per_second": 1.066, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.3886942863464355, |
|
"learning_rate": 1.99859482127553e-05, |
|
"loss": 0.3406, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5802348336594912, |
|
"eval_f1": 0.32863849765258213, |
|
"eval_loss": 0.32384729385375977, |
|
"eval_precision": 0.8203125, |
|
"eval_recall": 0.2054794520547945, |
|
"eval_runtime": 29.2219, |
|
"eval_samples_per_second": 34.974, |
|
"eval_steps_per_second": 1.095, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.869124174118042, |
|
"learning_rate": 1.665495684396275e-05, |
|
"loss": 0.3253, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6360078277886497, |
|
"eval_f1": 0.48901098901098894, |
|
"eval_loss": 0.31458520889282227, |
|
"eval_precision": 0.8202764976958525, |
|
"eval_recall": 0.34833659491193736, |
|
"eval_runtime": 29.7469, |
|
"eval_samples_per_second": 34.356, |
|
"eval_steps_per_second": 1.076, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.3957839012145996, |
|
"learning_rate": 1.3323965475170199e-05, |
|
"loss": 0.3227, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6487279843444227, |
|
"eval_f1": 0.521970705725699, |
|
"eval_loss": 0.3122331500053406, |
|
"eval_precision": 0.8166666666666667, |
|
"eval_recall": 0.3835616438356164, |
|
"eval_runtime": 29.6103, |
|
"eval_samples_per_second": 34.515, |
|
"eval_steps_per_second": 1.081, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.8226208686828613, |
|
"learning_rate": 9.99297410637765e-06, |
|
"loss": 0.3203, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6604696673189824, |
|
"eval_f1": 0.5522580645161289, |
|
"eval_loss": 0.31177300214767456, |
|
"eval_precision": 0.8106060606060606, |
|
"eval_recall": 0.4187866927592955, |
|
"eval_runtime": 29.3492, |
|
"eval_samples_per_second": 34.822, |
|
"eval_steps_per_second": 1.09, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.4845212697982788, |
|
"learning_rate": 6.661982737585099e-06, |
|
"loss": 0.3188, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6340508806262231, |
|
"eval_f1": 0.48484848484848486, |
|
"eval_loss": 0.3102571964263916, |
|
"eval_precision": 0.8186046511627907, |
|
"eval_recall": 0.34442270058708413, |
|
"eval_runtime": 29.5252, |
|
"eval_samples_per_second": 34.614, |
|
"eval_steps_per_second": 1.084, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.0866426229476929, |
|
"learning_rate": 3.3309913687925497e-06, |
|
"loss": 0.3164, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6545988258317026, |
|
"eval_f1": 0.5397653194263363, |
|
"eval_loss": 0.3101910948753357, |
|
"eval_precision": 0.80859375, |
|
"eval_recall": 0.4050880626223092, |
|
"eval_runtime": 30.0772, |
|
"eval_samples_per_second": 33.979, |
|
"eval_steps_per_second": 1.064, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.8858753442764282, |
|
"learning_rate": 0.0, |
|
"loss": 0.316, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.649706457925636, |
|
"eval_f1": 0.5289473684210527, |
|
"eval_loss": 0.30977755784988403, |
|
"eval_precision": 0.8072289156626506, |
|
"eval_recall": 0.3933463796477495, |
|
"eval_runtime": 30.1686, |
|
"eval_samples_per_second": 33.876, |
|
"eval_steps_per_second": 1.061, |
|
"step": 729 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 729, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 2121256775520.0, |
|
"train_batch_size": 38, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.462182377373202, |
|
"learning_rate": 2.997892231913295e-05, |
|
"num_train_epochs": 9, |
|
"per_device_train_batch_size": 38, |
|
"temperature": 10 |
|
} |
|
} |
|
|