|
{ |
|
"best_metric": 0.8268101761252447, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-1728", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 1728, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.8947777152061462, |
|
"learning_rate": 0.0005181908296523575, |
|
"loss": 0.4457, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7925636007827789, |
|
"eval_f1": 0.8090090090090091, |
|
"eval_loss": 0.41997030377388, |
|
"eval_mcc": 0.59400157079309, |
|
"eval_precision": 0.7495826377295493, |
|
"eval_recall": 0.8786692759295499, |
|
"eval_runtime": 66.1643, |
|
"eval_samples_per_second": 15.446, |
|
"eval_steps_per_second": 0.484, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.971238613128662, |
|
"learning_rate": 0.0004606140708020956, |
|
"loss": 0.4163, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7915851272015656, |
|
"eval_f1": 0.7957813998082454, |
|
"eval_loss": 0.39958736300468445, |
|
"eval_mcc": 0.5836633290639656, |
|
"eval_precision": 0.7800751879699248, |
|
"eval_recall": 0.812133072407045, |
|
"eval_runtime": 66.5614, |
|
"eval_samples_per_second": 15.354, |
|
"eval_steps_per_second": 0.481, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.333700656890869, |
|
"learning_rate": 0.00040303731195183364, |
|
"loss": 0.4018, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7915851272015656, |
|
"eval_f1": 0.7909715407262021, |
|
"eval_loss": 0.40145382285118103, |
|
"eval_mcc": 0.5831803046632951, |
|
"eval_precision": 0.7933070866141733, |
|
"eval_recall": 0.7886497064579256, |
|
"eval_runtime": 66.8237, |
|
"eval_samples_per_second": 15.294, |
|
"eval_steps_per_second": 0.479, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.9171998500823975, |
|
"learning_rate": 0.00034546055310157167, |
|
"loss": 0.3976, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.786692759295499, |
|
"eval_f1": 0.783300198807157, |
|
"eval_loss": 0.40033411979675293, |
|
"eval_mcc": 0.5736667956847438, |
|
"eval_precision": 0.795959595959596, |
|
"eval_recall": 0.7710371819960861, |
|
"eval_runtime": 66.6803, |
|
"eval_samples_per_second": 15.327, |
|
"eval_steps_per_second": 0.48, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.3183633089065552, |
|
"learning_rate": 0.00028788379425130975, |
|
"loss": 0.3869, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8013698630136986, |
|
"eval_f1": 0.8031037827352084, |
|
"eval_loss": 0.39175575971603394, |
|
"eval_mcc": 0.60283323302071, |
|
"eval_precision": 0.7961538461538461, |
|
"eval_recall": 0.8101761252446184, |
|
"eval_runtime": 66.5306, |
|
"eval_samples_per_second": 15.361, |
|
"eval_steps_per_second": 0.481, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.770856499671936, |
|
"learning_rate": 0.0002303070354010478, |
|
"loss": 0.3843, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8228962818003914, |
|
"eval_f1": 0.8350045578851412, |
|
"eval_loss": 0.38628602027893066, |
|
"eval_mcc": 0.6528627457969611, |
|
"eval_precision": 0.7815699658703071, |
|
"eval_recall": 0.8962818003913894, |
|
"eval_runtime": 67.0562, |
|
"eval_samples_per_second": 15.241, |
|
"eval_steps_per_second": 0.477, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.1659635305404663, |
|
"learning_rate": 0.00017273027655078583, |
|
"loss": 0.3793, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8111545988258317, |
|
"eval_f1": 0.8269058295964126, |
|
"eval_loss": 0.39157727360725403, |
|
"eval_mcc": 0.6328787442530197, |
|
"eval_precision": 0.7632450331125827, |
|
"eval_recall": 0.9021526418786693, |
|
"eval_runtime": 66.5334, |
|
"eval_samples_per_second": 15.361, |
|
"eval_steps_per_second": 0.481, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.5721819400787354, |
|
"learning_rate": 0.0001151535177005239, |
|
"loss": 0.3727, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.821917808219178, |
|
"eval_f1": 0.8311688311688312, |
|
"eval_loss": 0.3822523355484009, |
|
"eval_mcc": 0.6477369472991997, |
|
"eval_precision": 0.7901234567901234, |
|
"eval_recall": 0.8767123287671232, |
|
"eval_runtime": 67.4301, |
|
"eval_samples_per_second": 15.156, |
|
"eval_steps_per_second": 0.475, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.5437999963760376, |
|
"learning_rate": 5.757675885026195e-05, |
|
"loss": 0.3692, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8268101761252447, |
|
"eval_f1": 0.8334901222953904, |
|
"eval_loss": 0.37901678681373596, |
|
"eval_mcc": 0.655734447578894, |
|
"eval_precision": 0.802536231884058, |
|
"eval_recall": 0.8669275929549902, |
|
"eval_runtime": 67.6441, |
|
"eval_samples_per_second": 15.108, |
|
"eval_steps_per_second": 0.473, |
|
"step": 1728 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1920, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2121256775520.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.6664545915670365, |
|
"learning_rate": 0.0005757675885026195, |
|
"num_train_epochs": 10, |
|
"per_device_train_batch_size": 16, |
|
"temperature": 37 |
|
} |
|
} |
|
|