|
{ |
|
"best_metric": 0.7607843137254902, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-18/checkpoint-1120", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 1280, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.1976242065429688, |
|
"learning_rate": 0.0004979101297109036, |
|
"loss": 0.5537, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.5193991661071777, |
|
"eval_mcc": 0.0, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.8654, |
|
"eval_samples_per_second": 683.512, |
|
"eval_steps_per_second": 21.444, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.7457181215286255, |
|
"learning_rate": 0.00043567136349704073, |
|
"loss": 0.517, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7019607843137254, |
|
"eval_f1": 0.27480916030534347, |
|
"eval_loss": 0.4912988245487213, |
|
"eval_mcc": 0.24246530650972198, |
|
"eval_precision": 0.7272727272727273, |
|
"eval_recall": 0.16941176470588235, |
|
"eval_runtime": 2.0814, |
|
"eval_samples_per_second": 612.558, |
|
"eval_steps_per_second": 19.218, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.177762031555176, |
|
"learning_rate": 0.00037343259728317776, |
|
"loss": 0.4984, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7294117647058823, |
|
"eval_f1": 0.5161290322580645, |
|
"eval_loss": 0.4763549864292145, |
|
"eval_mcc": 0.3501338577435767, |
|
"eval_precision": 0.6388888888888888, |
|
"eval_recall": 0.4329411764705882, |
|
"eval_runtime": 1.8808, |
|
"eval_samples_per_second": 677.886, |
|
"eval_steps_per_second": 21.267, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.1199020147323608, |
|
"learning_rate": 0.00031119383106931485, |
|
"loss": 0.4888, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7278431372549019, |
|
"eval_f1": 0.4376012965964343, |
|
"eval_loss": 0.47154995799064636, |
|
"eval_mcc": 0.3302932991068742, |
|
"eval_precision": 0.703125, |
|
"eval_recall": 0.3176470588235294, |
|
"eval_runtime": 1.8859, |
|
"eval_samples_per_second": 676.058, |
|
"eval_steps_per_second": 21.21, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.9632917642593384, |
|
"learning_rate": 0.0002489550648554518, |
|
"loss": 0.4797, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7388235294117647, |
|
"eval_f1": 0.5963636363636363, |
|
"eval_loss": 0.4733050763607025, |
|
"eval_mcc": 0.4039872699550251, |
|
"eval_precision": 0.615, |
|
"eval_recall": 0.5788235294117647, |
|
"eval_runtime": 1.8712, |
|
"eval_samples_per_second": 681.38, |
|
"eval_steps_per_second": 21.377, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.4523799419403076, |
|
"learning_rate": 0.00018671629864158888, |
|
"loss": 0.472, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7403921568627451, |
|
"eval_f1": 0.46353322528363045, |
|
"eval_loss": 0.4695047438144684, |
|
"eval_mcc": 0.36750944548511355, |
|
"eval_precision": 0.7447916666666666, |
|
"eval_recall": 0.33647058823529413, |
|
"eval_runtime": 1.8728, |
|
"eval_samples_per_second": 680.799, |
|
"eval_steps_per_second": 21.358, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 3.7260565757751465, |
|
"learning_rate": 0.0001244775324277259, |
|
"loss": 0.4686, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7607843137254902, |
|
"eval_f1": 0.5861601085481682, |
|
"eval_loss": 0.47005850076675415, |
|
"eval_mcc": 0.43344484177930304, |
|
"eval_precision": 0.6923076923076923, |
|
"eval_recall": 0.508235294117647, |
|
"eval_runtime": 1.8699, |
|
"eval_samples_per_second": 681.865, |
|
"eval_steps_per_second": 21.392, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.6883962154388428, |
|
"learning_rate": 6.223876621386296e-05, |
|
"loss": 0.4597, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7529411764705882, |
|
"eval_f1": 0.5903771131339401, |
|
"eval_loss": 0.47118470072746277, |
|
"eval_mcc": 0.42107634181861553, |
|
"eval_precision": 0.6598837209302325, |
|
"eval_recall": 0.5341176470588235, |
|
"eval_runtime": 1.8712, |
|
"eval_samples_per_second": 681.374, |
|
"eval_steps_per_second": 21.376, |
|
"step": 1280 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 2334043503360.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.8696755067403684, |
|
"learning_rate": 0.0005601488959247667, |
|
"num_train_epochs": 9, |
|
"temperature": 29 |
|
} |
|
} |
|
|