|
{ |
|
"best_metric": 0.8142587508579271, |
|
"best_model_checkpoint": "final_models/transformer_base_final_2/finetune/qqp/checkpoint-7600", |
|
"epoch": 4.7290640394088665, |
|
"global_step": 9600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.7154598832130432, |
|
"eval_f1": 0.5973793611535021, |
|
"eval_loss": 0.553121030330658, |
|
"eval_mcc": 0.4064984762837603, |
|
"eval_runtime": 36.7967, |
|
"eval_samples_per_second": 730.745, |
|
"eval_steps_per_second": 91.367, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.876847290640394e-05, |
|
"loss": 0.5958, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.7652571797370911, |
|
"eval_f1": 0.7188168210976479, |
|
"eval_loss": 0.4824499189853668, |
|
"eval_mcc": 0.5174020903173898, |
|
"eval_runtime": 37.2147, |
|
"eval_samples_per_second": 722.538, |
|
"eval_steps_per_second": 90.341, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.753694581280788e-05, |
|
"loss": 0.4983, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.7783480286598206, |
|
"eval_f1": 0.7443376801647221, |
|
"eval_loss": 0.4612788259983063, |
|
"eval_mcc": 0.5498175947268261, |
|
"eval_runtime": 36.7893, |
|
"eval_samples_per_second": 730.892, |
|
"eval_steps_per_second": 91.385, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.630541871921182e-05, |
|
"loss": 0.4669, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.7915132641792297, |
|
"eval_f1": 0.7595642477268828, |
|
"eval_loss": 0.445358544588089, |
|
"eval_mcc": 0.5766718430214125, |
|
"eval_runtime": 37.1991, |
|
"eval_samples_per_second": 722.841, |
|
"eval_steps_per_second": 90.379, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.507389162561577e-05, |
|
"loss": 0.4396, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.7877570986747742, |
|
"eval_f1": 0.7255854209741789, |
|
"eval_loss": 0.4518950581550598, |
|
"eval_mcc": 0.5602641007879372, |
|
"eval_runtime": 36.7715, |
|
"eval_samples_per_second": 731.245, |
|
"eval_steps_per_second": 91.429, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.808546245098114, |
|
"eval_f1": 0.7764072272411395, |
|
"eval_loss": 0.4165021777153015, |
|
"eval_mcc": 0.6094217214687762, |
|
"eval_runtime": 36.7866, |
|
"eval_samples_per_second": 730.946, |
|
"eval_steps_per_second": 91.392, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.384236453201971e-05, |
|
"loss": 0.3846, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_accuracy": 0.8110751509666443, |
|
"eval_f1": 0.7758164165931156, |
|
"eval_loss": 0.4379393756389618, |
|
"eval_mcc": 0.61257416035664, |
|
"eval_runtime": 36.9761, |
|
"eval_samples_per_second": 727.2, |
|
"eval_steps_per_second": 90.924, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.261083743842365e-05, |
|
"loss": 0.3731, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_accuracy": 0.8046041131019592, |
|
"eval_f1": 0.7478160698857637, |
|
"eval_loss": 0.4554973840713501, |
|
"eval_mcc": 0.5959099136582018, |
|
"eval_runtime": 37.1999, |
|
"eval_samples_per_second": 722.825, |
|
"eval_steps_per_second": 90.377, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.1379310344827587e-05, |
|
"loss": 0.3644, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.8124139904975891, |
|
"eval_f1": 0.763702801461632, |
|
"eval_loss": 0.41165271401405334, |
|
"eval_mcc": 0.611986827013611, |
|
"eval_runtime": 36.8489, |
|
"eval_samples_per_second": 729.709, |
|
"eval_steps_per_second": 91.237, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.014778325123153e-05, |
|
"loss": 0.3597, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.8246495127677917, |
|
"eval_f1": 0.7941317731301577, |
|
"eval_loss": 0.3947930634021759, |
|
"eval_mcc": 0.6416304162338577, |
|
"eval_runtime": 36.786, |
|
"eval_samples_per_second": 730.958, |
|
"eval_steps_per_second": 91.394, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_accuracy": 0.8287403583526611, |
|
"eval_f1": 0.8015171759837938, |
|
"eval_loss": 0.43710950016975403, |
|
"eval_mcc": 0.651756180893439, |
|
"eval_runtime": 37.3039, |
|
"eval_samples_per_second": 720.808, |
|
"eval_steps_per_second": 90.125, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.891625615763547e-05, |
|
"loss": 0.2958, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.8277362585067749, |
|
"eval_f1": 0.7931773530987676, |
|
"eval_loss": 0.44132187962532043, |
|
"eval_mcc": 0.6456804038262367, |
|
"eval_runtime": 36.7484, |
|
"eval_samples_per_second": 731.705, |
|
"eval_steps_per_second": 91.487, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.768472906403941e-05, |
|
"loss": 0.2907, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_accuracy": 0.8306742310523987, |
|
"eval_f1": 0.799170746769, |
|
"eval_loss": 0.40259310603141785, |
|
"eval_mcc": 0.6528157073872258, |
|
"eval_runtime": 36.7616, |
|
"eval_samples_per_second": 731.443, |
|
"eval_steps_per_second": 91.454, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 3.645320197044335e-05, |
|
"loss": 0.2897, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_accuracy": 0.8309345841407776, |
|
"eval_f1": 0.7963991400931566, |
|
"eval_loss": 0.41956043243408203, |
|
"eval_mcc": 0.652035643544486, |
|
"eval_runtime": 36.6997, |
|
"eval_samples_per_second": 732.675, |
|
"eval_steps_per_second": 91.608, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.522167487684729e-05, |
|
"loss": 0.2897, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.8341701030731201, |
|
"eval_f1": 0.8046440306681271, |
|
"eval_loss": 0.38309139013290405, |
|
"eval_mcc": 0.6607038531075586, |
|
"eval_runtime": 36.6961, |
|
"eval_samples_per_second": 732.748, |
|
"eval_steps_per_second": 91.617, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_accuracy": 0.8317899703979492, |
|
"eval_f1": 0.795070454442481, |
|
"eval_loss": 0.4706867039203644, |
|
"eval_mcc": 0.6531092501008526, |
|
"eval_runtime": 37.0244, |
|
"eval_samples_per_second": 726.251, |
|
"eval_steps_per_second": 90.805, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.399014778325123e-05, |
|
"loss": 0.2317, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_accuracy": 0.8349882960319519, |
|
"eval_f1": 0.8043046795748245, |
|
"eval_loss": 0.45305201411247253, |
|
"eval_mcc": 0.66167024556867, |
|
"eval_runtime": 36.9761, |
|
"eval_samples_per_second": 727.199, |
|
"eval_steps_per_second": 90.924, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.275862068965517e-05, |
|
"loss": 0.2273, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_accuracy": 0.835248589515686, |
|
"eval_f1": 0.8068875326939843, |
|
"eval_loss": 0.4349125027656555, |
|
"eval_mcc": 0.6635037428287318, |
|
"eval_runtime": 37.2506, |
|
"eval_samples_per_second": 721.841, |
|
"eval_steps_per_second": 90.254, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.152709359605912e-05, |
|
"loss": 0.2257, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_accuracy": 0.8389676213264465, |
|
"eval_f1": 0.8142587508579271, |
|
"eval_loss": 0.43228474259376526, |
|
"eval_mcc": 0.6733312473430861, |
|
"eval_runtime": 36.7955, |
|
"eval_samples_per_second": 730.769, |
|
"eval_steps_per_second": 91.37, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3.0295566502463057e-05, |
|
"loss": 0.2282, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_accuracy": 0.840380847454071, |
|
"eval_f1": 0.8076026537565, |
|
"eval_loss": 0.4352688193321228, |
|
"eval_mcc": 0.6714324068394563, |
|
"eval_runtime": 36.745, |
|
"eval_samples_per_second": 731.773, |
|
"eval_steps_per_second": 91.495, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"eval_accuracy": 0.8367362022399902, |
|
"eval_f1": 0.8015370705244124, |
|
"eval_loss": 0.5048763751983643, |
|
"eval_mcc": 0.663438414164681, |
|
"eval_runtime": 36.7329, |
|
"eval_samples_per_second": 732.013, |
|
"eval_steps_per_second": 91.525, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 2.9064039408866993e-05, |
|
"loss": 0.1807, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_accuracy": 0.839153528213501, |
|
"eval_f1": 0.8050132996708894, |
|
"eval_loss": 0.4986499547958374, |
|
"eval_mcc": 0.668570004117267, |
|
"eval_runtime": 36.7487, |
|
"eval_samples_per_second": 731.7, |
|
"eval_steps_per_second": 91.486, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 2.7832512315270936e-05, |
|
"loss": 0.1756, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"eval_accuracy": 0.8389304280281067, |
|
"eval_f1": 0.8094672473714312, |
|
"eval_loss": 0.4615938365459442, |
|
"eval_mcc": 0.670007984448241, |
|
"eval_runtime": 36.7794, |
|
"eval_samples_per_second": 731.089, |
|
"eval_steps_per_second": 91.41, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 2.660098522167488e-05, |
|
"loss": 0.1758, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_accuracy": 0.8421287536621094, |
|
"eval_f1": 0.8122761243532481, |
|
"eval_loss": 0.5110430717468262, |
|
"eval_mcc": 0.6760658284875277, |
|
"eval_runtime": 36.9386, |
|
"eval_samples_per_second": 727.937, |
|
"eval_steps_per_second": 91.016, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"step": 9600, |
|
"total_flos": 7.577744814102528e+16, |
|
"train_loss": 0.3191047571102778, |
|
"train_runtime": 4680.9438, |
|
"train_samples_per_second": 520.19, |
|
"train_steps_per_second": 4.337 |
|
} |
|
], |
|
"max_steps": 20300, |
|
"num_train_epochs": 10, |
|
"total_flos": 7.577744814102528e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|