Omar
update results
904c81d
{
"best_metric": 0.8142587508579271,
"best_model_checkpoint": "final_models/transformer_base_final_2/finetune/qqp/checkpoint-7600",
"epoch": 4.7290640394088665,
"global_step": 9600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"eval_accuracy": 0.7154598832130432,
"eval_f1": 0.5973793611535021,
"eval_loss": 0.553121030330658,
"eval_mcc": 0.4064984762837603,
"eval_runtime": 36.7967,
"eval_samples_per_second": 730.745,
"eval_steps_per_second": 91.367,
"step": 400
},
{
"epoch": 0.25,
"learning_rate": 4.876847290640394e-05,
"loss": 0.5958,
"step": 500
},
{
"epoch": 0.39,
"eval_accuracy": 0.7652571797370911,
"eval_f1": 0.7188168210976479,
"eval_loss": 0.4824499189853668,
"eval_mcc": 0.5174020903173898,
"eval_runtime": 37.2147,
"eval_samples_per_second": 722.538,
"eval_steps_per_second": 90.341,
"step": 800
},
{
"epoch": 0.49,
"learning_rate": 4.753694581280788e-05,
"loss": 0.4983,
"step": 1000
},
{
"epoch": 0.59,
"eval_accuracy": 0.7783480286598206,
"eval_f1": 0.7443376801647221,
"eval_loss": 0.4612788259983063,
"eval_mcc": 0.5498175947268261,
"eval_runtime": 36.7893,
"eval_samples_per_second": 730.892,
"eval_steps_per_second": 91.385,
"step": 1200
},
{
"epoch": 0.74,
"learning_rate": 4.630541871921182e-05,
"loss": 0.4669,
"step": 1500
},
{
"epoch": 0.79,
"eval_accuracy": 0.7915132641792297,
"eval_f1": 0.7595642477268828,
"eval_loss": 0.445358544588089,
"eval_mcc": 0.5766718430214125,
"eval_runtime": 37.1991,
"eval_samples_per_second": 722.841,
"eval_steps_per_second": 90.379,
"step": 1600
},
{
"epoch": 0.99,
"learning_rate": 4.507389162561577e-05,
"loss": 0.4396,
"step": 2000
},
{
"epoch": 0.99,
"eval_accuracy": 0.7877570986747742,
"eval_f1": 0.7255854209741789,
"eval_loss": 0.4518950581550598,
"eval_mcc": 0.5602641007879372,
"eval_runtime": 36.7715,
"eval_samples_per_second": 731.245,
"eval_steps_per_second": 91.429,
"step": 2000
},
{
"epoch": 1.18,
"eval_accuracy": 0.808546245098114,
"eval_f1": 0.7764072272411395,
"eval_loss": 0.4165021777153015,
"eval_mcc": 0.6094217214687762,
"eval_runtime": 36.7866,
"eval_samples_per_second": 730.946,
"eval_steps_per_second": 91.392,
"step": 2400
},
{
"epoch": 1.23,
"learning_rate": 4.384236453201971e-05,
"loss": 0.3846,
"step": 2500
},
{
"epoch": 1.38,
"eval_accuracy": 0.8110751509666443,
"eval_f1": 0.7758164165931156,
"eval_loss": 0.4379393756389618,
"eval_mcc": 0.61257416035664,
"eval_runtime": 36.9761,
"eval_samples_per_second": 727.2,
"eval_steps_per_second": 90.924,
"step": 2800
},
{
"epoch": 1.48,
"learning_rate": 4.261083743842365e-05,
"loss": 0.3731,
"step": 3000
},
{
"epoch": 1.58,
"eval_accuracy": 0.8046041131019592,
"eval_f1": 0.7478160698857637,
"eval_loss": 0.4554973840713501,
"eval_mcc": 0.5959099136582018,
"eval_runtime": 37.1999,
"eval_samples_per_second": 722.825,
"eval_steps_per_second": 90.377,
"step": 3200
},
{
"epoch": 1.72,
"learning_rate": 4.1379310344827587e-05,
"loss": 0.3644,
"step": 3500
},
{
"epoch": 1.77,
"eval_accuracy": 0.8124139904975891,
"eval_f1": 0.763702801461632,
"eval_loss": 0.41165271401405334,
"eval_mcc": 0.611986827013611,
"eval_runtime": 36.8489,
"eval_samples_per_second": 729.709,
"eval_steps_per_second": 91.237,
"step": 3600
},
{
"epoch": 1.97,
"learning_rate": 4.014778325123153e-05,
"loss": 0.3597,
"step": 4000
},
{
"epoch": 1.97,
"eval_accuracy": 0.8246495127677917,
"eval_f1": 0.7941317731301577,
"eval_loss": 0.3947930634021759,
"eval_mcc": 0.6416304162338577,
"eval_runtime": 36.786,
"eval_samples_per_second": 730.958,
"eval_steps_per_second": 91.394,
"step": 4000
},
{
"epoch": 2.17,
"eval_accuracy": 0.8287403583526611,
"eval_f1": 0.8015171759837938,
"eval_loss": 0.43710950016975403,
"eval_mcc": 0.651756180893439,
"eval_runtime": 37.3039,
"eval_samples_per_second": 720.808,
"eval_steps_per_second": 90.125,
"step": 4400
},
{
"epoch": 2.22,
"learning_rate": 3.891625615763547e-05,
"loss": 0.2958,
"step": 4500
},
{
"epoch": 2.36,
"eval_accuracy": 0.8277362585067749,
"eval_f1": 0.7931773530987676,
"eval_loss": 0.44132187962532043,
"eval_mcc": 0.6456804038262367,
"eval_runtime": 36.7484,
"eval_samples_per_second": 731.705,
"eval_steps_per_second": 91.487,
"step": 4800
},
{
"epoch": 2.46,
"learning_rate": 3.768472906403941e-05,
"loss": 0.2907,
"step": 5000
},
{
"epoch": 2.56,
"eval_accuracy": 0.8306742310523987,
"eval_f1": 0.799170746769,
"eval_loss": 0.40259310603141785,
"eval_mcc": 0.6528157073872258,
"eval_runtime": 36.7616,
"eval_samples_per_second": 731.443,
"eval_steps_per_second": 91.454,
"step": 5200
},
{
"epoch": 2.71,
"learning_rate": 3.645320197044335e-05,
"loss": 0.2897,
"step": 5500
},
{
"epoch": 2.76,
"eval_accuracy": 0.8309345841407776,
"eval_f1": 0.7963991400931566,
"eval_loss": 0.41956043243408203,
"eval_mcc": 0.652035643544486,
"eval_runtime": 36.6997,
"eval_samples_per_second": 732.675,
"eval_steps_per_second": 91.608,
"step": 5600
},
{
"epoch": 2.96,
"learning_rate": 3.522167487684729e-05,
"loss": 0.2897,
"step": 6000
},
{
"epoch": 2.96,
"eval_accuracy": 0.8341701030731201,
"eval_f1": 0.8046440306681271,
"eval_loss": 0.38309139013290405,
"eval_mcc": 0.6607038531075586,
"eval_runtime": 36.6961,
"eval_samples_per_second": 732.748,
"eval_steps_per_second": 91.617,
"step": 6000
},
{
"epoch": 3.15,
"eval_accuracy": 0.8317899703979492,
"eval_f1": 0.795070454442481,
"eval_loss": 0.4706867039203644,
"eval_mcc": 0.6531092501008526,
"eval_runtime": 37.0244,
"eval_samples_per_second": 726.251,
"eval_steps_per_second": 90.805,
"step": 6400
},
{
"epoch": 3.2,
"learning_rate": 3.399014778325123e-05,
"loss": 0.2317,
"step": 6500
},
{
"epoch": 3.35,
"eval_accuracy": 0.8349882960319519,
"eval_f1": 0.8043046795748245,
"eval_loss": 0.45305201411247253,
"eval_mcc": 0.66167024556867,
"eval_runtime": 36.9761,
"eval_samples_per_second": 727.199,
"eval_steps_per_second": 90.924,
"step": 6800
},
{
"epoch": 3.45,
"learning_rate": 3.275862068965517e-05,
"loss": 0.2273,
"step": 7000
},
{
"epoch": 3.55,
"eval_accuracy": 0.835248589515686,
"eval_f1": 0.8068875326939843,
"eval_loss": 0.4349125027656555,
"eval_mcc": 0.6635037428287318,
"eval_runtime": 37.2506,
"eval_samples_per_second": 721.841,
"eval_steps_per_second": 90.254,
"step": 7200
},
{
"epoch": 3.69,
"learning_rate": 3.152709359605912e-05,
"loss": 0.2257,
"step": 7500
},
{
"epoch": 3.74,
"eval_accuracy": 0.8389676213264465,
"eval_f1": 0.8142587508579271,
"eval_loss": 0.43228474259376526,
"eval_mcc": 0.6733312473430861,
"eval_runtime": 36.7955,
"eval_samples_per_second": 730.769,
"eval_steps_per_second": 91.37,
"step": 7600
},
{
"epoch": 3.94,
"learning_rate": 3.0295566502463057e-05,
"loss": 0.2282,
"step": 8000
},
{
"epoch": 3.94,
"eval_accuracy": 0.840380847454071,
"eval_f1": 0.8076026537565,
"eval_loss": 0.4352688193321228,
"eval_mcc": 0.6714324068394563,
"eval_runtime": 36.745,
"eval_samples_per_second": 731.773,
"eval_steps_per_second": 91.495,
"step": 8000
},
{
"epoch": 4.14,
"eval_accuracy": 0.8367362022399902,
"eval_f1": 0.8015370705244124,
"eval_loss": 0.5048763751983643,
"eval_mcc": 0.663438414164681,
"eval_runtime": 36.7329,
"eval_samples_per_second": 732.013,
"eval_steps_per_second": 91.525,
"step": 8400
},
{
"epoch": 4.19,
"learning_rate": 2.9064039408866993e-05,
"loss": 0.1807,
"step": 8500
},
{
"epoch": 4.33,
"eval_accuracy": 0.839153528213501,
"eval_f1": 0.8050132996708894,
"eval_loss": 0.4986499547958374,
"eval_mcc": 0.668570004117267,
"eval_runtime": 36.7487,
"eval_samples_per_second": 731.7,
"eval_steps_per_second": 91.486,
"step": 8800
},
{
"epoch": 4.43,
"learning_rate": 2.7832512315270936e-05,
"loss": 0.1756,
"step": 9000
},
{
"epoch": 4.53,
"eval_accuracy": 0.8389304280281067,
"eval_f1": 0.8094672473714312,
"eval_loss": 0.4615938365459442,
"eval_mcc": 0.670007984448241,
"eval_runtime": 36.7794,
"eval_samples_per_second": 731.089,
"eval_steps_per_second": 91.41,
"step": 9200
},
{
"epoch": 4.68,
"learning_rate": 2.660098522167488e-05,
"loss": 0.1758,
"step": 9500
},
{
"epoch": 4.73,
"eval_accuracy": 0.8421287536621094,
"eval_f1": 0.8122761243532481,
"eval_loss": 0.5110430717468262,
"eval_mcc": 0.6760658284875277,
"eval_runtime": 36.9386,
"eval_samples_per_second": 727.937,
"eval_steps_per_second": 91.016,
"step": 9600
},
{
"epoch": 4.73,
"step": 9600,
"total_flos": 7.577744814102528e+16,
"train_loss": 0.3191047571102778,
"train_runtime": 4680.9438,
"train_samples_per_second": 520.19,
"train_steps_per_second": 4.337
}
],
"max_steps": 20300,
"num_train_epochs": 10,
"total_flos": 7.577744814102528e+16,
"trial_name": null,
"trial_params": null
}