gsInfoNCE-roberta-base / trainer_state.json
root
init
0a185c9
{
"best_metric": 0.8433638471389473,
"best_model_checkpoint": "result/my-unsup-simcse-roberta-base-d0.1-rs192-std0.1-t0.05",
"epoch": 1.0,
"global_step": 15626,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"eval_avg_sts": 0.7572535655220898,
"eval_sickr_spearman": 0.7054352844388353,
"eval_stsb_spearman": 0.8090718466053444,
"step": 125
},
{
"epoch": 0.02,
"eval_avg_sts": 0.7614025514877952,
"eval_sickr_spearman": 0.7051708251943171,
"eval_stsb_spearman": 0.8176342777812734,
"step": 250
},
{
"epoch": 0.02,
"eval_avg_sts": 0.7636217659471919,
"eval_sickr_spearman": 0.7079373205749496,
"eval_stsb_spearman": 0.8193062113194342,
"step": 375
},
{
"epoch": 0.03,
"learning_rate": 9.680020478689364e-06,
"loss": 0.2125,
"step": 500
},
{
"epoch": 0.03,
"eval_avg_sts": 0.7657425201047614,
"eval_sickr_spearman": 0.7097071706007412,
"eval_stsb_spearman": 0.8217778696087817,
"step": 500
},
{
"epoch": 0.04,
"eval_avg_sts": 0.7674773834562932,
"eval_sickr_spearman": 0.7100151940542637,
"eval_stsb_spearman": 0.8249395728583228,
"step": 625
},
{
"epoch": 0.05,
"eval_avg_sts": 0.7670247636071179,
"eval_sickr_spearman": 0.7053980603352208,
"eval_stsb_spearman": 0.828651466879015,
"step": 750
},
{
"epoch": 0.06,
"eval_avg_sts": 0.7642544693075077,
"eval_sickr_spearman": 0.7042453619318076,
"eval_stsb_spearman": 0.8242635766832077,
"step": 875
},
{
"epoch": 0.06,
"learning_rate": 9.36004095737873e-06,
"loss": 0.0005,
"step": 1000
},
{
"epoch": 0.06,
"eval_avg_sts": 0.7706729186771359,
"eval_sickr_spearman": 0.7109765365495476,
"eval_stsb_spearman": 0.8303693008047242,
"step": 1000
},
{
"epoch": 0.07,
"eval_avg_sts": 0.7723601548256038,
"eval_sickr_spearman": 0.7154485202800445,
"eval_stsb_spearman": 0.8292717893711632,
"step": 1125
},
{
"epoch": 0.08,
"eval_avg_sts": 0.7715988622174201,
"eval_sickr_spearman": 0.7117784157880568,
"eval_stsb_spearman": 0.8314193086467835,
"step": 1250
},
{
"epoch": 0.09,
"eval_avg_sts": 0.7699957963614845,
"eval_sickr_spearman": 0.7098231176796127,
"eval_stsb_spearman": 0.8301684750433563,
"step": 1375
},
{
"epoch": 0.1,
"learning_rate": 9.040061436068092e-06,
"loss": 0.0003,
"step": 1500
},
{
"epoch": 0.1,
"eval_avg_sts": 0.7732021256956165,
"eval_sickr_spearman": 0.7176254338905241,
"eval_stsb_spearman": 0.8287788175007089,
"step": 1500
},
{
"epoch": 0.1,
"eval_avg_sts": 0.7706739860482326,
"eval_sickr_spearman": 0.7126164144148476,
"eval_stsb_spearman": 0.8287315576816174,
"step": 1625
},
{
"epoch": 0.11,
"eval_avg_sts": 0.7719707314787304,
"eval_sickr_spearman": 0.7146769966976441,
"eval_stsb_spearman": 0.8292644662598168,
"step": 1750
},
{
"epoch": 0.12,
"eval_avg_sts": 0.7692985063340654,
"eval_sickr_spearman": 0.7109207724407779,
"eval_stsb_spearman": 0.827676240227353,
"step": 1875
},
{
"epoch": 0.13,
"learning_rate": 8.720081914757458e-06,
"loss": 0.0002,
"step": 2000
},
{
"epoch": 0.13,
"eval_avg_sts": 0.771889783054714,
"eval_sickr_spearman": 0.7135132511409,
"eval_stsb_spearman": 0.8302663149685279,
"step": 2000
},
{
"epoch": 0.14,
"eval_avg_sts": 0.7615509683620076,
"eval_sickr_spearman": 0.6917990552038427,
"eval_stsb_spearman": 0.8313028815201725,
"step": 2125
},
{
"epoch": 0.14,
"eval_avg_sts": 0.7642814292065848,
"eval_sickr_spearman": 0.6928178503087533,
"eval_stsb_spearman": 0.8357450081044163,
"step": 2250
},
{
"epoch": 0.15,
"eval_avg_sts": 0.7664718523224301,
"eval_sickr_spearman": 0.6978614041463637,
"eval_stsb_spearman": 0.8350823004984966,
"step": 2375
},
{
"epoch": 0.16,
"learning_rate": 8.400102393446819e-06,
"loss": 0.0002,
"step": 2500
},
{
"epoch": 0.16,
"eval_avg_sts": 0.7669677380011899,
"eval_sickr_spearman": 0.6978531908280179,
"eval_stsb_spearman": 0.8360822851743619,
"step": 2500
},
{
"epoch": 0.17,
"eval_avg_sts": 0.767509277729593,
"eval_sickr_spearman": 0.697946947538025,
"eval_stsb_spearman": 0.837071607921161,
"step": 2625
},
{
"epoch": 0.18,
"eval_avg_sts": 0.7668651085967232,
"eval_sickr_spearman": 0.6985169326187911,
"eval_stsb_spearman": 0.8352132845746553,
"step": 2750
},
{
"epoch": 0.18,
"eval_avg_sts": 0.7668897285698411,
"eval_sickr_spearman": 0.6986797580526662,
"eval_stsb_spearman": 0.835099699087016,
"step": 2875
},
{
"epoch": 0.19,
"learning_rate": 8.080122872136184e-06,
"loss": 0.0002,
"step": 3000
},
{
"epoch": 0.19,
"eval_avg_sts": 0.7678085026409753,
"eval_sickr_spearman": 0.6984642425005135,
"eval_stsb_spearman": 0.8371527627814372,
"step": 3000
},
{
"epoch": 0.2,
"eval_avg_sts": 0.7692156947271456,
"eval_sickr_spearman": 0.7026153304423024,
"eval_stsb_spearman": 0.8358160590119886,
"step": 3125
},
{
"epoch": 0.21,
"eval_avg_sts": 0.768843154466003,
"eval_sickr_spearman": 0.705912809649333,
"eval_stsb_spearman": 0.8317734992826732,
"step": 3250
},
{
"epoch": 0.22,
"eval_avg_sts": 0.7672560021396605,
"eval_sickr_spearman": 0.7075799211491486,
"eval_stsb_spearman": 0.8269320831301724,
"step": 3375
},
{
"epoch": 0.22,
"learning_rate": 7.760143350825547e-06,
"loss": 0.0002,
"step": 3500
},
{
"epoch": 0.22,
"eval_avg_sts": 0.7665000851835462,
"eval_sickr_spearman": 0.7056194356817491,
"eval_stsb_spearman": 0.8273807346853432,
"step": 3500
},
{
"epoch": 0.23,
"eval_avg_sts": 0.7675835380050031,
"eval_sickr_spearman": 0.706044462898375,
"eval_stsb_spearman": 0.8291226131116314,
"step": 3625
},
{
"epoch": 0.24,
"eval_avg_sts": 0.766064422853222,
"eval_sickr_spearman": 0.7050694795702827,
"eval_stsb_spearman": 0.8270593661361613,
"step": 3750
},
{
"epoch": 0.25,
"eval_avg_sts": 0.7680428355810465,
"eval_sickr_spearman": 0.7066562350373923,
"eval_stsb_spearman": 0.8294294361247007,
"step": 3875
},
{
"epoch": 0.26,
"learning_rate": 7.440163829514912e-06,
"loss": 0.0003,
"step": 4000
},
{
"epoch": 0.26,
"eval_avg_sts": 0.7706781402051571,
"eval_sickr_spearman": 0.7060189583835114,
"eval_stsb_spearman": 0.8353373220268029,
"step": 4000
},
{
"epoch": 0.26,
"eval_avg_sts": 0.7704250512723954,
"eval_sickr_spearman": 0.7065240054151332,
"eval_stsb_spearman": 0.8343260971296576,
"step": 4125
},
{
"epoch": 0.27,
"eval_avg_sts": 0.7676150118719631,
"eval_sickr_spearman": 0.7018462564460752,
"eval_stsb_spearman": 0.833383767297851,
"step": 4250
},
{
"epoch": 0.28,
"eval_avg_sts": 0.7672794331395227,
"eval_sickr_spearman": 0.7023547617170005,
"eval_stsb_spearman": 0.832204104562045,
"step": 4375
},
{
"epoch": 0.29,
"learning_rate": 7.120184308204276e-06,
"loss": 0.0003,
"step": 4500
},
{
"epoch": 0.29,
"eval_avg_sts": 0.7603546899710306,
"eval_sickr_spearman": 0.6936834187877694,
"eval_stsb_spearman": 0.8270259611542918,
"step": 4500
},
{
"epoch": 0.3,
"eval_avg_sts": 0.7604700472645542,
"eval_sickr_spearman": 0.6935327932536596,
"eval_stsb_spearman": 0.8274073012754489,
"step": 4625
},
{
"epoch": 0.3,
"eval_avg_sts": 0.761349850549489,
"eval_sickr_spearman": 0.6934180469523239,
"eval_stsb_spearman": 0.8292816541466541,
"step": 4750
},
{
"epoch": 0.31,
"eval_avg_sts": 0.7624410643103536,
"eval_sickr_spearman": 0.6947914482668454,
"eval_stsb_spearman": 0.830090680353862,
"step": 4875
},
{
"epoch": 0.32,
"learning_rate": 6.800204786893639e-06,
"loss": 0.0001,
"step": 5000
},
{
"epoch": 0.32,
"eval_avg_sts": 0.7625184502556002,
"eval_sickr_spearman": 0.6949592209041686,
"eval_stsb_spearman": 0.8300776796070318,
"step": 5000
},
{
"epoch": 0.33,
"eval_avg_sts": 0.7620233259863773,
"eval_sickr_spearman": 0.6943664690813208,
"eval_stsb_spearman": 0.829680182891434,
"step": 5125
},
{
"epoch": 0.34,
"eval_avg_sts": 0.7688935367788103,
"eval_sickr_spearman": 0.7047914275240574,
"eval_stsb_spearman": 0.8329956460335631,
"step": 5250
},
{
"epoch": 0.34,
"eval_avg_sts": 0.7721471528975761,
"eval_sickr_spearman": 0.7084637894778126,
"eval_stsb_spearman": 0.8358305163173394,
"step": 5375
},
{
"epoch": 0.35,
"learning_rate": 6.480225265583003e-06,
"loss": 0.0002,
"step": 5500
},
{
"epoch": 0.35,
"eval_avg_sts": 0.7736568332643656,
"eval_sickr_spearman": 0.7087314187750259,
"eval_stsb_spearman": 0.8385822477537053,
"step": 5500
},
{
"epoch": 0.36,
"eval_avg_sts": 0.7734070993186029,
"eval_sickr_spearman": 0.7073194965171511,
"eval_stsb_spearman": 0.8394947021200546,
"step": 5625
},
{
"epoch": 0.37,
"eval_avg_sts": 0.7727328180179123,
"eval_sickr_spearman": 0.7090136014959747,
"eval_stsb_spearman": 0.83645203453985,
"step": 5750
},
{
"epoch": 0.38,
"eval_avg_sts": 0.7702894469668818,
"eval_sickr_spearman": 0.7065904804595234,
"eval_stsb_spearman": 0.8339884134742402,
"step": 5875
},
{
"epoch": 0.38,
"learning_rate": 6.1602457442723675e-06,
"loss": 0.0001,
"step": 6000
},
{
"epoch": 0.38,
"eval_avg_sts": 0.7673639531247151,
"eval_sickr_spearman": 0.7063840427855425,
"eval_stsb_spearman": 0.8283438634638877,
"step": 6000
},
{
"epoch": 0.39,
"eval_avg_sts": 0.7656242558893468,
"eval_sickr_spearman": 0.7046311957696599,
"eval_stsb_spearman": 0.8266173160090337,
"step": 6125
},
{
"epoch": 0.4,
"eval_avg_sts": 0.7678162328535889,
"eval_sickr_spearman": 0.7065671853753259,
"eval_stsb_spearman": 0.8290652803318519,
"step": 6250
},
{
"epoch": 0.41,
"eval_avg_sts": 0.7659380803169125,
"eval_sickr_spearman": 0.7034162970898843,
"eval_stsb_spearman": 0.8284598635439405,
"step": 6375
},
{
"epoch": 0.42,
"learning_rate": 5.840266222961732e-06,
"loss": 0.0002,
"step": 6500
},
{
"epoch": 0.42,
"eval_avg_sts": 0.7674929110456945,
"eval_sickr_spearman": 0.7043084267679958,
"eval_stsb_spearman": 0.8306773953233932,
"step": 6500
},
{
"epoch": 0.42,
"eval_avg_sts": 0.7674747228197092,
"eval_sickr_spearman": 0.7030638928986329,
"eval_stsb_spearman": 0.8318855527407855,
"step": 6625
},
{
"epoch": 0.43,
"eval_avg_sts": 0.7669269125478364,
"eval_sickr_spearman": 0.7022142707452941,
"eval_stsb_spearman": 0.8316395543503788,
"step": 6750
},
{
"epoch": 0.44,
"eval_avg_sts": 0.7663176164301795,
"eval_sickr_spearman": 0.7013282410170653,
"eval_stsb_spearman": 0.8313069918432938,
"step": 6875
},
{
"epoch": 0.45,
"learning_rate": 5.520286701651095e-06,
"loss": 0.0001,
"step": 7000
},
{
"epoch": 0.45,
"eval_avg_sts": 0.7679613274313469,
"eval_sickr_spearman": 0.705412661790058,
"eval_stsb_spearman": 0.8305099930726358,
"step": 7000
},
{
"epoch": 0.46,
"eval_avg_sts": 0.7677041476146869,
"eval_sickr_spearman": 0.7044645758787711,
"eval_stsb_spearman": 0.8309437193506026,
"step": 7125
},
{
"epoch": 0.46,
"eval_avg_sts": 0.771967747158485,
"eval_sickr_spearman": 0.7112394107677183,
"eval_stsb_spearman": 0.8326960835492517,
"step": 7250
},
{
"epoch": 0.47,
"eval_avg_sts": 0.7727980509536904,
"eval_sickr_spearman": 0.7122028666314655,
"eval_stsb_spearman": 0.8333932352759152,
"step": 7375
},
{
"epoch": 0.48,
"learning_rate": 5.200307180340458e-06,
"loss": 0.0002,
"step": 7500
},
{
"epoch": 0.48,
"eval_avg_sts": 0.7716808664260861,
"eval_sickr_spearman": 0.7073300153283661,
"eval_stsb_spearman": 0.8360317175238062,
"step": 7500
},
{
"epoch": 0.49,
"eval_avg_sts": 0.7749475591557311,
"eval_sickr_spearman": 0.7102010744168291,
"eval_stsb_spearman": 0.8396940438946332,
"step": 7625
},
{
"epoch": 0.5,
"eval_avg_sts": 0.7741400965390657,
"eval_sickr_spearman": 0.7103573195898074,
"eval_stsb_spearman": 0.8379228734883241,
"step": 7750
},
{
"epoch": 0.5,
"eval_avg_sts": 0.7742400629462907,
"eval_sickr_spearman": 0.7103860902195688,
"eval_stsb_spearman": 0.8380940356730125,
"step": 7875
},
{
"epoch": 0.51,
"learning_rate": 4.8803276590298225e-06,
"loss": 0.0001,
"step": 8000
},
{
"epoch": 0.51,
"eval_avg_sts": 0.7737984133316254,
"eval_sickr_spearman": 0.7100542913708344,
"eval_stsb_spearman": 0.8375425352924163,
"step": 8000
},
{
"epoch": 0.52,
"eval_avg_sts": 0.7736256294772575,
"eval_sickr_spearman": 0.7117209225596355,
"eval_stsb_spearman": 0.8355303363948796,
"step": 8125
},
{
"epoch": 0.53,
"eval_avg_sts": 0.7739892471083305,
"eval_sickr_spearman": 0.7121400419507844,
"eval_stsb_spearman": 0.8358384522658765,
"step": 8250
},
{
"epoch": 0.54,
"eval_avg_sts": 0.771587372188157,
"eval_sickr_spearman": 0.7086112929903291,
"eval_stsb_spearman": 0.834563451385985,
"step": 8375
},
{
"epoch": 0.54,
"learning_rate": 4.560348137719187e-06,
"loss": 0.0001,
"step": 8500
},
{
"epoch": 0.54,
"eval_avg_sts": 0.7717136828796769,
"eval_sickr_spearman": 0.7081623943162886,
"eval_stsb_spearman": 0.8352649714430652,
"step": 8500
},
{
"epoch": 0.55,
"eval_avg_sts": 0.7700399485422347,
"eval_sickr_spearman": 0.7062681917688737,
"eval_stsb_spearman": 0.8338117053155956,
"step": 8625
},
{
"epoch": 0.56,
"eval_avg_sts": 0.7655558973491845,
"eval_sickr_spearman": 0.7032426646581855,
"eval_stsb_spearman": 0.8278691300401836,
"step": 8750
},
{
"epoch": 0.57,
"eval_avg_sts": 0.7691329874077902,
"eval_sickr_spearman": 0.7023757513083291,
"eval_stsb_spearman": 0.8358902235072514,
"step": 8875
},
{
"epoch": 0.58,
"learning_rate": 4.24036861640855e-06,
"loss": 0.0001,
"step": 9000
},
{
"epoch": 0.58,
"eval_avg_sts": 0.7696286255223032,
"eval_sickr_spearman": 0.7031883895135604,
"eval_stsb_spearman": 0.8360688615310459,
"step": 9000
},
{
"epoch": 0.58,
"eval_avg_sts": 0.7696384100854612,
"eval_sickr_spearman": 0.7036895940570672,
"eval_stsb_spearman": 0.835587226113855,
"step": 9125
},
{
"epoch": 0.59,
"eval_avg_sts": 0.7688416464364851,
"eval_sickr_spearman": 0.7023802182007628,
"eval_stsb_spearman": 0.8353030746722075,
"step": 9250
},
{
"epoch": 0.6,
"eval_avg_sts": 0.7660994859121804,
"eval_sickr_spearman": 0.7051366750811947,
"eval_stsb_spearman": 0.8270622967431659,
"step": 9375
},
{
"epoch": 0.61,
"learning_rate": 3.920389095097914e-06,
"loss": 0.0002,
"step": 9500
},
{
"epoch": 0.61,
"eval_avg_sts": 0.7673638146997882,
"eval_sickr_spearman": 0.7064614138295504,
"eval_stsb_spearman": 0.828266215570026,
"step": 9500
},
{
"epoch": 0.62,
"eval_avg_sts": 0.7688152733727235,
"eval_sickr_spearman": 0.707200523478889,
"eval_stsb_spearman": 0.830430023266558,
"step": 9625
},
{
"epoch": 0.62,
"eval_avg_sts": 0.7697361419751771,
"eval_sickr_spearman": 0.7075413521746937,
"eval_stsb_spearman": 0.8319309317756607,
"step": 9750
},
{
"epoch": 0.63,
"eval_avg_sts": 0.7718885121529019,
"eval_sickr_spearman": 0.7071343846522086,
"eval_stsb_spearman": 0.8366426396535952,
"step": 9875
},
{
"epoch": 0.64,
"learning_rate": 3.600409573787278e-06,
"loss": 0.0001,
"step": 10000
},
{
"epoch": 0.64,
"eval_avg_sts": 0.7727693021998165,
"eval_sickr_spearman": 0.7095777267823654,
"eval_stsb_spearman": 0.8359608776172676,
"step": 10000
},
{
"epoch": 0.65,
"eval_avg_sts": 0.7717316927837671,
"eval_sickr_spearman": 0.7084112434528395,
"eval_stsb_spearman": 0.8350521421146947,
"step": 10125
},
{
"epoch": 0.66,
"eval_avg_sts": 0.7720632550487434,
"eval_sickr_spearman": 0.7069583986965393,
"eval_stsb_spearman": 0.8371681114009475,
"step": 10250
},
{
"epoch": 0.66,
"eval_avg_sts": 0.7716685440161564,
"eval_sickr_spearman": 0.7068186762224559,
"eval_stsb_spearman": 0.836518411809857,
"step": 10375
},
{
"epoch": 0.67,
"learning_rate": 3.280430052476642e-06,
"loss": 0.0001,
"step": 10500
},
{
"epoch": 0.67,
"eval_avg_sts": 0.7706994083764094,
"eval_sickr_spearman": 0.7044442106917614,
"eval_stsb_spearman": 0.8369546060610572,
"step": 10500
},
{
"epoch": 0.68,
"eval_avg_sts": 0.770376770388492,
"eval_sickr_spearman": 0.704766259226904,
"eval_stsb_spearman": 0.83598728155008,
"step": 10625
},
{
"epoch": 0.69,
"eval_avg_sts": 0.7700945307869602,
"eval_sickr_spearman": 0.7045319154829873,
"eval_stsb_spearman": 0.8356571460909332,
"step": 10750
},
{
"epoch": 0.7,
"eval_avg_sts": 0.7701867471043669,
"eval_sickr_spearman": 0.7049941187721263,
"eval_stsb_spearman": 0.8353793754366073,
"step": 10875
},
{
"epoch": 0.7,
"learning_rate": 2.960450531166006e-06,
"loss": 0.0001,
"step": 11000
},
{
"epoch": 0.7,
"eval_avg_sts": 0.7700153990116653,
"eval_sickr_spearman": 0.7048114084622558,
"eval_stsb_spearman": 0.8352193895610748,
"step": 11000
},
{
"epoch": 0.71,
"eval_avg_sts": 0.7693725150897858,
"eval_sickr_spearman": 0.7057958058862298,
"eval_stsb_spearman": 0.8329492242933417,
"step": 11125
},
{
"epoch": 0.72,
"eval_avg_sts": 0.7696360829638863,
"eval_sickr_spearman": 0.7062946088746646,
"eval_stsb_spearman": 0.832977557053108,
"step": 11250
},
{
"epoch": 0.73,
"eval_avg_sts": 0.7709027781052127,
"eval_sickr_spearman": 0.7053959469667576,
"eval_stsb_spearman": 0.8364096092436679,
"step": 11375
},
{
"epoch": 0.74,
"learning_rate": 2.640471009855369e-06,
"loss": 0.0002,
"step": 11500
},
{
"epoch": 0.74,
"eval_avg_sts": 0.7705889705773711,
"eval_sickr_spearman": 0.7048775472889359,
"eval_stsb_spearman": 0.8363003938658062,
"step": 11500
},
{
"epoch": 0.74,
"eval_avg_sts": 0.7756785758354756,
"eval_sickr_spearman": 0.7125353379156201,
"eval_stsb_spearman": 0.8388218137553312,
"step": 11625
},
{
"epoch": 0.75,
"eval_avg_sts": 0.775711044555684,
"eval_sickr_spearman": 0.7125440315449804,
"eval_stsb_spearman": 0.8388780575663876,
"step": 11750
},
{
"epoch": 0.76,
"eval_avg_sts": 0.7756594965556575,
"eval_sickr_spearman": 0.710795363234923,
"eval_stsb_spearman": 0.840523629876392,
"step": 11875
},
{
"epoch": 0.77,
"learning_rate": 2.3204914885447333e-06,
"loss": 0.0002,
"step": 12000
},
{
"epoch": 0.77,
"eval_avg_sts": 0.778271737124629,
"eval_sickr_spearman": 0.7131796271103109,
"eval_stsb_spearman": 0.8433638471389473,
"step": 12000
},
{
"epoch": 0.78,
"eval_avg_sts": 0.7778824941586701,
"eval_sickr_spearman": 0.7129357251772082,
"eval_stsb_spearman": 0.842829263140132,
"step": 12125
},
{
"epoch": 0.78,
"eval_avg_sts": 0.7791778033988318,
"eval_sickr_spearman": 0.7154212866455291,
"eval_stsb_spearman": 0.8429343201521345,
"step": 12250
},
{
"epoch": 0.79,
"eval_avg_sts": 0.7784491802919915,
"eval_sickr_spearman": 0.7169989642344666,
"eval_stsb_spearman": 0.8398993963495163,
"step": 12375
},
{
"epoch": 0.8,
"learning_rate": 2.000511967234097e-06,
"loss": 0.0003,
"step": 12500
},
{
"epoch": 0.8,
"eval_avg_sts": 0.7782960644263726,
"eval_sickr_spearman": 0.7171246616269303,
"eval_stsb_spearman": 0.8394674672258147,
"step": 12500
},
{
"epoch": 0.81,
"eval_avg_sts": 0.7777553880232895,
"eval_sickr_spearman": 0.7163744638535685,
"eval_stsb_spearman": 0.8391363121930105,
"step": 12625
},
{
"epoch": 0.82,
"eval_avg_sts": 0.7768079269313797,
"eval_sickr_spearman": 0.7156439108006947,
"eval_stsb_spearman": 0.8379719430620648,
"step": 12750
},
{
"epoch": 0.82,
"eval_avg_sts": 0.7769940996454496,
"eval_sickr_spearman": 0.7154824302376598,
"eval_stsb_spearman": 0.8385057690532396,
"step": 12875
},
{
"epoch": 0.83,
"learning_rate": 1.6805324459234608e-06,
"loss": 0.0002,
"step": 13000
},
{
"epoch": 0.83,
"eval_avg_sts": 0.7773811376862301,
"eval_sickr_spearman": 0.7153101426768013,
"eval_stsb_spearman": 0.839452132695659,
"step": 13000
},
{
"epoch": 0.84,
"eval_avg_sts": 0.7765844510293946,
"eval_sickr_spearman": 0.713703022022682,
"eval_stsb_spearman": 0.8394658800361071,
"step": 13125
},
{
"epoch": 0.85,
"eval_avg_sts": 0.7754935189329493,
"eval_sickr_spearman": 0.7128497975367355,
"eval_stsb_spearman": 0.838137240329163,
"step": 13250
},
{
"epoch": 0.86,
"eval_avg_sts": 0.7753794499527557,
"eval_sickr_spearman": 0.7128604124101533,
"eval_stsb_spearman": 0.837898487495358,
"step": 13375
},
{
"epoch": 0.86,
"learning_rate": 1.3605529246128248e-06,
"loss": 0.0001,
"step": 13500
},
{
"epoch": 0.86,
"eval_avg_sts": 0.7753019292848522,
"eval_sickr_spearman": 0.7128690099773107,
"eval_stsb_spearman": 0.8377348485923936,
"step": 13500
},
{
"epoch": 0.87,
"eval_avg_sts": 0.7751785379635656,
"eval_sickr_spearman": 0.7127431615818965,
"eval_stsb_spearman": 0.8376139143452346,
"step": 13625
},
{
"epoch": 0.88,
"eval_avg_sts": 0.7751092404866357,
"eval_sickr_spearman": 0.712538940248228,
"eval_stsb_spearman": 0.8376795407250434,
"step": 13750
},
{
"epoch": 0.89,
"eval_avg_sts": 0.7748652873780293,
"eval_sickr_spearman": 0.7121167948976883,
"eval_stsb_spearman": 0.8376137798583702,
"step": 13875
},
{
"epoch": 0.9,
"learning_rate": 1.0405734033021888e-06,
"loss": 0.0,
"step": 14000
},
{
"epoch": 0.9,
"eval_avg_sts": 0.7749284965089291,
"eval_sickr_spearman": 0.7121870643990923,
"eval_stsb_spearman": 0.8376699286187659,
"step": 14000
},
{
"epoch": 0.9,
"eval_avg_sts": 0.7736810149784585,
"eval_sickr_spearman": 0.7101105357906183,
"eval_stsb_spearman": 0.8372514941662987,
"step": 14125
},
{
"epoch": 0.91,
"eval_avg_sts": 0.7736952566877616,
"eval_sickr_spearman": 0.7101496811382905,
"eval_stsb_spearman": 0.8372408322372328,
"step": 14250
},
{
"epoch": 0.92,
"eval_avg_sts": 0.7737241698204669,
"eval_sickr_spearman": 0.7102500181091946,
"eval_stsb_spearman": 0.837198321531739,
"step": 14375
},
{
"epoch": 0.93,
"learning_rate": 7.205938819915525e-07,
"loss": 0.0001,
"step": 14500
},
{
"epoch": 0.93,
"eval_avg_sts": 0.7737795026917761,
"eval_sickr_spearman": 0.7103373866827106,
"eval_stsb_spearman": 0.8372216187008416,
"step": 14500
},
{
"epoch": 0.94,
"eval_avg_sts": 0.7736836444786954,
"eval_sickr_spearman": 0.7101872414596151,
"eval_stsb_spearman": 0.8371800474977756,
"step": 14625
},
{
"epoch": 0.94,
"eval_avg_sts": 0.7734449184616194,
"eval_sickr_spearman": 0.7102963681220825,
"eval_stsb_spearman": 0.8365934688011564,
"step": 14750
},
{
"epoch": 0.95,
"eval_avg_sts": 0.7733655720089518,
"eval_sickr_spearman": 0.7101887784548611,
"eval_stsb_spearman": 0.8365423655630426,
"step": 14875
},
{
"epoch": 0.96,
"learning_rate": 4.0061436068091647e-07,
"loss": 0.0001,
"step": 15000
},
{
"epoch": 0.96,
"eval_avg_sts": 0.7735006567182534,
"eval_sickr_spearman": 0.7106506935573914,
"eval_stsb_spearman": 0.8363506198791154,
"step": 15000
},
{
"epoch": 0.97,
"eval_avg_sts": 0.7733751888661335,
"eval_sickr_spearman": 0.710443343292483,
"eval_stsb_spearman": 0.8363070344397839,
"step": 15125
},
{
"epoch": 0.98,
"eval_avg_sts": 0.7736170866924498,
"eval_sickr_spearman": 0.7105904625561881,
"eval_stsb_spearman": 0.8366437108287115,
"step": 15250
},
{
"epoch": 0.98,
"eval_avg_sts": 0.7738120865784781,
"eval_sickr_spearman": 0.7107394070017475,
"eval_stsb_spearman": 0.8368847661552088,
"step": 15375
},
{
"epoch": 0.99,
"learning_rate": 8.06348393702803e-08,
"loss": 0.0002,
"step": 15500
},
{
"epoch": 0.99,
"eval_avg_sts": 0.7737627481564555,
"eval_sickr_spearman": 0.710675861854545,
"eval_stsb_spearman": 0.8368496344583661,
"step": 15500
},
{
"epoch": 1.0,
"eval_avg_sts": 0.7737442273397684,
"eval_sickr_spearman": 0.7106493006554496,
"eval_stsb_spearman": 0.8368391540240873,
"step": 15625
},
{
"epoch": 1.0,
"step": 15626,
"train_runtime": 3862.446,
"train_samples_per_second": 4.046
}
],
"max_steps": 15626,
"num_train_epochs": 1,
"total_flos": 48090729552850944,
"trial_name": null,
"trial_params": null
}