|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9992254066615027, |
|
"eval_steps": 100, |
|
"global_step": 2904, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -2.293962240219116, |
|
"logits/rejected": -2.235898017883301, |
|
"logps/chosen": -280.7555847167969, |
|
"logps/rejected": -204.8462371826172, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -2.4112908840179443, |
|
"logits/rejected": -2.3392038345336914, |
|
"logps/chosen": -294.24664306640625, |
|
"logps/rejected": -213.90679931640625, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.002664534840732813, |
|
"rewards/margins": -9.001558646559715e-05, |
|
"rewards/rejected": 0.00275455042719841, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.4150779247283936, |
|
"logits/rejected": -2.3804850578308105, |
|
"logps/chosen": -279.4464416503906, |
|
"logps/rejected": -237.6862335205078, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.003086227923631668, |
|
"rewards/margins": 0.009707429446280003, |
|
"rewards/rejected": -0.006621202919632196, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -2.4616353511810303, |
|
"logits/rejected": -2.3937487602233887, |
|
"logps/chosen": -301.0641174316406, |
|
"logps/rejected": -215.75418090820312, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0004951163427904248, |
|
"rewards/margins": 0.000455385452369228, |
|
"rewards/rejected": 3.9730781281832606e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -2.3844287395477295, |
|
"logits/rejected": -2.344656467437744, |
|
"logps/chosen": -291.42413330078125, |
|
"logps/rejected": -231.827880859375, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0011853076284751296, |
|
"rewards/margins": 0.002922216197475791, |
|
"rewards/rejected": -0.0017369084525853395, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -2.4430339336395264, |
|
"logits/rejected": -2.382826566696167, |
|
"logps/chosen": -299.23663330078125, |
|
"logps/rejected": -220.186767578125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0030969330109655857, |
|
"rewards/margins": 0.004286954645067453, |
|
"rewards/rejected": -0.0011900208191946149, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -2.4357097148895264, |
|
"logits/rejected": -2.4250669479370117, |
|
"logps/chosen": -272.5534973144531, |
|
"logps/rejected": -227.50332641601562, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.00050017872126773, |
|
"rewards/margins": 0.0015349002787843347, |
|
"rewards/rejected": -0.0020350790582597256, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -2.465843677520752, |
|
"logits/rejected": -2.4208121299743652, |
|
"logps/chosen": -292.08990478515625, |
|
"logps/rejected": -206.94558715820312, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0012726224958896637, |
|
"rewards/margins": 0.003322723787277937, |
|
"rewards/rejected": -0.0020501010585576296, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -2.3812198638916016, |
|
"logits/rejected": -2.390000104904175, |
|
"logps/chosen": -250.2107391357422, |
|
"logps/rejected": -212.4706268310547, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004158427938818932, |
|
"rewards/margins": 0.005836354102939367, |
|
"rewards/rejected": -0.0016779262805357575, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -2.3327183723449707, |
|
"logits/rejected": -2.254629611968994, |
|
"logps/chosen": -241.6092529296875, |
|
"logps/rejected": -185.40869140625, |
|
"loss": 0.6951, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.005251345224678516, |
|
"rewards/margins": -0.004248884506523609, |
|
"rewards/rejected": -0.0010024613002315164, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -2.401676654815674, |
|
"logits/rejected": -2.3915371894836426, |
|
"logps/chosen": -259.74755859375, |
|
"logps/rejected": -221.87295532226562, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.005552566610276699, |
|
"rewards/margins": -0.0006124695646576583, |
|
"rewards/rejected": 0.006165036000311375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -2.441145181655884, |
|
"logits/rejected": -2.311302423477173, |
|
"logps/chosen": -252.06753540039062, |
|
"logps/rejected": -210.41696166992188, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0075301057659089565, |
|
"rewards/margins": 0.013402941636741161, |
|
"rewards/rejected": -0.0058728354051709175, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -2.3808252811431885, |
|
"logits/rejected": -2.4377057552337646, |
|
"logps/chosen": -258.4937438964844, |
|
"logps/rejected": -210.9638671875, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0047661615535616875, |
|
"rewards/margins": 0.008594167418777943, |
|
"rewards/rejected": -0.003828004002571106, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -2.447084903717041, |
|
"logits/rejected": -2.4316954612731934, |
|
"logps/chosen": -279.2828674316406, |
|
"logps/rejected": -213.9290313720703, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.015633000060915947, |
|
"rewards/margins": 0.019236568361520767, |
|
"rewards/rejected": -0.0036035701632499695, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -2.574856996536255, |
|
"logits/rejected": -2.4002137184143066, |
|
"logps/chosen": -267.64520263671875, |
|
"logps/rejected": -203.2295684814453, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.008941258303821087, |
|
"rewards/margins": 0.014786717481911182, |
|
"rewards/rejected": -0.00584546010941267, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -2.3573081493377686, |
|
"logits/rejected": -2.390941619873047, |
|
"logps/chosen": -280.3595886230469, |
|
"logps/rejected": -214.0395965576172, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0026760019827634096, |
|
"rewards/margins": 0.010370850563049316, |
|
"rewards/rejected": -0.0076948488131165504, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -2.460977077484131, |
|
"logits/rejected": -2.4357001781463623, |
|
"logps/chosen": -307.5310974121094, |
|
"logps/rejected": -234.86721801757812, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.010203736834228039, |
|
"rewards/margins": 0.017851073294878006, |
|
"rewards/rejected": -0.0076473369263112545, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -2.4167332649230957, |
|
"logits/rejected": -2.4196879863739014, |
|
"logps/chosen": -284.23834228515625, |
|
"logps/rejected": -220.6202392578125, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.006566309370100498, |
|
"rewards/margins": 0.00825690571218729, |
|
"rewards/rejected": -0.001690595643594861, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -2.426633358001709, |
|
"logits/rejected": -2.426823139190674, |
|
"logps/chosen": -261.1161193847656, |
|
"logps/rejected": -236.690673828125, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.007787441369146109, |
|
"rewards/margins": 0.021113665774464607, |
|
"rewards/rejected": -0.013326220214366913, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -2.47978138923645, |
|
"logits/rejected": -2.393406391143799, |
|
"logps/chosen": -261.36138916015625, |
|
"logps/rejected": -213.78268432617188, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.010398685932159424, |
|
"rewards/margins": 0.022776171565055847, |
|
"rewards/rejected": -0.012377489358186722, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -2.421168565750122, |
|
"logits/rejected": -2.3462955951690674, |
|
"logps/chosen": -262.1727294921875, |
|
"logps/rejected": -223.8699188232422, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.012244177982211113, |
|
"rewards/margins": 0.029001509770751, |
|
"rewards/rejected": -0.016757333651185036, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -2.446596145629883, |
|
"logits/rejected": -2.367396593093872, |
|
"logps/chosen": -276.6649475097656, |
|
"logps/rejected": -203.23178100585938, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.012773854658007622, |
|
"rewards/margins": 0.03576343134045601, |
|
"rewards/rejected": -0.022989574819803238, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -2.4427998065948486, |
|
"logits/rejected": -2.400299549102783, |
|
"logps/chosen": -248.6843719482422, |
|
"logps/rejected": -211.16293334960938, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.01836214028298855, |
|
"rewards/margins": 0.0383928082883358, |
|
"rewards/rejected": -0.020030666142702103, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -2.4508414268493652, |
|
"logits/rejected": -2.444462299346924, |
|
"logps/chosen": -261.3500061035156, |
|
"logps/rejected": -210.68807983398438, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.02510516345500946, |
|
"rewards/margins": 0.04327309876680374, |
|
"rewards/rejected": -0.018167927861213684, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -2.3069140911102295, |
|
"logits/rejected": -2.3430962562561035, |
|
"logps/chosen": -280.17633056640625, |
|
"logps/rejected": -234.605224609375, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.024332646280527115, |
|
"rewards/margins": 0.05363120511174202, |
|
"rewards/rejected": -0.029298555105924606, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -2.408478021621704, |
|
"logits/rejected": -2.38106369972229, |
|
"logps/chosen": -267.2620849609375, |
|
"logps/rejected": -210.53933715820312, |
|
"loss": 0.669, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.022226443514227867, |
|
"rewards/margins": 0.06285347044467926, |
|
"rewards/rejected": -0.040627025067806244, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -2.3904354572296143, |
|
"logits/rejected": -2.4175901412963867, |
|
"logps/chosen": -261.8872375488281, |
|
"logps/rejected": -215.2822723388672, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.012217490933835506, |
|
"rewards/margins": 0.041740067303180695, |
|
"rewards/rejected": -0.029522571712732315, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -2.3512496948242188, |
|
"logits/rejected": -2.374730110168457, |
|
"logps/chosen": -221.1552276611328, |
|
"logps/rejected": -214.0994110107422, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.02718324586749077, |
|
"rewards/margins": 0.07020439207553864, |
|
"rewards/rejected": -0.04302113875746727, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -2.4109158515930176, |
|
"logits/rejected": -2.416260242462158, |
|
"logps/chosen": -265.5227355957031, |
|
"logps/rejected": -219.8955841064453, |
|
"loss": 0.6591, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.02033821865916252, |
|
"rewards/margins": 0.0716070905327797, |
|
"rewards/rejected": -0.051268868148326874, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -2.4649157524108887, |
|
"logits/rejected": -2.336876153945923, |
|
"logps/chosen": -299.04083251953125, |
|
"logps/rejected": -233.513671875, |
|
"loss": 0.6561, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.04183030128479004, |
|
"rewards/margins": 0.08989714086055756, |
|
"rewards/rejected": -0.04806683957576752, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982778415614236e-07, |
|
"logits/chosen": -2.3887062072753906, |
|
"logits/rejected": -2.34873628616333, |
|
"logps/chosen": -236.2488250732422, |
|
"logps/rejected": -220.21450805664062, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.022816497832536697, |
|
"rewards/margins": 0.09650030732154846, |
|
"rewards/rejected": -0.07368380576372147, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963643321852277e-07, |
|
"logits/chosen": -2.425257444381714, |
|
"logits/rejected": -2.330592393875122, |
|
"logps/chosen": -285.37896728515625, |
|
"logps/rejected": -231.2700653076172, |
|
"loss": 0.6515, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.010598802007734776, |
|
"rewards/margins": 0.0920100286602974, |
|
"rewards/rejected": -0.0814112201333046, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944508228090318e-07, |
|
"logits/chosen": -2.4376401901245117, |
|
"logits/rejected": -2.4971015453338623, |
|
"logps/chosen": -238.9815216064453, |
|
"logps/rejected": -192.4442138671875, |
|
"loss": 0.6369, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.04103684425354004, |
|
"rewards/margins": 0.13544507324695587, |
|
"rewards/rejected": -0.09440822899341583, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.925373134328357e-07, |
|
"logits/chosen": -2.4623780250549316, |
|
"logits/rejected": -2.439487934112549, |
|
"logps/chosen": -252.5996551513672, |
|
"logps/rejected": -222.22421264648438, |
|
"loss": 0.6422, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.04304979741573334, |
|
"rewards/margins": 0.10223046690225601, |
|
"rewards/rejected": -0.05918065458536148, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906238040566398e-07, |
|
"logits/chosen": -2.4748172760009766, |
|
"logits/rejected": -2.356078863143921, |
|
"logps/chosen": -271.64324951171875, |
|
"logps/rejected": -231.6174774169922, |
|
"loss": 0.6425, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.041741594672203064, |
|
"rewards/margins": 0.15150585770606995, |
|
"rewards/rejected": -0.10976427793502808, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.887102946804438e-07, |
|
"logits/chosen": -2.5339407920837402, |
|
"logits/rejected": -2.4247193336486816, |
|
"logps/chosen": -289.107177734375, |
|
"logps/rejected": -223.7015838623047, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.05352502316236496, |
|
"rewards/margins": 0.20187970995903015, |
|
"rewards/rejected": -0.1483546942472458, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.867967853042479e-07, |
|
"logits/chosen": -2.44026255607605, |
|
"logits/rejected": -2.4695184230804443, |
|
"logps/chosen": -272.41827392578125, |
|
"logps/rejected": -223.8868408203125, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.06177166849374771, |
|
"rewards/margins": 0.19406349956989288, |
|
"rewards/rejected": -0.13229182362556458, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.84883275928052e-07, |
|
"logits/chosen": -2.4581284523010254, |
|
"logits/rejected": -2.422722339630127, |
|
"logps/chosen": -274.32763671875, |
|
"logps/rejected": -228.18148803710938, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.04478592425584793, |
|
"rewards/margins": 0.19903826713562012, |
|
"rewards/rejected": -0.1542523354291916, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.82969766551856e-07, |
|
"logits/chosen": -2.365147829055786, |
|
"logits/rejected": -2.376405954360962, |
|
"logps/chosen": -258.0191345214844, |
|
"logps/rejected": -228.0791015625, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.04648314788937569, |
|
"rewards/margins": 0.18209731578826904, |
|
"rewards/rejected": -0.13561417162418365, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810562571756601e-07, |
|
"logits/chosen": -2.418147325515747, |
|
"logits/rejected": -2.405946969985962, |
|
"logps/chosen": -255.6215362548828, |
|
"logps/rejected": -219.8120880126953, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.01959371007978916, |
|
"rewards/margins": 0.18485160171985626, |
|
"rewards/rejected": -0.16525788605213165, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791427477994642e-07, |
|
"logits/chosen": -2.419095516204834, |
|
"logits/rejected": -2.4036061763763428, |
|
"logps/chosen": -306.9060974121094, |
|
"logps/rejected": -239.7941436767578, |
|
"loss": 0.6195, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.06084824725985527, |
|
"rewards/margins": 0.20281776785850525, |
|
"rewards/rejected": -0.14196953177452087, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772292384232682e-07, |
|
"logits/chosen": -2.417729377746582, |
|
"logits/rejected": -2.3787436485290527, |
|
"logps/chosen": -246.24478149414062, |
|
"logps/rejected": -245.55166625976562, |
|
"loss": 0.6208, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.024662364274263382, |
|
"rewards/margins": 0.1620568484067917, |
|
"rewards/rejected": -0.18671919405460358, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753157290470723e-07, |
|
"logits/chosen": -2.4877963066101074, |
|
"logits/rejected": -2.4664740562438965, |
|
"logps/chosen": -256.0787048339844, |
|
"logps/rejected": -225.0019989013672, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.006953413598239422, |
|
"rewards/margins": 0.161538228392601, |
|
"rewards/rejected": -0.15458481013774872, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7340221967087635e-07, |
|
"logits/chosen": -2.412123918533325, |
|
"logits/rejected": -2.3679380416870117, |
|
"logps/chosen": -251.4219970703125, |
|
"logps/rejected": -224.938720703125, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.0361722894012928, |
|
"rewards/margins": 0.22137188911437988, |
|
"rewards/rejected": -0.1851995885372162, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.714887102946804e-07, |
|
"logits/chosen": -2.470468521118164, |
|
"logits/rejected": -2.3362622261047363, |
|
"logps/chosen": -257.301513671875, |
|
"logps/rejected": -200.90008544921875, |
|
"loss": 0.6021, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.01734147220849991, |
|
"rewards/margins": 0.22712071239948273, |
|
"rewards/rejected": -0.20977921783924103, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6957520091848447e-07, |
|
"logits/chosen": -2.4263899326324463, |
|
"logits/rejected": -2.3918890953063965, |
|
"logps/chosen": -228.27206420898438, |
|
"logps/rejected": -204.10134887695312, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.007703957613557577, |
|
"rewards/margins": 0.17124322056770325, |
|
"rewards/rejected": -0.16353924572467804, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6766169154228853e-07, |
|
"logits/chosen": -2.3560242652893066, |
|
"logits/rejected": -2.3139007091522217, |
|
"logps/chosen": -268.39398193359375, |
|
"logps/rejected": -239.73159790039062, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.06542753428220749, |
|
"rewards/margins": 0.2891160547733307, |
|
"rewards/rejected": -0.2236885279417038, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.657481821660926e-07, |
|
"logits/chosen": -2.452782154083252, |
|
"logits/rejected": -2.4332146644592285, |
|
"logps/chosen": -283.4723205566406, |
|
"logps/rejected": -211.5760498046875, |
|
"loss": 0.5978, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.004657271318137646, |
|
"rewards/margins": 0.2682662606239319, |
|
"rewards/rejected": -0.2729235589504242, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6383467278989666e-07, |
|
"logits/chosen": -2.3910794258117676, |
|
"logits/rejected": -2.4533417224884033, |
|
"logps/chosen": -248.0291748046875, |
|
"logps/rejected": -230.6380157470703, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.015768401324748993, |
|
"rewards/margins": 0.2060297429561615, |
|
"rewards/rejected": -0.22179818153381348, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6192116341370067e-07, |
|
"logits/chosen": -2.5222439765930176, |
|
"logits/rejected": -2.4398193359375, |
|
"logps/chosen": -277.2117614746094, |
|
"logps/rejected": -230.4596710205078, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.04367998614907265, |
|
"rewards/margins": 0.33074626326560974, |
|
"rewards/rejected": -0.2870662808418274, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.6000765403750473e-07, |
|
"logits/chosen": -2.423351764678955, |
|
"logits/rejected": -2.3357410430908203, |
|
"logps/chosen": -255.89846801757812, |
|
"logps/rejected": -211.130126953125, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.019995370879769325, |
|
"rewards/margins": 0.26641523838043213, |
|
"rewards/rejected": -0.24641986191272736, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.580941446613088e-07, |
|
"logits/chosen": -2.4628067016601562, |
|
"logits/rejected": -2.4185214042663574, |
|
"logps/chosen": -257.75567626953125, |
|
"logps/rejected": -238.31454467773438, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.023913182318210602, |
|
"rewards/margins": 0.23212233185768127, |
|
"rewards/rejected": -0.2560355067253113, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5618063528511285e-07, |
|
"logits/chosen": -2.3876211643218994, |
|
"logits/rejected": -2.357927083969116, |
|
"logps/chosen": -274.25555419921875, |
|
"logps/rejected": -232.0738525390625, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.04971958324313164, |
|
"rewards/margins": 0.2438163459300995, |
|
"rewards/rejected": -0.29353591799736023, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.542671259089169e-07, |
|
"logits/chosen": -2.4641833305358887, |
|
"logits/rejected": -2.40731143951416, |
|
"logps/chosen": -273.7682189941406, |
|
"logps/rejected": -233.98583984375, |
|
"loss": 0.6062, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0051215579733252525, |
|
"rewards/margins": 0.301483690738678, |
|
"rewards/rejected": -0.3066052496433258, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.52353616532721e-07, |
|
"logits/chosen": -2.4376657009124756, |
|
"logits/rejected": -2.4150702953338623, |
|
"logps/chosen": -266.14349365234375, |
|
"logps/rejected": -235.13882446289062, |
|
"loss": 0.6018, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.014219239354133606, |
|
"rewards/margins": 0.26282960176467896, |
|
"rewards/rejected": -0.27704882621765137, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5044010715652504e-07, |
|
"logits/chosen": -2.4388842582702637, |
|
"logits/rejected": -2.390779495239258, |
|
"logps/chosen": -249.3515167236328, |
|
"logps/rejected": -228.97885131835938, |
|
"loss": 0.6, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.028151463717222214, |
|
"rewards/margins": 0.2760944068431854, |
|
"rewards/rejected": -0.30424588918685913, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.485265977803291e-07, |
|
"logits/chosen": -2.460728406906128, |
|
"logits/rejected": -2.3551411628723145, |
|
"logps/chosen": -293.16400146484375, |
|
"logps/rejected": -232.632080078125, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.06439458578824997, |
|
"rewards/margins": 0.4292263984680176, |
|
"rewards/rejected": -0.3648317754268646, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4661308840413316e-07, |
|
"logits/chosen": -2.509218215942383, |
|
"logits/rejected": -2.375960111618042, |
|
"logps/chosen": -285.7981262207031, |
|
"logps/rejected": -236.2060546875, |
|
"loss": 0.6024, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.004936461336910725, |
|
"rewards/margins": 0.36126184463500977, |
|
"rewards/rejected": -0.3563253879547119, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.446995790279372e-07, |
|
"logits/chosen": -2.455012321472168, |
|
"logits/rejected": -2.436042547225952, |
|
"logps/chosen": -283.45404052734375, |
|
"logps/rejected": -230.90603637695312, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.004780585877597332, |
|
"rewards/margins": 0.32074013352394104, |
|
"rewards/rejected": -0.31595954298973083, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4278606965174123e-07, |
|
"logits/chosen": -2.4725747108459473, |
|
"logits/rejected": -2.470695734024048, |
|
"logps/chosen": -262.63873291015625, |
|
"logps/rejected": -241.70535278320312, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.011715828441083431, |
|
"rewards/margins": 0.34255507588386536, |
|
"rewards/rejected": -0.35427090525627136, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.408725602755453e-07, |
|
"logits/chosen": -2.460839033126831, |
|
"logits/rejected": -2.407060146331787, |
|
"logps/chosen": -233.80014038085938, |
|
"logps/rejected": -207.22445678710938, |
|
"loss": 0.5908, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.03514878451824188, |
|
"rewards/margins": 0.27000272274017334, |
|
"rewards/rejected": -0.3051515221595764, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3895905089934936e-07, |
|
"logits/chosen": -2.4760079383850098, |
|
"logits/rejected": -2.3460214138031006, |
|
"logps/chosen": -276.608154296875, |
|
"logps/rejected": -231.1323699951172, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.0008779823547229171, |
|
"rewards/margins": 0.3627086579799652, |
|
"rewards/rejected": -0.3618307411670685, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.370455415231534e-07, |
|
"logits/chosen": -2.4187588691711426, |
|
"logits/rejected": -2.426133632659912, |
|
"logps/chosen": -276.6905212402344, |
|
"logps/rejected": -260.84893798828125, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.05571802332997322, |
|
"rewards/margins": 0.310151606798172, |
|
"rewards/rejected": -0.3658696413040161, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.351320321469575e-07, |
|
"logits/chosen": -2.4383225440979004, |
|
"logits/rejected": -2.401784658432007, |
|
"logps/chosen": -298.56390380859375, |
|
"logps/rejected": -235.006591796875, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.035277482122182846, |
|
"rewards/margins": 0.44551342725753784, |
|
"rewards/rejected": -0.4102359712123871, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3321852277076154e-07, |
|
"logits/chosen": -2.4030394554138184, |
|
"logits/rejected": -2.390498638153076, |
|
"logps/chosen": -267.2048645019531, |
|
"logps/rejected": -240.0560760498047, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.008936256170272827, |
|
"rewards/margins": 0.3941975235939026, |
|
"rewards/rejected": -0.3852612376213074, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313050133945656e-07, |
|
"logits/chosen": -2.508209705352783, |
|
"logits/rejected": -2.45387601852417, |
|
"logps/chosen": -291.5556640625, |
|
"logps/rejected": -241.94131469726562, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.0002497173845767975, |
|
"rewards/margins": 0.4533967971801758, |
|
"rewards/rejected": -0.4536465108394623, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2939150401836967e-07, |
|
"logits/chosen": -2.4588632583618164, |
|
"logits/rejected": -2.461416482925415, |
|
"logps/chosen": -245.1895751953125, |
|
"logps/rejected": -246.0583038330078, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.06281181424856186, |
|
"rewards/margins": 0.3227921426296234, |
|
"rewards/rejected": -0.38560396432876587, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2747799464217373e-07, |
|
"logits/chosen": -2.397804021835327, |
|
"logits/rejected": -2.3986358642578125, |
|
"logps/chosen": -262.5384521484375, |
|
"logps/rejected": -231.0225830078125, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.10527386516332626, |
|
"rewards/margins": 0.3180972933769226, |
|
"rewards/rejected": -0.42337116599082947, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.255644852659778e-07, |
|
"logits/chosen": -2.49613618850708, |
|
"logits/rejected": -2.4438865184783936, |
|
"logps/chosen": -287.05419921875, |
|
"logps/rejected": -249.3083953857422, |
|
"loss": 0.6131, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.08329292386770248, |
|
"rewards/margins": 0.29655909538269043, |
|
"rewards/rejected": -0.3798519968986511, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.236509758897818e-07, |
|
"logits/chosen": -2.3793463706970215, |
|
"logits/rejected": -2.387934684753418, |
|
"logps/chosen": -260.1773986816406, |
|
"logps/rejected": -218.5209197998047, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.06579776108264923, |
|
"rewards/margins": 0.35870271921157837, |
|
"rewards/rejected": -0.4245004653930664, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2173746651358586e-07, |
|
"logits/chosen": -2.412266254425049, |
|
"logits/rejected": -2.389812469482422, |
|
"logps/chosen": -260.27349853515625, |
|
"logps/rejected": -246.9514923095703, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.10205063968896866, |
|
"rewards/margins": 0.2957732677459717, |
|
"rewards/rejected": -0.39782392978668213, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.198239571373899e-07, |
|
"logits/chosen": -2.42134690284729, |
|
"logits/rejected": -2.353253126144409, |
|
"logps/chosen": -243.3867645263672, |
|
"logps/rejected": -195.7749481201172, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.05715983361005783, |
|
"rewards/margins": 0.43060773611068726, |
|
"rewards/rejected": -0.48776760697364807, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.17910447761194e-07, |
|
"logits/chosen": -2.4673993587493896, |
|
"logits/rejected": -2.3654751777648926, |
|
"logps/chosen": -267.1549377441406, |
|
"logps/rejected": -255.4532012939453, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.08059550076723099, |
|
"rewards/margins": 0.34076714515686035, |
|
"rewards/rejected": -0.42136263847351074, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1599693838499805e-07, |
|
"logits/chosen": -2.45065975189209, |
|
"logits/rejected": -2.4266138076782227, |
|
"logps/chosen": -295.47137451171875, |
|
"logps/rejected": -215.19296264648438, |
|
"loss": 0.5811, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.061896733939647675, |
|
"rewards/margins": 0.3598650097846985, |
|
"rewards/rejected": -0.42176175117492676, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.140834290088021e-07, |
|
"logits/chosen": -2.4394631385803223, |
|
"logits/rejected": -2.417912006378174, |
|
"logps/chosen": -277.1748046875, |
|
"logps/rejected": -221.8069610595703, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.08770271390676498, |
|
"rewards/margins": 0.44523605704307556, |
|
"rewards/rejected": -0.5329388380050659, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.121699196326062e-07, |
|
"logits/chosen": -2.435689926147461, |
|
"logits/rejected": -2.310481548309326, |
|
"logps/chosen": -218.2103729248047, |
|
"logps/rejected": -185.0491943359375, |
|
"loss": 0.5802, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.05216900631785393, |
|
"rewards/margins": 0.38854002952575684, |
|
"rewards/rejected": -0.44070905447006226, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1025641025641024e-07, |
|
"logits/chosen": -2.399353504180908, |
|
"logits/rejected": -2.4066669940948486, |
|
"logps/chosen": -259.16229248046875, |
|
"logps/rejected": -243.228515625, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.06289193779230118, |
|
"rewards/margins": 0.4069492220878601, |
|
"rewards/rejected": -0.4698411524295807, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.083429008802143e-07, |
|
"logits/chosen": -2.525341510772705, |
|
"logits/rejected": -2.471456527709961, |
|
"logps/chosen": -274.21099853515625, |
|
"logps/rejected": -213.09451293945312, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.041963692754507065, |
|
"rewards/margins": 0.450117826461792, |
|
"rewards/rejected": -0.4920814633369446, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0642939150401836e-07, |
|
"logits/chosen": -2.487657070159912, |
|
"logits/rejected": -2.3538174629211426, |
|
"logps/chosen": -290.19036865234375, |
|
"logps/rejected": -217.69906616210938, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.06529206037521362, |
|
"rewards/margins": 0.44091683626174927, |
|
"rewards/rejected": -0.5062088966369629, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0451588212782237e-07, |
|
"logits/chosen": -2.429466724395752, |
|
"logits/rejected": -2.395692825317383, |
|
"logps/chosen": -255.69052124023438, |
|
"logps/rejected": -227.69168090820312, |
|
"loss": 0.5409, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0646648183465004, |
|
"rewards/margins": 0.5062236189842224, |
|
"rewards/rejected": -0.5708884596824646, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0260237275162643e-07, |
|
"logits/chosen": -2.368137836456299, |
|
"logits/rejected": -2.3667895793914795, |
|
"logps/chosen": -232.4878692626953, |
|
"logps/rejected": -227.72323608398438, |
|
"loss": 0.5741, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.057848911732435226, |
|
"rewards/margins": 0.45716094970703125, |
|
"rewards/rejected": -0.5150099396705627, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.006888633754305e-07, |
|
"logits/chosen": -2.5232253074645996, |
|
"logits/rejected": -2.4718704223632812, |
|
"logps/chosen": -277.5395812988281, |
|
"logps/rejected": -229.75863647460938, |
|
"loss": 0.545, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.02210831269621849, |
|
"rewards/margins": 0.5734663605690002, |
|
"rewards/rejected": -0.5955746173858643, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9877535399923456e-07, |
|
"logits/chosen": -2.4078879356384277, |
|
"logits/rejected": -2.3697750568389893, |
|
"logps/chosen": -267.1474914550781, |
|
"logps/rejected": -224.79318237304688, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.05940120667219162, |
|
"rewards/margins": 0.5237945318222046, |
|
"rewards/rejected": -0.583195686340332, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.968618446230386e-07, |
|
"logits/chosen": -2.441854953765869, |
|
"logits/rejected": -2.413933277130127, |
|
"logps/chosen": -319.1496276855469, |
|
"logps/rejected": -247.3388671875, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.00216462230309844, |
|
"rewards/margins": 0.6005392074584961, |
|
"rewards/rejected": -0.6027037501335144, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.949483352468427e-07, |
|
"logits/chosen": -2.441795825958252, |
|
"logits/rejected": -2.408433437347412, |
|
"logps/chosen": -275.55364990234375, |
|
"logps/rejected": -240.1023712158203, |
|
"loss": 0.5996, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.18812677264213562, |
|
"rewards/margins": 0.3052147328853607, |
|
"rewards/rejected": -0.4933415353298187, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9303482587064674e-07, |
|
"logits/chosen": -2.4030299186706543, |
|
"logits/rejected": -2.4201598167419434, |
|
"logps/chosen": -282.9662170410156, |
|
"logps/rejected": -238.91635131835938, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.07580298185348511, |
|
"rewards/margins": 0.48956719040870667, |
|
"rewards/rejected": -0.5653700828552246, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.911213164944508e-07, |
|
"logits/chosen": -2.4482102394104004, |
|
"logits/rejected": -2.3696365356445312, |
|
"logps/chosen": -291.52374267578125, |
|
"logps/rejected": -245.3708953857422, |
|
"loss": 0.5645, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.0693991631269455, |
|
"rewards/margins": 0.5479902029037476, |
|
"rewards/rejected": -0.6173893809318542, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8920780711825487e-07, |
|
"logits/chosen": -2.333220958709717, |
|
"logits/rejected": -2.36674165725708, |
|
"logps/chosen": -270.8648376464844, |
|
"logps/rejected": -226.4284210205078, |
|
"loss": 0.5623, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.07031848281621933, |
|
"rewards/margins": 0.5230459570884705, |
|
"rewards/rejected": -0.5933644771575928, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8729429774205893e-07, |
|
"logits/chosen": -2.414712905883789, |
|
"logits/rejected": -2.3959202766418457, |
|
"logps/chosen": -265.5609436035156, |
|
"logps/rejected": -221.8790283203125, |
|
"loss": 0.5583, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.09441053122282028, |
|
"rewards/margins": 0.4818418025970459, |
|
"rewards/rejected": -0.5762523412704468, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8538078836586294e-07, |
|
"logits/chosen": -2.4260947704315186, |
|
"logits/rejected": -2.3521246910095215, |
|
"logps/chosen": -255.67459106445312, |
|
"logps/rejected": -249.5232696533203, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.11774387210607529, |
|
"rewards/margins": 0.41275423765182495, |
|
"rewards/rejected": -0.5304980874061584, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.83467278989667e-07, |
|
"logits/chosen": -2.4081029891967773, |
|
"logits/rejected": -2.356966495513916, |
|
"logps/chosen": -282.0208740234375, |
|
"logps/rejected": -222.43765258789062, |
|
"loss": 0.5496, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.0056116655468940735, |
|
"rewards/margins": 0.560874879360199, |
|
"rewards/rejected": -0.5664864778518677, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8155376961347106e-07, |
|
"logits/chosen": -2.4205422401428223, |
|
"logits/rejected": -2.4086201190948486, |
|
"logps/chosen": -250.5408477783203, |
|
"logps/rejected": -203.41635131835938, |
|
"loss": 0.5658, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.08207114040851593, |
|
"rewards/margins": 0.5275572538375854, |
|
"rewards/rejected": -0.6096283197402954, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.796402602372751e-07, |
|
"logits/chosen": -2.4266042709350586, |
|
"logits/rejected": -2.3435492515563965, |
|
"logps/chosen": -272.265380859375, |
|
"logps/rejected": -252.53268432617188, |
|
"loss": 0.5561, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.08728349208831787, |
|
"rewards/margins": 0.43014270067214966, |
|
"rewards/rejected": -0.5174261927604675, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.777267508610792e-07, |
|
"logits/chosen": -2.4939112663269043, |
|
"logits/rejected": -2.428483247756958, |
|
"logps/chosen": -270.8354797363281, |
|
"logps/rejected": -260.45611572265625, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.1041080579161644, |
|
"rewards/margins": 0.5785307884216309, |
|
"rewards/rejected": -0.6826388239860535, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7581324148488325e-07, |
|
"logits/chosen": -2.4214115142822266, |
|
"logits/rejected": -2.374481201171875, |
|
"logps/chosen": -299.26385498046875, |
|
"logps/rejected": -224.3602294921875, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.09809824079275131, |
|
"rewards/margins": 0.4741215705871582, |
|
"rewards/rejected": -0.5722197890281677, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.738997321086873e-07, |
|
"logits/chosen": -2.405039072036743, |
|
"logits/rejected": -2.36025333404541, |
|
"logps/chosen": -257.98358154296875, |
|
"logps/rejected": -224.39584350585938, |
|
"loss": 0.5705, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.055282533168792725, |
|
"rewards/margins": 0.41767168045043945, |
|
"rewards/rejected": -0.4729541838169098, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7198622273249137e-07, |
|
"logits/chosen": -2.4322896003723145, |
|
"logits/rejected": -2.4136605262756348, |
|
"logps/chosen": -262.254150390625, |
|
"logps/rejected": -195.317626953125, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.09157253801822662, |
|
"rewards/margins": 0.4116409420967102, |
|
"rewards/rejected": -0.5032135248184204, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.1115331649780273, |
|
"eval_logits/rejected": -1.9901020526885986, |
|
"eval_logps/chosen": -265.7538146972656, |
|
"eval_logps/rejected": -225.6846923828125, |
|
"eval_loss": 0.5559706091880798, |
|
"eval_rewards/accuracies": 0.7200000286102295, |
|
"eval_rewards/chosen": -0.10930197685956955, |
|
"eval_rewards/margins": 0.5307366251945496, |
|
"eval_rewards/rejected": -0.6400385499000549, |
|
"eval_runtime": 454.9548, |
|
"eval_samples_per_second": 4.396, |
|
"eval_steps_per_second": 0.275, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7007271335629544e-07, |
|
"logits/chosen": -2.393294334411621, |
|
"logits/rejected": -2.2951369285583496, |
|
"logps/chosen": -265.35040283203125, |
|
"logps/rejected": -216.5760955810547, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.0699644461274147, |
|
"rewards/margins": 0.6380785703659058, |
|
"rewards/rejected": -0.7080430388450623, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.681592039800995e-07, |
|
"logits/chosen": -2.384970188140869, |
|
"logits/rejected": -2.355285167694092, |
|
"logps/chosen": -242.12600708007812, |
|
"logps/rejected": -217.994873046875, |
|
"loss": 0.5518, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.10410074144601822, |
|
"rewards/margins": 0.4534371495246887, |
|
"rewards/rejected": -0.557537853717804, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.662456946039035e-07, |
|
"logits/chosen": -2.3399007320404053, |
|
"logits/rejected": -2.3183391094207764, |
|
"logps/chosen": -231.69064331054688, |
|
"logps/rejected": -201.68441772460938, |
|
"loss": 0.5997, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.2032332420349121, |
|
"rewards/margins": 0.3764384686946869, |
|
"rewards/rejected": -0.5796716809272766, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6433218522770757e-07, |
|
"logits/chosen": -2.3833537101745605, |
|
"logits/rejected": -2.319551944732666, |
|
"logps/chosen": -295.35052490234375, |
|
"logps/rejected": -220.7740936279297, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.07751850038766861, |
|
"rewards/margins": 0.4878837466239929, |
|
"rewards/rejected": -0.5654021501541138, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6241867585151163e-07, |
|
"logits/chosen": -2.449373245239258, |
|
"logits/rejected": -2.387481927871704, |
|
"logps/chosen": -248.5247802734375, |
|
"logps/rejected": -225.5816192626953, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.13464747369289398, |
|
"rewards/margins": 0.4785802960395813, |
|
"rewards/rejected": -0.6132277250289917, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.605051664753157e-07, |
|
"logits/chosen": -2.4245686531066895, |
|
"logits/rejected": -2.4296786785125732, |
|
"logps/chosen": -251.46206665039062, |
|
"logps/rejected": -202.94284057617188, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.10227508842945099, |
|
"rewards/margins": 0.5419632792472839, |
|
"rewards/rejected": -0.6442384123802185, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5859165709911975e-07, |
|
"logits/chosen": -2.3132331371307373, |
|
"logits/rejected": -2.3190786838531494, |
|
"logps/chosen": -292.53125, |
|
"logps/rejected": -256.5845947265625, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.04681607708334923, |
|
"rewards/margins": 0.629582941532135, |
|
"rewards/rejected": -0.6763990521430969, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.566781477229238e-07, |
|
"logits/chosen": -2.36743426322937, |
|
"logits/rejected": -2.2601940631866455, |
|
"logps/chosen": -259.8556213378906, |
|
"logps/rejected": -235.2071075439453, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.0872168317437172, |
|
"rewards/margins": 0.5171287059783936, |
|
"rewards/rejected": -0.6043455600738525, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.547646383467279e-07, |
|
"logits/chosen": -2.326066732406616, |
|
"logits/rejected": -2.3307995796203613, |
|
"logps/chosen": -256.3676452636719, |
|
"logps/rejected": -235.98672485351562, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.039658013731241226, |
|
"rewards/margins": 0.6266075372695923, |
|
"rewards/rejected": -0.6662655472755432, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5285112897053194e-07, |
|
"logits/chosen": -2.4012277126312256, |
|
"logits/rejected": -2.287254810333252, |
|
"logps/chosen": -275.3678894042969, |
|
"logps/rejected": -235.6710662841797, |
|
"loss": 0.5394, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.1649123877286911, |
|
"rewards/margins": 0.7132270932197571, |
|
"rewards/rejected": -0.8781394958496094, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.50937619594336e-07, |
|
"logits/chosen": -2.4277257919311523, |
|
"logits/rejected": -2.3757331371307373, |
|
"logps/chosen": -291.19110107421875, |
|
"logps/rejected": -261.84320068359375, |
|
"loss": 0.5486, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08061722666025162, |
|
"rewards/margins": 0.646703839302063, |
|
"rewards/rejected": -0.7273210883140564, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4902411021814007e-07, |
|
"logits/chosen": -2.4494857788085938, |
|
"logits/rejected": -2.3594818115234375, |
|
"logps/chosen": -294.79901123046875, |
|
"logps/rejected": -229.1653594970703, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.0071506015956401825, |
|
"rewards/margins": 0.7936251759529114, |
|
"rewards/rejected": -0.8007757067680359, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4711060084194413e-07, |
|
"logits/chosen": -2.3939168453216553, |
|
"logits/rejected": -2.403517961502075, |
|
"logps/chosen": -272.8337707519531, |
|
"logps/rejected": -228.14816284179688, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.05292476341128349, |
|
"rewards/margins": 0.5845643281936646, |
|
"rewards/rejected": -0.6374891996383667, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4519709146574814e-07, |
|
"logits/chosen": -2.4750289916992188, |
|
"logits/rejected": -2.3406853675842285, |
|
"logps/chosen": -267.2197265625, |
|
"logps/rejected": -247.76498413085938, |
|
"loss": 0.5588, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.07975287735462189, |
|
"rewards/margins": 0.5557757019996643, |
|
"rewards/rejected": -0.6355286240577698, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.432835820895522e-07, |
|
"logits/chosen": -2.4085628986358643, |
|
"logits/rejected": -2.3299522399902344, |
|
"logps/chosen": -250.01791381835938, |
|
"logps/rejected": -230.2222900390625, |
|
"loss": 0.5427, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.09803862869739532, |
|
"rewards/margins": 0.5516743659973145, |
|
"rewards/rejected": -0.6497129797935486, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.4137007271335626e-07, |
|
"logits/chosen": -2.349724054336548, |
|
"logits/rejected": -2.396789073944092, |
|
"logps/chosen": -218.85134887695312, |
|
"logps/rejected": -212.79843139648438, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.17577314376831055, |
|
"rewards/margins": 0.4191734790802002, |
|
"rewards/rejected": -0.5949466228485107, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.394565633371603e-07, |
|
"logits/chosen": -2.3591291904449463, |
|
"logits/rejected": -2.2995028495788574, |
|
"logps/chosen": -293.353271484375, |
|
"logps/rejected": -245.3329315185547, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1506257951259613, |
|
"rewards/margins": 0.5927566289901733, |
|
"rewards/rejected": -0.7433823347091675, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.375430539609644e-07, |
|
"logits/chosen": -2.281580924987793, |
|
"logits/rejected": -2.3465418815612793, |
|
"logps/chosen": -239.721923828125, |
|
"logps/rejected": -211.04336547851562, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.13178768754005432, |
|
"rewards/margins": 0.6117417812347412, |
|
"rewards/rejected": -0.7435294985771179, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3562954458476845e-07, |
|
"logits/chosen": -2.4424147605895996, |
|
"logits/rejected": -2.428922653198242, |
|
"logps/chosen": -306.62567138671875, |
|
"logps/rejected": -233.6839141845703, |
|
"loss": 0.5608, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.14439252018928528, |
|
"rewards/margins": 0.5881448984146118, |
|
"rewards/rejected": -0.7325373888015747, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.337160352085725e-07, |
|
"logits/chosen": -2.4268736839294434, |
|
"logits/rejected": -2.385798692703247, |
|
"logps/chosen": -298.24273681640625, |
|
"logps/rejected": -241.7369842529297, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06826777756214142, |
|
"rewards/margins": 0.7597671747207642, |
|
"rewards/rejected": -0.8280348777770996, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3180252583237657e-07, |
|
"logits/chosen": -2.3368301391601562, |
|
"logits/rejected": -2.2904515266418457, |
|
"logps/chosen": -277.94744873046875, |
|
"logps/rejected": -231.4571533203125, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.057608962059020996, |
|
"rewards/margins": 0.6012957692146301, |
|
"rewards/rejected": -0.6589046716690063, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2988901645618063e-07, |
|
"logits/chosen": -2.3686211109161377, |
|
"logits/rejected": -2.278508424758911, |
|
"logps/chosen": -261.26556396484375, |
|
"logps/rejected": -225.91049194335938, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1395711600780487, |
|
"rewards/margins": 0.5758097171783447, |
|
"rewards/rejected": -0.715380847454071, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.279755070799847e-07, |
|
"logits/chosen": -2.402057647705078, |
|
"logits/rejected": -2.310411214828491, |
|
"logps/chosen": -259.6847229003906, |
|
"logps/rejected": -257.0271301269531, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1509382575750351, |
|
"rewards/margins": 0.5303701162338257, |
|
"rewards/rejected": -0.681308388710022, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.260619977037887e-07, |
|
"logits/chosen": -2.397024154663086, |
|
"logits/rejected": -2.3406670093536377, |
|
"logps/chosen": -251.27926635742188, |
|
"logps/rejected": -214.0767822265625, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.07334662973880768, |
|
"rewards/margins": 0.7737401723861694, |
|
"rewards/rejected": -0.8470869064331055, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2414848832759277e-07, |
|
"logits/chosen": -2.42278790473938, |
|
"logits/rejected": -2.2710700035095215, |
|
"logps/chosen": -261.6346435546875, |
|
"logps/rejected": -227.0576171875, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.13957397639751434, |
|
"rewards/margins": 0.585122287273407, |
|
"rewards/rejected": -0.7246962785720825, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2223497895139683e-07, |
|
"logits/chosen": -2.3876070976257324, |
|
"logits/rejected": -2.3234522342681885, |
|
"logps/chosen": -263.7557678222656, |
|
"logps/rejected": -219.36367797851562, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.12251804769039154, |
|
"rewards/margins": 0.5337706804275513, |
|
"rewards/rejected": -0.656288743019104, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.203214695752009e-07, |
|
"logits/chosen": -2.4647679328918457, |
|
"logits/rejected": -2.385979175567627, |
|
"logps/chosen": -262.8483581542969, |
|
"logps/rejected": -230.214111328125, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15525886416435242, |
|
"rewards/margins": 0.5802783370018005, |
|
"rewards/rejected": -0.7355371713638306, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1840796019900495e-07, |
|
"logits/chosen": -2.4262988567352295, |
|
"logits/rejected": -2.3878419399261475, |
|
"logps/chosen": -262.4114074707031, |
|
"logps/rejected": -255.02749633789062, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.11712455749511719, |
|
"rewards/margins": 0.6756891012191772, |
|
"rewards/rejected": -0.7928137183189392, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.16494450822809e-07, |
|
"logits/chosen": -2.35296893119812, |
|
"logits/rejected": -2.3551628589630127, |
|
"logps/chosen": -236.53445434570312, |
|
"logps/rejected": -233.4728546142578, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.18067090213298798, |
|
"rewards/margins": 0.4853228032588959, |
|
"rewards/rejected": -0.6659936308860779, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.145809414466131e-07, |
|
"logits/chosen": -2.48589825630188, |
|
"logits/rejected": -2.478086233139038, |
|
"logps/chosen": -272.775634765625, |
|
"logps/rejected": -266.91058349609375, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.14183524250984192, |
|
"rewards/margins": 0.4844776690006256, |
|
"rewards/rejected": -0.6263129711151123, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1266743207041714e-07, |
|
"logits/chosen": -2.406167507171631, |
|
"logits/rejected": -2.4039671421051025, |
|
"logps/chosen": -247.5933074951172, |
|
"logps/rejected": -226.58450317382812, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1417725682258606, |
|
"rewards/margins": 0.6144998073577881, |
|
"rewards/rejected": -0.7562723755836487, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.107539226942212e-07, |
|
"logits/chosen": -2.392794132232666, |
|
"logits/rejected": -2.370959758758545, |
|
"logps/chosen": -259.82183837890625, |
|
"logps/rejected": -240.86178588867188, |
|
"loss": 0.5788, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.14074555039405823, |
|
"rewards/margins": 0.5021336674690247, |
|
"rewards/rejected": -0.6428791880607605, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0884041331802526e-07, |
|
"logits/chosen": -2.3568851947784424, |
|
"logits/rejected": -2.2625174522399902, |
|
"logps/chosen": -243.117431640625, |
|
"logps/rejected": -216.640380859375, |
|
"loss": 0.5542, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.17768898606300354, |
|
"rewards/margins": 0.5181851983070374, |
|
"rewards/rejected": -0.6958742141723633, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0692690394182927e-07, |
|
"logits/chosen": -2.436131477355957, |
|
"logits/rejected": -2.396608829498291, |
|
"logps/chosen": -277.2726135253906, |
|
"logps/rejected": -233.1066131591797, |
|
"loss": 0.5558, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.06510978192090988, |
|
"rewards/margins": 0.6768725514411926, |
|
"rewards/rejected": -0.7419822812080383, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0501339456563334e-07, |
|
"logits/chosen": -2.4156711101531982, |
|
"logits/rejected": -2.271003007888794, |
|
"logps/chosen": -269.3404541015625, |
|
"logps/rejected": -243.42245483398438, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.16720613837242126, |
|
"rewards/margins": 0.5710007548332214, |
|
"rewards/rejected": -0.7382069230079651, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.030998851894374e-07, |
|
"logits/chosen": -2.352940082550049, |
|
"logits/rejected": -2.363243579864502, |
|
"logps/chosen": -229.7804718017578, |
|
"logps/rejected": -221.0922393798828, |
|
"loss": 0.5117, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.08244588226079941, |
|
"rewards/margins": 0.668733537197113, |
|
"rewards/rejected": -0.7511794567108154, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0118637581324146e-07, |
|
"logits/chosen": -2.4218077659606934, |
|
"logits/rejected": -2.36722469329834, |
|
"logps/chosen": -257.25872802734375, |
|
"logps/rejected": -223.78256225585938, |
|
"loss": 0.539, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.08728824555873871, |
|
"rewards/margins": 0.7230480909347534, |
|
"rewards/rejected": -0.8103362917900085, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.992728664370455e-07, |
|
"logits/chosen": -2.4494032859802246, |
|
"logits/rejected": -2.388096332550049, |
|
"logps/chosen": -239.74057006835938, |
|
"logps/rejected": -225.5271453857422, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.21483702957630157, |
|
"rewards/margins": 0.5343385338783264, |
|
"rewards/rejected": -0.7491755485534668, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.973593570608496e-07, |
|
"logits/chosen": -2.3430349826812744, |
|
"logits/rejected": -2.262885808944702, |
|
"logps/chosen": -261.22149658203125, |
|
"logps/rejected": -220.1973419189453, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11469510942697525, |
|
"rewards/margins": 0.6628144979476929, |
|
"rewards/rejected": -0.7775096297264099, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9544584768465365e-07, |
|
"logits/chosen": -2.415313243865967, |
|
"logits/rejected": -2.369680881500244, |
|
"logps/chosen": -310.0767517089844, |
|
"logps/rejected": -254.2592315673828, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1948031485080719, |
|
"rewards/margins": 0.6389765739440918, |
|
"rewards/rejected": -0.8337796926498413, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.935323383084577e-07, |
|
"logits/chosen": -2.3823018074035645, |
|
"logits/rejected": -2.3315823078155518, |
|
"logps/chosen": -290.1065368652344, |
|
"logps/rejected": -242.30081176757812, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.09571398794651031, |
|
"rewards/margins": 0.5208708643913269, |
|
"rewards/rejected": -0.6165848970413208, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9161882893226177e-07, |
|
"logits/chosen": -2.3578696250915527, |
|
"logits/rejected": -2.2521920204162598, |
|
"logps/chosen": -240.7886962890625, |
|
"logps/rejected": -204.34652709960938, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.20477895438671112, |
|
"rewards/margins": 0.4378456473350525, |
|
"rewards/rejected": -0.6426246166229248, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8970531955606583e-07, |
|
"logits/chosen": -2.453507900238037, |
|
"logits/rejected": -2.3540263175964355, |
|
"logps/chosen": -270.1062927246094, |
|
"logps/rejected": -231.31466674804688, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.16483107209205627, |
|
"rewards/margins": 0.64717036485672, |
|
"rewards/rejected": -0.8120015263557434, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8779181017986984e-07, |
|
"logits/chosen": -2.424034595489502, |
|
"logits/rejected": -2.4023849964141846, |
|
"logps/chosen": -269.05780029296875, |
|
"logps/rejected": -239.2170867919922, |
|
"loss": 0.5546, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.22925500571727753, |
|
"rewards/margins": 0.5416269898414612, |
|
"rewards/rejected": -0.7708818912506104, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.858783008036739e-07, |
|
"logits/chosen": -2.3142106533050537, |
|
"logits/rejected": -2.261690616607666, |
|
"logps/chosen": -278.53643798828125, |
|
"logps/rejected": -220.97555541992188, |
|
"loss": 0.5294, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.13496367633342743, |
|
"rewards/margins": 0.6569581031799316, |
|
"rewards/rejected": -0.7919217944145203, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8396479142747797e-07, |
|
"logits/chosen": -2.3985562324523926, |
|
"logits/rejected": -2.368497848510742, |
|
"logps/chosen": -230.22122192382812, |
|
"logps/rejected": -201.47998046875, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1572544276714325, |
|
"rewards/margins": 0.6025274991989136, |
|
"rewards/rejected": -0.7597819566726685, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8205128205128203e-07, |
|
"logits/chosen": -2.3532938957214355, |
|
"logits/rejected": -2.30509352684021, |
|
"logps/chosen": -260.51312255859375, |
|
"logps/rejected": -229.70376586914062, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2625270187854767, |
|
"rewards/margins": 0.5138301253318787, |
|
"rewards/rejected": -0.776357114315033, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.801377726750861e-07, |
|
"logits/chosen": -2.3864636421203613, |
|
"logits/rejected": -2.316066265106201, |
|
"logps/chosen": -256.13763427734375, |
|
"logps/rejected": -217.63235473632812, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1825113743543625, |
|
"rewards/margins": 0.49288827180862427, |
|
"rewards/rejected": -0.675399661064148, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7822426329889015e-07, |
|
"logits/chosen": -2.428596019744873, |
|
"logits/rejected": -2.3228797912597656, |
|
"logps/chosen": -294.236328125, |
|
"logps/rejected": -235.548095703125, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.1488821804523468, |
|
"rewards/margins": 0.5950523018836975, |
|
"rewards/rejected": -0.7439345121383667, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.763107539226942e-07, |
|
"logits/chosen": -2.380725145339966, |
|
"logits/rejected": -2.3312487602233887, |
|
"logps/chosen": -280.84490966796875, |
|
"logps/rejected": -242.58340454101562, |
|
"loss": 0.5586, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.13354410231113434, |
|
"rewards/margins": 0.6448094844818115, |
|
"rewards/rejected": -0.7783535718917847, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.743972445464983e-07, |
|
"logits/chosen": -2.4229865074157715, |
|
"logits/rejected": -2.410762071609497, |
|
"logps/chosen": -254.83706665039062, |
|
"logps/rejected": -232.5323486328125, |
|
"loss": 0.5598, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1900000274181366, |
|
"rewards/margins": 0.4888847768306732, |
|
"rewards/rejected": -0.678884744644165, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7248373517030234e-07, |
|
"logits/chosen": -2.475088119506836, |
|
"logits/rejected": -2.4195237159729004, |
|
"logps/chosen": -274.3759765625, |
|
"logps/rejected": -237.31689453125, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.16161084175109863, |
|
"rewards/margins": 0.6231001615524292, |
|
"rewards/rejected": -0.7847110629081726, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.705702257941064e-07, |
|
"logits/chosen": -2.432378053665161, |
|
"logits/rejected": -2.4352517127990723, |
|
"logps/chosen": -311.6330261230469, |
|
"logps/rejected": -246.75796508789062, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.16773328185081482, |
|
"rewards/margins": 0.6822336912155151, |
|
"rewards/rejected": -0.8499670028686523, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.686567164179104e-07, |
|
"logits/chosen": -2.348602771759033, |
|
"logits/rejected": -2.275367259979248, |
|
"logps/chosen": -259.00390625, |
|
"logps/rejected": -219.0964813232422, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.16603624820709229, |
|
"rewards/margins": 0.6049584150314331, |
|
"rewards/rejected": -0.7709946632385254, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6674320704171447e-07, |
|
"logits/chosen": -2.416759729385376, |
|
"logits/rejected": -2.3592278957366943, |
|
"logps/chosen": -259.57403564453125, |
|
"logps/rejected": -206.39962768554688, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.10807810723781586, |
|
"rewards/margins": 0.7587023973464966, |
|
"rewards/rejected": -0.8667804598808289, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6482969766551853e-07, |
|
"logits/chosen": -2.4495720863342285, |
|
"logits/rejected": -2.447631359100342, |
|
"logps/chosen": -274.3368225097656, |
|
"logps/rejected": -237.6719970703125, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.08986170589923859, |
|
"rewards/margins": 0.7171138525009155, |
|
"rewards/rejected": -0.8069756627082825, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.629161882893226e-07, |
|
"logits/chosen": -2.4459540843963623, |
|
"logits/rejected": -2.371903419494629, |
|
"logps/chosen": -280.4138488769531, |
|
"logps/rejected": -214.16226196289062, |
|
"loss": 0.5159, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.15295711159706116, |
|
"rewards/margins": 0.7536977529525757, |
|
"rewards/rejected": -0.906654953956604, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6100267891312666e-07, |
|
"logits/chosen": -2.3349404335021973, |
|
"logits/rejected": -2.303260087966919, |
|
"logps/chosen": -269.13787841796875, |
|
"logps/rejected": -208.63107299804688, |
|
"loss": 0.5546, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12116660922765732, |
|
"rewards/margins": 0.6888124942779541, |
|
"rewards/rejected": -0.8099790811538696, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.590891695369307e-07, |
|
"logits/chosen": -2.450225830078125, |
|
"logits/rejected": -2.361754894256592, |
|
"logps/chosen": -256.0200500488281, |
|
"logps/rejected": -210.36026000976562, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1600186824798584, |
|
"rewards/margins": 0.5653270483016968, |
|
"rewards/rejected": -0.7253457903862, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.571756601607348e-07, |
|
"logits/chosen": -2.4175140857696533, |
|
"logits/rejected": -2.3866875171661377, |
|
"logps/chosen": -294.81744384765625, |
|
"logps/rejected": -217.4511260986328, |
|
"loss": 0.5455, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.18921521306037903, |
|
"rewards/margins": 0.6538978815078735, |
|
"rewards/rejected": -0.843113124370575, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5526215078453884e-07, |
|
"logits/chosen": -2.4297268390655518, |
|
"logits/rejected": -2.3942902088165283, |
|
"logps/chosen": -301.64642333984375, |
|
"logps/rejected": -237.7666015625, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10356118530035019, |
|
"rewards/margins": 0.7558127045631409, |
|
"rewards/rejected": -0.859373927116394, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.533486414083429e-07, |
|
"logits/chosen": -2.4258458614349365, |
|
"logits/rejected": -2.3717398643493652, |
|
"logps/chosen": -306.97442626953125, |
|
"logps/rejected": -249.22109985351562, |
|
"loss": 0.5161, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.07514607906341553, |
|
"rewards/margins": 0.7315629720687866, |
|
"rewards/rejected": -0.8067091107368469, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5143513203214697e-07, |
|
"logits/chosen": -2.3663864135742188, |
|
"logits/rejected": -2.326347827911377, |
|
"logps/chosen": -275.7830505371094, |
|
"logps/rejected": -225.2949676513672, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.20903043448925018, |
|
"rewards/margins": 0.5634015202522278, |
|
"rewards/rejected": -0.7724319696426392, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.49521622655951e-07, |
|
"logits/chosen": -2.492893934249878, |
|
"logits/rejected": -2.3561127185821533, |
|
"logps/chosen": -293.12384033203125, |
|
"logps/rejected": -262.7926940917969, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.04525697976350784, |
|
"rewards/margins": 0.8012422323226929, |
|
"rewards/rejected": -0.8464992642402649, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4760811327975504e-07, |
|
"logits/chosen": -2.4678902626037598, |
|
"logits/rejected": -2.3764843940734863, |
|
"logps/chosen": -273.65106201171875, |
|
"logps/rejected": -253.1365966796875, |
|
"loss": 0.5352, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.13813751935958862, |
|
"rewards/margins": 0.5831281542778015, |
|
"rewards/rejected": -0.7212656736373901, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.456946039035591e-07, |
|
"logits/chosen": -2.466249942779541, |
|
"logits/rejected": -2.436295747756958, |
|
"logps/chosen": -290.6951599121094, |
|
"logps/rejected": -257.230712890625, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.10182980448007584, |
|
"rewards/margins": 0.7511130571365356, |
|
"rewards/rejected": -0.852942943572998, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4378109452736316e-07, |
|
"logits/chosen": -2.3190982341766357, |
|
"logits/rejected": -2.3256137371063232, |
|
"logps/chosen": -272.5343322753906, |
|
"logps/rejected": -198.2899932861328, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.10047291219234467, |
|
"rewards/margins": 0.7110286951065063, |
|
"rewards/rejected": -0.8115016222000122, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.418675851511672e-07, |
|
"logits/chosen": -2.3069424629211426, |
|
"logits/rejected": -2.312654495239258, |
|
"logps/chosen": -281.74322509765625, |
|
"logps/rejected": -221.7678680419922, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.17993247509002686, |
|
"rewards/margins": 0.6608012318611145, |
|
"rewards/rejected": -0.8407337069511414, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.399540757749713e-07, |
|
"logits/chosen": -2.372399091720581, |
|
"logits/rejected": -2.2994461059570312, |
|
"logps/chosen": -250.9709014892578, |
|
"logps/rejected": -216.40042114257812, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07177358865737915, |
|
"rewards/margins": 0.9095686078071594, |
|
"rewards/rejected": -0.9813421964645386, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3804056639877535e-07, |
|
"logits/chosen": -2.333723545074463, |
|
"logits/rejected": -2.250431537628174, |
|
"logps/chosen": -269.04998779296875, |
|
"logps/rejected": -230.35256958007812, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1327822506427765, |
|
"rewards/margins": 0.7238941192626953, |
|
"rewards/rejected": -0.8566763997077942, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.361270570225794e-07, |
|
"logits/chosen": -2.3128609657287598, |
|
"logits/rejected": -2.2538418769836426, |
|
"logps/chosen": -263.09417724609375, |
|
"logps/rejected": -223.70974731445312, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.11414393037557602, |
|
"rewards/margins": 0.789380669593811, |
|
"rewards/rejected": -0.9035245776176453, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3421354764638345e-07, |
|
"logits/chosen": -2.4389374256134033, |
|
"logits/rejected": -2.3650295734405518, |
|
"logps/chosen": -296.4449462890625, |
|
"logps/rejected": -244.5340576171875, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.16256913542747498, |
|
"rewards/margins": 0.6594110727310181, |
|
"rewards/rejected": -0.8219801783561707, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.323000382701875e-07, |
|
"logits/chosen": -2.38859224319458, |
|
"logits/rejected": -2.275116443634033, |
|
"logps/chosen": -289.535400390625, |
|
"logps/rejected": -226.6898956298828, |
|
"loss": 0.5174, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.11396412551403046, |
|
"rewards/margins": 0.6983034014701843, |
|
"rewards/rejected": -0.8122674822807312, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3038652889399157e-07, |
|
"logits/chosen": -2.3573622703552246, |
|
"logits/rejected": -2.368932008743286, |
|
"logps/chosen": -259.396240234375, |
|
"logps/rejected": -235.6220245361328, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.0638931542634964, |
|
"rewards/margins": 0.7549349069595337, |
|
"rewards/rejected": -0.8188279867172241, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2847301951779563e-07, |
|
"logits/chosen": -2.3194305896759033, |
|
"logits/rejected": -2.271104574203491, |
|
"logps/chosen": -230.3657989501953, |
|
"logps/rejected": -218.594970703125, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.23464179039001465, |
|
"rewards/margins": 0.5825188755989075, |
|
"rewards/rejected": -0.8171606063842773, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.265595101415997e-07, |
|
"logits/chosen": -2.3034956455230713, |
|
"logits/rejected": -2.26176118850708, |
|
"logps/chosen": -263.59234619140625, |
|
"logps/rejected": -207.0500946044922, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1522129625082016, |
|
"rewards/margins": 0.8050564527511597, |
|
"rewards/rejected": -0.957269549369812, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2464600076540373e-07, |
|
"logits/chosen": -2.3417623043060303, |
|
"logits/rejected": -2.3644707202911377, |
|
"logps/chosen": -272.0294494628906, |
|
"logps/rejected": -233.199951171875, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.14386221766471863, |
|
"rewards/margins": 0.7099758386611938, |
|
"rewards/rejected": -0.8538379669189453, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.227324913892078e-07, |
|
"logits/chosen": -2.4512619972229004, |
|
"logits/rejected": -2.333153247833252, |
|
"logps/chosen": -262.7388610839844, |
|
"logps/rejected": -233.94564819335938, |
|
"loss": 0.5274, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09474427998065948, |
|
"rewards/margins": 0.678414523601532, |
|
"rewards/rejected": -0.773158848285675, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2081898201301186e-07, |
|
"logits/chosen": -2.400722026824951, |
|
"logits/rejected": -2.328423023223877, |
|
"logps/chosen": -251.57211303710938, |
|
"logps/rejected": -219.1768035888672, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.13836149871349335, |
|
"rewards/margins": 0.6494232416152954, |
|
"rewards/rejected": -0.78778475522995, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1890547263681592e-07, |
|
"logits/chosen": -2.4296817779541016, |
|
"logits/rejected": -2.367189407348633, |
|
"logps/chosen": -271.3897399902344, |
|
"logps/rejected": -215.4512481689453, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.08417050540447235, |
|
"rewards/margins": 0.754088282585144, |
|
"rewards/rejected": -0.8382587432861328, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1699196326061998e-07, |
|
"logits/chosen": -2.3716368675231934, |
|
"logits/rejected": -2.3670201301574707, |
|
"logps/chosen": -265.5472106933594, |
|
"logps/rejected": -241.4296875, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14254868030548096, |
|
"rewards/margins": 0.701485276222229, |
|
"rewards/rejected": -0.8440340161323547, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1507845388442402e-07, |
|
"logits/chosen": -2.396054744720459, |
|
"logits/rejected": -2.285252332687378, |
|
"logps/chosen": -287.78558349609375, |
|
"logps/rejected": -233.22732543945312, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.057570360600948334, |
|
"rewards/margins": 0.7760987281799316, |
|
"rewards/rejected": -0.8336690664291382, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1316494450822808e-07, |
|
"logits/chosen": -2.3319549560546875, |
|
"logits/rejected": -2.3298840522766113, |
|
"logps/chosen": -269.1609191894531, |
|
"logps/rejected": -242.9523468017578, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12093748152256012, |
|
"rewards/margins": 0.7203799486160278, |
|
"rewards/rejected": -0.8413174748420715, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1125143513203214e-07, |
|
"logits/chosen": -2.4473745822906494, |
|
"logits/rejected": -2.4215996265411377, |
|
"logps/chosen": -266.4071960449219, |
|
"logps/rejected": -224.84756469726562, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.14717252552509308, |
|
"rewards/margins": 0.730695366859436, |
|
"rewards/rejected": -0.8778678774833679, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.093379257558362e-07, |
|
"logits/chosen": -2.426567316055298, |
|
"logits/rejected": -2.4301810264587402, |
|
"logps/chosen": -274.27569580078125, |
|
"logps/rejected": -237.6707305908203, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.19985240697860718, |
|
"rewards/margins": 0.6345885992050171, |
|
"rewards/rejected": -0.8344410061836243, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0742441637964026e-07, |
|
"logits/chosen": -2.32918119430542, |
|
"logits/rejected": -2.315727710723877, |
|
"logps/chosen": -299.13201904296875, |
|
"logps/rejected": -238.65219116210938, |
|
"loss": 0.5101, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.08389053493738174, |
|
"rewards/margins": 0.9151546359062195, |
|
"rewards/rejected": -0.999045193195343, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.055109070034443e-07, |
|
"logits/chosen": -2.4138271808624268, |
|
"logits/rejected": -2.395538091659546, |
|
"logps/chosen": -287.26788330078125, |
|
"logps/rejected": -256.14886474609375, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10842672735452652, |
|
"rewards/margins": 0.7822098731994629, |
|
"rewards/rejected": -0.8906365633010864, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0359739762724836e-07, |
|
"logits/chosen": -2.415120840072632, |
|
"logits/rejected": -2.359282970428467, |
|
"logps/chosen": -279.69464111328125, |
|
"logps/rejected": -237.9990234375, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1128331646323204, |
|
"rewards/margins": 0.7950798869132996, |
|
"rewards/rejected": -0.9079130291938782, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0168388825105242e-07, |
|
"logits/chosen": -2.342097520828247, |
|
"logits/rejected": -2.2842659950256348, |
|
"logps/chosen": -260.4648742675781, |
|
"logps/rejected": -264.97601318359375, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.14475753903388977, |
|
"rewards/margins": 0.8012211918830872, |
|
"rewards/rejected": -0.9459787607192993, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.997703788748565e-07, |
|
"logits/chosen": -2.3480544090270996, |
|
"logits/rejected": -2.300661325454712, |
|
"logps/chosen": -278.40704345703125, |
|
"logps/rejected": -228.9430694580078, |
|
"loss": 0.5331, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.2291232794523239, |
|
"rewards/margins": 0.6820493936538696, |
|
"rewards/rejected": -0.9111726880073547, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9785686949866055e-07, |
|
"logits/chosen": -2.374687910079956, |
|
"logits/rejected": -2.449584484100342, |
|
"logps/chosen": -275.783447265625, |
|
"logps/rejected": -233.29403686523438, |
|
"loss": 0.5627, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1256851851940155, |
|
"rewards/margins": 0.5714830160140991, |
|
"rewards/rejected": -0.6971681714057922, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9594336012246458e-07, |
|
"logits/chosen": -2.4475274085998535, |
|
"logits/rejected": -2.3420703411102295, |
|
"logps/chosen": -270.1884460449219, |
|
"logps/rejected": -235.0321502685547, |
|
"loss": 0.5117, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.15421505272388458, |
|
"rewards/margins": 0.7101847529411316, |
|
"rewards/rejected": -0.864399790763855, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9402985074626865e-07, |
|
"logits/chosen": -2.379213333129883, |
|
"logits/rejected": -2.3949854373931885, |
|
"logps/chosen": -269.8036804199219, |
|
"logps/rejected": -228.36856079101562, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1839611977338791, |
|
"rewards/margins": 0.684939980506897, |
|
"rewards/rejected": -0.8689011335372925, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.921163413700727e-07, |
|
"logits/chosen": -2.3342185020446777, |
|
"logits/rejected": -2.291137218475342, |
|
"logps/chosen": -280.77459716796875, |
|
"logps/rejected": -209.2417755126953, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.22546765208244324, |
|
"rewards/margins": 0.6627296209335327, |
|
"rewards/rejected": -0.8881972432136536, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9020283199387677e-07, |
|
"logits/chosen": -2.4452133178710938, |
|
"logits/rejected": -2.3817238807678223, |
|
"logps/chosen": -262.40533447265625, |
|
"logps/rejected": -233.92431640625, |
|
"loss": 0.552, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.05952071025967598, |
|
"rewards/margins": 0.7161270976066589, |
|
"rewards/rejected": -0.7756478190422058, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8828932261768083e-07, |
|
"logits/chosen": -2.432097911834717, |
|
"logits/rejected": -2.338268995285034, |
|
"logps/chosen": -266.4609375, |
|
"logps/rejected": -244.36953735351562, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.1674240082502365, |
|
"rewards/margins": 0.6536375284194946, |
|
"rewards/rejected": -0.8210614919662476, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8637581324148487e-07, |
|
"logits/chosen": -2.4438672065734863, |
|
"logits/rejected": -2.4159255027770996, |
|
"logps/chosen": -265.9490051269531, |
|
"logps/rejected": -240.15011596679688, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.11111648380756378, |
|
"rewards/margins": 0.8190618753433228, |
|
"rewards/rejected": -0.9301783442497253, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -2.0748343467712402, |
|
"eval_logits/rejected": -1.9509462118148804, |
|
"eval_logps/chosen": -266.15826416015625, |
|
"eval_logps/rejected": -227.92453002929688, |
|
"eval_loss": 0.5318232774734497, |
|
"eval_rewards/accuracies": 0.7419999837875366, |
|
"eval_rewards/chosen": -0.1497425138950348, |
|
"eval_rewards/margins": 0.7142792344093323, |
|
"eval_rewards/rejected": -0.8640217185020447, |
|
"eval_runtime": 456.7367, |
|
"eval_samples_per_second": 4.379, |
|
"eval_steps_per_second": 0.274, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8446230386528893e-07, |
|
"logits/chosen": -2.3549647331237793, |
|
"logits/rejected": -2.3090908527374268, |
|
"logps/chosen": -239.0998992919922, |
|
"logps/rejected": -226.9877166748047, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.196995347738266, |
|
"rewards/margins": 0.598027229309082, |
|
"rewards/rejected": -0.7950225472450256, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.82548794489093e-07, |
|
"logits/chosen": -2.307945489883423, |
|
"logits/rejected": -2.3319668769836426, |
|
"logps/chosen": -243.7688446044922, |
|
"logps/rejected": -234.90756225585938, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.20119981467723846, |
|
"rewards/margins": 0.630052924156189, |
|
"rewards/rejected": -0.8312527537345886, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8063528511289706e-07, |
|
"logits/chosen": -2.358461380004883, |
|
"logits/rejected": -2.277463912963867, |
|
"logps/chosen": -286.9745178222656, |
|
"logps/rejected": -259.64605712890625, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10196640342473984, |
|
"rewards/margins": 0.7701470851898193, |
|
"rewards/rejected": -0.8721135258674622, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7872177573670112e-07, |
|
"logits/chosen": -2.4066734313964844, |
|
"logits/rejected": -2.4170022010803223, |
|
"logps/chosen": -252.1322479248047, |
|
"logps/rejected": -230.71755981445312, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.08831964433193207, |
|
"rewards/margins": 0.6736435890197754, |
|
"rewards/rejected": -0.7619632482528687, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7680826636050515e-07, |
|
"logits/chosen": -2.313403844833374, |
|
"logits/rejected": -2.280029773712158, |
|
"logps/chosen": -278.5584716796875, |
|
"logps/rejected": -234.1926727294922, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07849719375371933, |
|
"rewards/margins": 0.7140364050865173, |
|
"rewards/rejected": -0.7925336360931396, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7489475698430921e-07, |
|
"logits/chosen": -2.332839012145996, |
|
"logits/rejected": -2.3671531677246094, |
|
"logps/chosen": -249.85073852539062, |
|
"logps/rejected": -249.09408569335938, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.13992241024971008, |
|
"rewards/margins": 0.6826204061508179, |
|
"rewards/rejected": -0.8225427865982056, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7298124760811328e-07, |
|
"logits/chosen": -2.40509295463562, |
|
"logits/rejected": -2.385143756866455, |
|
"logps/chosen": -277.4422302246094, |
|
"logps/rejected": -224.10305786132812, |
|
"loss": 0.539, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1345512419939041, |
|
"rewards/margins": 0.709915280342102, |
|
"rewards/rejected": -0.8444665670394897, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7106773823191734e-07, |
|
"logits/chosen": -2.438018321990967, |
|
"logits/rejected": -2.372896909713745, |
|
"logps/chosen": -270.50103759765625, |
|
"logps/rejected": -240.79336547851562, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.14716067910194397, |
|
"rewards/margins": 0.8182382583618164, |
|
"rewards/rejected": -0.965398907661438, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.691542288557214e-07, |
|
"logits/chosen": -2.41938853263855, |
|
"logits/rejected": -2.322054624557495, |
|
"logps/chosen": -297.6514892578125, |
|
"logps/rejected": -230.926513671875, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.17855019867420197, |
|
"rewards/margins": 0.7301106452941895, |
|
"rewards/rejected": -0.908660888671875, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6724071947952544e-07, |
|
"logits/chosen": -2.4344213008880615, |
|
"logits/rejected": -2.275275468826294, |
|
"logps/chosen": -240.5940704345703, |
|
"logps/rejected": -232.0761260986328, |
|
"loss": 0.5354, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1535104215145111, |
|
"rewards/margins": 0.6651692390441895, |
|
"rewards/rejected": -0.818679690361023, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.653272101033295e-07, |
|
"logits/chosen": -2.4676742553710938, |
|
"logits/rejected": -2.3419291973114014, |
|
"logps/chosen": -276.0610656738281, |
|
"logps/rejected": -212.14321899414062, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.09980995208024979, |
|
"rewards/margins": 0.677939772605896, |
|
"rewards/rejected": -0.7777497172355652, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6341370072713356e-07, |
|
"logits/chosen": -2.471161365509033, |
|
"logits/rejected": -2.415436267852783, |
|
"logps/chosen": -302.96063232421875, |
|
"logps/rejected": -248.37313842773438, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.08729290962219238, |
|
"rewards/margins": 0.8466998934745789, |
|
"rewards/rejected": -0.9339929819107056, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6150019135093762e-07, |
|
"logits/chosen": -2.4522697925567627, |
|
"logits/rejected": -2.3724026679992676, |
|
"logps/chosen": -304.5672912597656, |
|
"logps/rejected": -232.2679443359375, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1347544938325882, |
|
"rewards/margins": 0.7216586470603943, |
|
"rewards/rejected": -0.8564130663871765, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5958668197474169e-07, |
|
"logits/chosen": -2.4622063636779785, |
|
"logits/rejected": -2.3807263374328613, |
|
"logps/chosen": -272.76177978515625, |
|
"logps/rejected": -222.5310516357422, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.09426755458116531, |
|
"rewards/margins": 0.8474025726318359, |
|
"rewards/rejected": -0.9416700601577759, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5767317259854572e-07, |
|
"logits/chosen": -2.4014036655426025, |
|
"logits/rejected": -2.3767526149749756, |
|
"logps/chosen": -262.97088623046875, |
|
"logps/rejected": -233.16796875, |
|
"loss": 0.5329, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1530064344406128, |
|
"rewards/margins": 0.7272775769233704, |
|
"rewards/rejected": -0.8802839517593384, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5575966322234978e-07, |
|
"logits/chosen": -2.326734781265259, |
|
"logits/rejected": -2.2565207481384277, |
|
"logps/chosen": -259.03985595703125, |
|
"logps/rejected": -247.90286254882812, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.1133735179901123, |
|
"rewards/margins": 0.7463991641998291, |
|
"rewards/rejected": -0.8597726821899414, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5384615384615385e-07, |
|
"logits/chosen": -2.378979444503784, |
|
"logits/rejected": -2.2936904430389404, |
|
"logps/chosen": -238.01724243164062, |
|
"logps/rejected": -218.4258575439453, |
|
"loss": 0.52, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.17722460627555847, |
|
"rewards/margins": 0.7926307320594788, |
|
"rewards/rejected": -0.9698553085327148, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.519326444699579e-07, |
|
"logits/chosen": -2.386594295501709, |
|
"logits/rejected": -2.346097469329834, |
|
"logps/chosen": -279.75958251953125, |
|
"logps/rejected": -252.79776000976562, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.10955911874771118, |
|
"rewards/margins": 0.7847574949264526, |
|
"rewards/rejected": -0.8943166732788086, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5001913509376197e-07, |
|
"logits/chosen": -2.2668120861053467, |
|
"logits/rejected": -2.29984188079834, |
|
"logps/chosen": -242.16360473632812, |
|
"logps/rejected": -219.63143920898438, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.03612351417541504, |
|
"rewards/margins": 0.8138107061386108, |
|
"rewards/rejected": -0.8499342799186707, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4810562571756603e-07, |
|
"logits/chosen": -2.405428647994995, |
|
"logits/rejected": -2.382072925567627, |
|
"logps/chosen": -260.3352355957031, |
|
"logps/rejected": -238.9024658203125, |
|
"loss": 0.5394, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.1940733641386032, |
|
"rewards/margins": 0.6490978002548218, |
|
"rewards/rejected": -0.843171238899231, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4619211634137007e-07, |
|
"logits/chosen": -2.417595624923706, |
|
"logits/rejected": -2.346921920776367, |
|
"logps/chosen": -296.86602783203125, |
|
"logps/rejected": -244.6204071044922, |
|
"loss": 0.5207, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.08298005163669586, |
|
"rewards/margins": 0.777125358581543, |
|
"rewards/rejected": -0.8601053357124329, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4427860696517413e-07, |
|
"logits/chosen": -2.310767650604248, |
|
"logits/rejected": -2.2727725505828857, |
|
"logps/chosen": -276.6751708984375, |
|
"logps/rejected": -244.1461944580078, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.23952357470989227, |
|
"rewards/margins": 0.5698575973510742, |
|
"rewards/rejected": -0.8093811273574829, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.423650975889782e-07, |
|
"logits/chosen": -2.413959503173828, |
|
"logits/rejected": -2.317779302597046, |
|
"logps/chosen": -301.21697998046875, |
|
"logps/rejected": -230.67611694335938, |
|
"loss": 0.5223, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.07718826830387115, |
|
"rewards/margins": 0.7954361438751221, |
|
"rewards/rejected": -0.872624397277832, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4045158821278225e-07, |
|
"logits/chosen": -2.43796443939209, |
|
"logits/rejected": -2.3855981826782227, |
|
"logps/chosen": -281.7301025390625, |
|
"logps/rejected": -252.88900756835938, |
|
"loss": 0.5216, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.07020233571529388, |
|
"rewards/margins": 0.8178524971008301, |
|
"rewards/rejected": -0.8880547285079956, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3853807883658632e-07, |
|
"logits/chosen": -2.381850481033325, |
|
"logits/rejected": -2.3632988929748535, |
|
"logps/chosen": -286.48199462890625, |
|
"logps/rejected": -263.02325439453125, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.06794221699237823, |
|
"rewards/margins": 0.675620436668396, |
|
"rewards/rejected": -0.743562638759613, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3662456946039035e-07, |
|
"logits/chosen": -2.3416738510131836, |
|
"logits/rejected": -2.3051633834838867, |
|
"logps/chosen": -244.4949188232422, |
|
"logps/rejected": -206.51089477539062, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12611594796180725, |
|
"rewards/margins": 0.6276872158050537, |
|
"rewards/rejected": -0.7538031339645386, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3471106008419441e-07, |
|
"logits/chosen": -2.3636796474456787, |
|
"logits/rejected": -2.3292782306671143, |
|
"logps/chosen": -273.59027099609375, |
|
"logps/rejected": -260.6876220703125, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.08774266391992569, |
|
"rewards/margins": 0.6979103088378906, |
|
"rewards/rejected": -0.7856529951095581, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3279755070799848e-07, |
|
"logits/chosen": -2.3285207748413086, |
|
"logits/rejected": -2.2629952430725098, |
|
"logps/chosen": -284.96600341796875, |
|
"logps/rejected": -232.8168182373047, |
|
"loss": 0.5429, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10798223316669464, |
|
"rewards/margins": 0.7428350448608398, |
|
"rewards/rejected": -0.8508173227310181, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3088404133180254e-07, |
|
"logits/chosen": -2.2993249893188477, |
|
"logits/rejected": -2.2543981075286865, |
|
"logps/chosen": -306.201171875, |
|
"logps/rejected": -226.76223754882812, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.06968189030885696, |
|
"rewards/margins": 0.9336912035942078, |
|
"rewards/rejected": -1.003373146057129, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.289705319556066e-07, |
|
"logits/chosen": -2.3141911029815674, |
|
"logits/rejected": -2.251255512237549, |
|
"logps/chosen": -252.2708282470703, |
|
"logps/rejected": -221.80953979492188, |
|
"loss": 0.5277, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.14586138725280762, |
|
"rewards/margins": 0.6129014492034912, |
|
"rewards/rejected": -0.7587628960609436, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2705702257941064e-07, |
|
"logits/chosen": -2.373944044113159, |
|
"logits/rejected": -2.362863779067993, |
|
"logps/chosen": -257.61285400390625, |
|
"logps/rejected": -234.9366455078125, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1866065412759781, |
|
"rewards/margins": 0.6813262701034546, |
|
"rewards/rejected": -0.8679327964782715, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.251435132032147e-07, |
|
"logits/chosen": -2.3465607166290283, |
|
"logits/rejected": -2.338548421859741, |
|
"logps/chosen": -273.64178466796875, |
|
"logps/rejected": -259.8929748535156, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.0799819678068161, |
|
"rewards/margins": 0.7552547454833984, |
|
"rewards/rejected": -0.8352367281913757, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2323000382701873e-07, |
|
"logits/chosen": -2.3777191638946533, |
|
"logits/rejected": -2.3778316974639893, |
|
"logps/chosen": -270.16326904296875, |
|
"logps/rejected": -229.1251983642578, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2065315544605255, |
|
"rewards/margins": 0.6355454921722412, |
|
"rewards/rejected": -0.8420770764350891, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.213164944508228e-07, |
|
"logits/chosen": -2.4501662254333496, |
|
"logits/rejected": -2.3497557640075684, |
|
"logps/chosen": -266.2723083496094, |
|
"logps/rejected": -269.751708984375, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2602607309818268, |
|
"rewards/margins": 0.6484657526016235, |
|
"rewards/rejected": -0.9087265133857727, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1940298507462686e-07, |
|
"logits/chosen": -2.2452497482299805, |
|
"logits/rejected": -2.276188850402832, |
|
"logps/chosen": -261.48919677734375, |
|
"logps/rejected": -211.9011688232422, |
|
"loss": 0.5202, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.0977492704987526, |
|
"rewards/margins": 0.8213205337524414, |
|
"rewards/rejected": -0.9190698862075806, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1748947569843092e-07, |
|
"logits/chosen": -2.39143967628479, |
|
"logits/rejected": -2.307307481765747, |
|
"logps/chosen": -263.43524169921875, |
|
"logps/rejected": -225.5679473876953, |
|
"loss": 0.5088, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.12165951728820801, |
|
"rewards/margins": 0.8993788957595825, |
|
"rewards/rejected": -1.021038293838501, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1557596632223497e-07, |
|
"logits/chosen": -2.361375331878662, |
|
"logits/rejected": -2.375532627105713, |
|
"logps/chosen": -272.00408935546875, |
|
"logps/rejected": -224.09286499023438, |
|
"loss": 0.5223, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.13877837359905243, |
|
"rewards/margins": 0.8421686291694641, |
|
"rewards/rejected": -0.9809468984603882, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1366245694603903e-07, |
|
"logits/chosen": -2.4291224479675293, |
|
"logits/rejected": -2.245253801345825, |
|
"logps/chosen": -265.1824645996094, |
|
"logps/rejected": -249.3574676513672, |
|
"loss": 0.5579, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.24909143149852753, |
|
"rewards/margins": 0.6685684323310852, |
|
"rewards/rejected": -0.9176598787307739, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1174894756984308e-07, |
|
"logits/chosen": -2.2975034713745117, |
|
"logits/rejected": -2.394157886505127, |
|
"logps/chosen": -254.5048370361328, |
|
"logps/rejected": -224.20376586914062, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18976199626922607, |
|
"rewards/margins": 0.8317477107048035, |
|
"rewards/rejected": -1.0215097665786743, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0983543819364714e-07, |
|
"logits/chosen": -2.4635863304138184, |
|
"logits/rejected": -2.4494881629943848, |
|
"logps/chosen": -278.895263671875, |
|
"logps/rejected": -231.98135375976562, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.08000790327787399, |
|
"rewards/margins": 0.9092146158218384, |
|
"rewards/rejected": -0.989222526550293, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.079219288174512e-07, |
|
"logits/chosen": -2.3342247009277344, |
|
"logits/rejected": -2.319608211517334, |
|
"logps/chosen": -247.49368286132812, |
|
"logps/rejected": -209.24111938476562, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.20573286712169647, |
|
"rewards/margins": 0.5834436416625977, |
|
"rewards/rejected": -0.7891765236854553, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0600841944125525e-07, |
|
"logits/chosen": -2.436828136444092, |
|
"logits/rejected": -2.3965752124786377, |
|
"logps/chosen": -271.6636047363281, |
|
"logps/rejected": -221.7318572998047, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.20491096377372742, |
|
"rewards/margins": 0.6560606360435486, |
|
"rewards/rejected": -0.8609716296195984, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0409491006505931e-07, |
|
"logits/chosen": -2.400538682937622, |
|
"logits/rejected": -2.284837484359741, |
|
"logps/chosen": -283.9150390625, |
|
"logps/rejected": -229.0013885498047, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.1943405717611313, |
|
"rewards/margins": 0.7493225336074829, |
|
"rewards/rejected": -0.9436630010604858, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0218140068886336e-07, |
|
"logits/chosen": -2.377981662750244, |
|
"logits/rejected": -2.2820467948913574, |
|
"logps/chosen": -260.76312255859375, |
|
"logps/rejected": -221.4712371826172, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.21287551522254944, |
|
"rewards/margins": 0.5487321615219116, |
|
"rewards/rejected": -0.7616077661514282, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0026789131266743e-07, |
|
"logits/chosen": -2.295510768890381, |
|
"logits/rejected": -2.329360246658325, |
|
"logps/chosen": -252.51107788085938, |
|
"logps/rejected": -242.34616088867188, |
|
"loss": 0.5429, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1736653447151184, |
|
"rewards/margins": 0.6323011517524719, |
|
"rewards/rejected": -0.8059664964675903, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.835438193647149e-08, |
|
"logits/chosen": -2.3245301246643066, |
|
"logits/rejected": -2.372647523880005, |
|
"logps/chosen": -255.889892578125, |
|
"logps/rejected": -230.44961547851562, |
|
"loss": 0.5579, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.15318112075328827, |
|
"rewards/margins": 0.6755190491676331, |
|
"rewards/rejected": -0.8287001848220825, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.644087256027554e-08, |
|
"logits/chosen": -2.393808364868164, |
|
"logits/rejected": -2.387742280960083, |
|
"logps/chosen": -239.28219604492188, |
|
"logps/rejected": -218.7506103515625, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.13704940676689148, |
|
"rewards/margins": 0.6621817946434021, |
|
"rewards/rejected": -0.7992312908172607, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.45273631840796e-08, |
|
"logits/chosen": -2.3953936100006104, |
|
"logits/rejected": -2.4050917625427246, |
|
"logps/chosen": -278.6383361816406, |
|
"logps/rejected": -231.466552734375, |
|
"loss": 0.5329, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.07729502022266388, |
|
"rewards/margins": 0.736717700958252, |
|
"rewards/rejected": -0.8140127062797546, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.261385380788366e-08, |
|
"logits/chosen": -2.276078939437866, |
|
"logits/rejected": -2.2849624156951904, |
|
"logps/chosen": -239.13525390625, |
|
"logps/rejected": -222.2736053466797, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2353070229291916, |
|
"rewards/margins": 0.6328269243240356, |
|
"rewards/rejected": -0.8681339025497437, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.070034443168771e-08, |
|
"logits/chosen": -2.387070894241333, |
|
"logits/rejected": -2.37086820602417, |
|
"logps/chosen": -261.84112548828125, |
|
"logps/rejected": -227.5956268310547, |
|
"loss": 0.557, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.2481728047132492, |
|
"rewards/margins": 0.6492807865142822, |
|
"rewards/rejected": -0.8974536657333374, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.878683505549177e-08, |
|
"logits/chosen": -2.2902541160583496, |
|
"logits/rejected": -2.3299739360809326, |
|
"logps/chosen": -267.1860656738281, |
|
"logps/rejected": -226.9349822998047, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.10977288335561752, |
|
"rewards/margins": 0.7155038714408875, |
|
"rewards/rejected": -0.8252767324447632, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.687332567929582e-08, |
|
"logits/chosen": -2.3206660747528076, |
|
"logits/rejected": -2.3494954109191895, |
|
"logps/chosen": -291.82305908203125, |
|
"logps/rejected": -246.83761596679688, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.044042427092790604, |
|
"rewards/margins": 0.900373637676239, |
|
"rewards/rejected": -0.9444161653518677, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.495981630309988e-08, |
|
"logits/chosen": -2.2711682319641113, |
|
"logits/rejected": -2.290236473083496, |
|
"logps/chosen": -300.72979736328125, |
|
"logps/rejected": -218.26022338867188, |
|
"loss": 0.5448, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1306503415107727, |
|
"rewards/margins": 0.5897533297538757, |
|
"rewards/rejected": -0.7204037308692932, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.304630692690395e-08, |
|
"logits/chosen": -2.317441463470459, |
|
"logits/rejected": -2.2715091705322266, |
|
"logps/chosen": -262.0405578613281, |
|
"logps/rejected": -205.5135955810547, |
|
"loss": 0.5286, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.14162805676460266, |
|
"rewards/margins": 0.7206326723098755, |
|
"rewards/rejected": -0.8622606992721558, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.1132797550708e-08, |
|
"logits/chosen": -2.3969407081604004, |
|
"logits/rejected": -2.3611950874328613, |
|
"logps/chosen": -267.4500427246094, |
|
"logps/rejected": -221.7566375732422, |
|
"loss": 0.5277, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.15396210551261902, |
|
"rewards/margins": 0.7070397138595581, |
|
"rewards/rejected": -0.8610017895698547, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.921928817451206e-08, |
|
"logits/chosen": -2.4169304370880127, |
|
"logits/rejected": -2.321960926055908, |
|
"logps/chosen": -277.09588623046875, |
|
"logps/rejected": -227.67959594726562, |
|
"loss": 0.5088, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.17860722541809082, |
|
"rewards/margins": 0.7987648248672485, |
|
"rewards/rejected": -0.9773720502853394, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.73057787983161e-08, |
|
"logits/chosen": -2.3953964710235596, |
|
"logits/rejected": -2.301238536834717, |
|
"logps/chosen": -292.7876892089844, |
|
"logps/rejected": -266.57476806640625, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.08117020130157471, |
|
"rewards/margins": 0.7519367933273315, |
|
"rewards/rejected": -0.8331069946289062, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.539226942212017e-08, |
|
"logits/chosen": -2.3565969467163086, |
|
"logits/rejected": -2.3130733966827393, |
|
"logps/chosen": -243.97903442382812, |
|
"logps/rejected": -219.99301147460938, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.11240849643945694, |
|
"rewards/margins": 0.8732815980911255, |
|
"rewards/rejected": -0.9856899976730347, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.347876004592423e-08, |
|
"logits/chosen": -2.4705801010131836, |
|
"logits/rejected": -2.40214467048645, |
|
"logps/chosen": -280.0275573730469, |
|
"logps/rejected": -232.6969451904297, |
|
"loss": 0.5403, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.14689256250858307, |
|
"rewards/margins": 0.7363706231117249, |
|
"rewards/rejected": -0.883263111114502, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.156525066972828e-08, |
|
"logits/chosen": -2.299910068511963, |
|
"logits/rejected": -2.3627266883850098, |
|
"logps/chosen": -242.22579956054688, |
|
"logps/rejected": -227.53482055664062, |
|
"loss": 0.5402, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.088679239153862, |
|
"rewards/margins": 0.6972663402557373, |
|
"rewards/rejected": -0.7859456539154053, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.965174129353234e-08, |
|
"logits/chosen": -2.3531718254089355, |
|
"logits/rejected": -2.2922921180725098, |
|
"logps/chosen": -302.03350830078125, |
|
"logps/rejected": -258.0994567871094, |
|
"loss": 0.5287, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.10454048961400986, |
|
"rewards/margins": 0.8333840370178223, |
|
"rewards/rejected": -0.9379245042800903, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.773823191733639e-08, |
|
"logits/chosen": -2.284903049468994, |
|
"logits/rejected": -2.2148680686950684, |
|
"logps/chosen": -259.69036865234375, |
|
"logps/rejected": -222.1662139892578, |
|
"loss": 0.5297, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.24571335315704346, |
|
"rewards/margins": 0.7015795707702637, |
|
"rewards/rejected": -0.9472929239273071, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.582472254114045e-08, |
|
"logits/chosen": -2.3052189350128174, |
|
"logits/rejected": -2.3175923824310303, |
|
"logps/chosen": -252.48291015625, |
|
"logps/rejected": -223.8980712890625, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12469018995761871, |
|
"rewards/margins": 0.6323705911636353, |
|
"rewards/rejected": -0.757060706615448, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.391121316494451e-08, |
|
"logits/chosen": -2.3261055946350098, |
|
"logits/rejected": -2.3110244274139404, |
|
"logps/chosen": -295.005859375, |
|
"logps/rejected": -239.6460418701172, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10888131707906723, |
|
"rewards/margins": 0.8443535566329956, |
|
"rewards/rejected": -0.953234851360321, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.199770378874856e-08, |
|
"logits/chosen": -2.377129077911377, |
|
"logits/rejected": -2.2884180545806885, |
|
"logps/chosen": -272.9131164550781, |
|
"logps/rejected": -225.4416961669922, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.07070966064929962, |
|
"rewards/margins": 0.9023112058639526, |
|
"rewards/rejected": -0.9730209112167358, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.008419441255262e-08, |
|
"logits/chosen": -2.3185131549835205, |
|
"logits/rejected": -2.260045051574707, |
|
"logps/chosen": -299.19635009765625, |
|
"logps/rejected": -217.824951171875, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07015608996152878, |
|
"rewards/margins": 0.8233577013015747, |
|
"rewards/rejected": -0.8935137987136841, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.817068503635668e-08, |
|
"logits/chosen": -2.4003567695617676, |
|
"logits/rejected": -2.3623907566070557, |
|
"logps/chosen": -288.9065856933594, |
|
"logps/rejected": -222.6360626220703, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.12081106007099152, |
|
"rewards/margins": 0.8377307057380676, |
|
"rewards/rejected": -0.958541750907898, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.6257175660160735e-08, |
|
"logits/chosen": -2.420441150665283, |
|
"logits/rejected": -2.37391996383667, |
|
"logps/chosen": -295.19952392578125, |
|
"logps/rejected": -238.8549041748047, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18193045258522034, |
|
"rewards/margins": 0.7073780298233032, |
|
"rewards/rejected": -0.8893085718154907, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.4343666283964784e-08, |
|
"logits/chosen": -2.4277868270874023, |
|
"logits/rejected": -2.3764290809631348, |
|
"logps/chosen": -263.9327697753906, |
|
"logps/rejected": -249.3427734375, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.07714501768350601, |
|
"rewards/margins": 0.7166513204574585, |
|
"rewards/rejected": -0.7937964200973511, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.243015690776884e-08, |
|
"logits/chosen": -2.467040538787842, |
|
"logits/rejected": -2.406569719314575, |
|
"logps/chosen": -285.0174865722656, |
|
"logps/rejected": -235.902587890625, |
|
"loss": 0.5146, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.15382210910320282, |
|
"rewards/margins": 0.7074260711669922, |
|
"rewards/rejected": -0.8612481951713562, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.05166475315729e-08, |
|
"logits/chosen": -2.3759026527404785, |
|
"logits/rejected": -2.323474884033203, |
|
"logps/chosen": -263.00689697265625, |
|
"logps/rejected": -218.7880859375, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2826555371284485, |
|
"rewards/margins": 0.6195917129516602, |
|
"rewards/rejected": -0.9022472500801086, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.860313815537696e-08, |
|
"logits/chosen": -2.4053902626037598, |
|
"logits/rejected": -2.36479115486145, |
|
"logps/chosen": -284.4691467285156, |
|
"logps/rejected": -266.75579833984375, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11270469427108765, |
|
"rewards/margins": 0.7914165258407593, |
|
"rewards/rejected": -0.9041212201118469, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.668962877918101e-08, |
|
"logits/chosen": -2.380582332611084, |
|
"logits/rejected": -2.2785563468933105, |
|
"logps/chosen": -288.0699768066406, |
|
"logps/rejected": -278.1865234375, |
|
"loss": 0.5236, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12235669791698456, |
|
"rewards/margins": 0.8113776445388794, |
|
"rewards/rejected": -0.9337342381477356, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.477611940298507e-08, |
|
"logits/chosen": -2.4007792472839355, |
|
"logits/rejected": -2.3088767528533936, |
|
"logps/chosen": -245.1487274169922, |
|
"logps/rejected": -227.521728515625, |
|
"loss": 0.4938, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.13851900398731232, |
|
"rewards/margins": 0.8027512431144714, |
|
"rewards/rejected": -0.9412702322006226, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.2862610026789124e-08, |
|
"logits/chosen": -2.4124245643615723, |
|
"logits/rejected": -2.296288013458252, |
|
"logps/chosen": -259.74029541015625, |
|
"logps/rejected": -223.14627075195312, |
|
"loss": 0.5476, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1544261872768402, |
|
"rewards/margins": 0.7074520587921143, |
|
"rewards/rejected": -0.8618782162666321, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.0949100650593186e-08, |
|
"logits/chosen": -2.3861641883850098, |
|
"logits/rejected": -2.328504800796509, |
|
"logps/chosen": -256.6800231933594, |
|
"logps/rejected": -243.67514038085938, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.25118106603622437, |
|
"rewards/margins": 0.5502891540527344, |
|
"rewards/rejected": -0.8014701008796692, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.903559127439724e-08, |
|
"logits/chosen": -2.294541358947754, |
|
"logits/rejected": -2.2779922485351562, |
|
"logps/chosen": -273.6397399902344, |
|
"logps/rejected": -230.7729949951172, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.11550472676753998, |
|
"rewards/margins": 0.7522660493850708, |
|
"rewards/rejected": -0.867770791053772, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.71220818982013e-08, |
|
"logits/chosen": -2.406259536743164, |
|
"logits/rejected": -2.3714747428894043, |
|
"logps/chosen": -273.5616760253906, |
|
"logps/rejected": -218.18923950195312, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.23518535494804382, |
|
"rewards/margins": 0.6195680499076843, |
|
"rewards/rejected": -0.8547533750534058, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.520857252200535e-08, |
|
"logits/chosen": -2.363008975982666, |
|
"logits/rejected": -2.3325300216674805, |
|
"logps/chosen": -300.9599609375, |
|
"logps/rejected": -242.6225128173828, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.039127424359321594, |
|
"rewards/margins": 0.9830048680305481, |
|
"rewards/rejected": -1.022132158279419, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3295063145809414e-08, |
|
"logits/chosen": -2.280790328979492, |
|
"logits/rejected": -2.225207805633545, |
|
"logps/chosen": -266.9983825683594, |
|
"logps/rejected": -244.36279296875, |
|
"loss": 0.5561, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.14134183526039124, |
|
"rewards/margins": 0.7063352465629578, |
|
"rewards/rejected": -0.8476771116256714, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.138155376961347e-08, |
|
"logits/chosen": -2.288053512573242, |
|
"logits/rejected": -2.3323187828063965, |
|
"logps/chosen": -234.13241577148438, |
|
"logps/rejected": -226.0674285888672, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.14560513198375702, |
|
"rewards/margins": 0.7158702611923218, |
|
"rewards/rejected": -0.8614753484725952, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9468044393417525e-08, |
|
"logits/chosen": -2.4299161434173584, |
|
"logits/rejected": -2.40264892578125, |
|
"logps/chosen": -273.2800598144531, |
|
"logps/rejected": -262.6374206542969, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06616147607564926, |
|
"rewards/margins": 0.8120719194412231, |
|
"rewards/rejected": -0.8782333135604858, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.755453501722158e-08, |
|
"logits/chosen": -2.37566876411438, |
|
"logits/rejected": -2.36662220954895, |
|
"logps/chosen": -285.37725830078125, |
|
"logps/rejected": -228.5401611328125, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09863577038049698, |
|
"rewards/margins": 0.8224757313728333, |
|
"rewards/rejected": -0.9211114645004272, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.564102564102564e-08, |
|
"logits/chosen": -2.399296283721924, |
|
"logits/rejected": -2.3836464881896973, |
|
"logps/chosen": -258.1005554199219, |
|
"logps/rejected": -225.07723999023438, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.18826046586036682, |
|
"rewards/margins": 0.6354348659515381, |
|
"rewards/rejected": -0.8236953020095825, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3727516264829695e-08, |
|
"logits/chosen": -2.320814609527588, |
|
"logits/rejected": -2.295502185821533, |
|
"logps/chosen": -238.77200317382812, |
|
"logps/rejected": -234.020263671875, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1772747039794922, |
|
"rewards/margins": 0.5961662530899048, |
|
"rewards/rejected": -0.7734408974647522, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1814006888633754e-08, |
|
"logits/chosen": -2.334709405899048, |
|
"logits/rejected": -2.3521342277526855, |
|
"logps/chosen": -263.27349853515625, |
|
"logps/rejected": -233.0219268798828, |
|
"loss": 0.5471, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.2036319077014923, |
|
"rewards/margins": 0.6492605805397034, |
|
"rewards/rejected": -0.8528925180435181, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.990049751243781e-08, |
|
"logits/chosen": -2.3961801528930664, |
|
"logits/rejected": -2.344447612762451, |
|
"logps/chosen": -285.4869079589844, |
|
"logps/rejected": -231.99276733398438, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.11304112523794174, |
|
"rewards/margins": 0.6869500875473022, |
|
"rewards/rejected": -0.7999913096427917, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7986988136241865e-08, |
|
"logits/chosen": -2.4139771461486816, |
|
"logits/rejected": -2.357755661010742, |
|
"logps/chosen": -262.9169006347656, |
|
"logps/rejected": -225.28134155273438, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.029026124626398087, |
|
"rewards/margins": 0.874340832233429, |
|
"rewards/rejected": -0.9033668637275696, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6073478760045924e-08, |
|
"logits/chosen": -2.2996811866760254, |
|
"logits/rejected": -2.224923849105835, |
|
"logps/chosen": -285.3755798339844, |
|
"logps/rejected": -201.12803649902344, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.24773137271404266, |
|
"rewards/margins": 0.5589512586593628, |
|
"rewards/rejected": -0.8066825866699219, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4159969383849981e-08, |
|
"logits/chosen": -2.493635654449463, |
|
"logits/rejected": -2.4172415733337402, |
|
"logps/chosen": -312.3724670410156, |
|
"logps/rejected": -248.0670623779297, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.10385121405124664, |
|
"rewards/margins": 0.8151585459709167, |
|
"rewards/rejected": -0.9190096855163574, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2246460007654037e-08, |
|
"logits/chosen": -2.365940570831299, |
|
"logits/rejected": -2.3057992458343506, |
|
"logps/chosen": -266.408447265625, |
|
"logps/rejected": -227.32492065429688, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.23473727703094482, |
|
"rewards/margins": 0.5884072184562683, |
|
"rewards/rejected": -0.8231445550918579, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0332950631458094e-08, |
|
"logits/chosen": -2.306847095489502, |
|
"logits/rejected": -2.352964401245117, |
|
"logps/chosen": -273.6145935058594, |
|
"logps/rejected": -230.6118927001953, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1490267664194107, |
|
"rewards/margins": 0.7372574210166931, |
|
"rewards/rejected": -0.8862841725349426, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.419441255262151e-09, |
|
"logits/chosen": -2.2987818717956543, |
|
"logits/rejected": -2.319676160812378, |
|
"logps/chosen": -267.0763854980469, |
|
"logps/rejected": -219.8603973388672, |
|
"loss": 0.5045, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.09555543214082718, |
|
"rewards/margins": 0.6565554738044739, |
|
"rewards/rejected": -0.7521108388900757, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.505931879066207e-09, |
|
"logits/chosen": -2.403472900390625, |
|
"logits/rejected": -2.3466238975524902, |
|
"logps/chosen": -235.06271362304688, |
|
"logps/rejected": -236.23068237304688, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.24678435921669006, |
|
"rewards/margins": 0.6536287665367126, |
|
"rewards/rejected": -0.9004132151603699, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.592422502870264e-09, |
|
"logits/chosen": -2.413404941558838, |
|
"logits/rejected": -2.317659854888916, |
|
"logps/chosen": -259.46697998046875, |
|
"logps/rejected": -227.43246459960938, |
|
"loss": 0.5463, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.05970311164855957, |
|
"rewards/margins": 0.6795765161514282, |
|
"rewards/rejected": -0.7392796277999878, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.6789131266743202e-09, |
|
"logits/chosen": -2.3422017097473145, |
|
"logits/rejected": -2.2731831073760986, |
|
"logps/chosen": -233.3965301513672, |
|
"logps/rejected": -225.33566284179688, |
|
"loss": 0.5157, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2789511978626251, |
|
"rewards/margins": 0.6096433401107788, |
|
"rewards/rejected": -0.8885944485664368, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.654037504783773e-10, |
|
"logits/chosen": -2.3140885829925537, |
|
"logits/rejected": -2.351783275604248, |
|
"logps/chosen": -238.41293334960938, |
|
"logps/rejected": -245.66201782226562, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.1346302330493927, |
|
"rewards/margins": 0.7680613398551941, |
|
"rewards/rejected": -0.9026915431022644, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -2.0658671855926514, |
|
"eval_logits/rejected": -1.9411793947219849, |
|
"eval_logps/chosen": -266.13037109375, |
|
"eval_logps/rejected": -228.2694091796875, |
|
"eval_loss": 0.5265706777572632, |
|
"eval_rewards/accuracies": 0.7459999918937683, |
|
"eval_rewards/chosen": -0.1469534933567047, |
|
"eval_rewards/margins": 0.7515553832054138, |
|
"eval_rewards/rejected": -0.8985088467597961, |
|
"eval_runtime": 452.2504, |
|
"eval_samples_per_second": 4.422, |
|
"eval_steps_per_second": 0.276, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2904, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5642705829018732, |
|
"train_runtime": 71197.0849, |
|
"train_samples_per_second": 2.611, |
|
"train_steps_per_second": 0.041 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2904, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|