{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1465,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00682681230532918,
"grad_norm": 20.491548678692148,
"learning_rate": 6.122448979591837e-08,
"logits/chosen": 0.03672148287296295,
"logits/rejected": 0.041521187871694565,
"logps/chosen": -191.74862670898438,
"logps/rejected": -189.4052276611328,
"loss": 0.6921,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": 0.0014678842853754759,
"rewards/margins": 0.0024292597081512213,
"rewards/rejected": -0.0009613755391910672,
"step": 10
},
{
"epoch": 0.01365362461065836,
"grad_norm": 21.860852469835415,
"learning_rate": 1.2925170068027211e-07,
"logits/chosen": 0.04523754119873047,
"logits/rejected": 0.05510401353240013,
"logps/chosen": -187.8703155517578,
"logps/rejected": -187.6009979248047,
"loss": 0.6937,
"rewards/accuracies": 0.4859375059604645,
"rewards/chosen": -0.0003124059294350445,
"rewards/margins": -0.0007655444787815213,
"rewards/rejected": 0.0004531386948656291,
"step": 20
},
{
"epoch": 0.02048043691598754,
"grad_norm": 20.278529512570657,
"learning_rate": 1.9727891156462583e-07,
"logits/chosen": 0.020983930677175522,
"logits/rejected": 0.04532231390476227,
"logps/chosen": -185.85728454589844,
"logps/rejected": -188.9866180419922,
"loss": 0.6936,
"rewards/accuracies": 0.46875,
"rewards/chosen": 0.00064073596149683,
"rewards/margins": -0.0005829028668813407,
"rewards/rejected": 0.001223638653755188,
"step": 30
},
{
"epoch": 0.02730724922131672,
"grad_norm": 19.626379046619967,
"learning_rate": 2.653061224489796e-07,
"logits/chosen": 0.03043345920741558,
"logits/rejected": 0.032446593046188354,
"logps/chosen": -193.6338653564453,
"logps/rejected": -190.4232635498047,
"loss": 0.6913,
"rewards/accuracies": 0.53125,
"rewards/chosen": 0.002521326532587409,
"rewards/margins": 0.004052319563925266,
"rewards/rejected": -0.0015309930313378572,
"step": 40
},
{
"epoch": 0.0341340615266459,
"grad_norm": 21.08295374738999,
"learning_rate": 3.333333333333333e-07,
"logits/chosen": 0.04947035759687424,
"logits/rejected": 0.06372452527284622,
"logps/chosen": -188.39315795898438,
"logps/rejected": -190.05992126464844,
"loss": 0.6942,
"rewards/accuracies": 0.46406251192092896,
"rewards/chosen": 0.0021625806111842394,
"rewards/margins": -0.0017312343697994947,
"rewards/rejected": 0.003893814980983734,
"step": 50
},
{
"epoch": 0.04096087383197508,
"grad_norm": 20.25039554823623,
"learning_rate": 4.0136054421768705e-07,
"logits/chosen": 0.053825099021196365,
"logits/rejected": 0.0521962009370327,
"logps/chosen": -189.28480529785156,
"logps/rejected": -184.31430053710938,
"loss": 0.6937,
"rewards/accuracies": 0.5015624761581421,
"rewards/chosen": 0.004196351859718561,
"rewards/margins": -0.0006979627651162446,
"rewards/rejected": 0.0048943147994577885,
"step": 60
},
{
"epoch": 0.04778768613730426,
"grad_norm": 22.505298366939336,
"learning_rate": 4.693877551020408e-07,
"logits/chosen": 0.03855639323592186,
"logits/rejected": 0.041457682847976685,
"logps/chosen": -189.49111938476562,
"logps/rejected": -190.42034912109375,
"loss": 0.6933,
"rewards/accuracies": 0.4937499761581421,
"rewards/chosen": 0.008006598800420761,
"rewards/margins": 4.7756126150488853e-05,
"rewards/rejected": 0.007958842441439629,
"step": 70
},
{
"epoch": 0.05461449844263344,
"grad_norm": 19.99809543741437,
"learning_rate": 5.374149659863945e-07,
"logits/chosen": 0.026321567595005035,
"logits/rejected": 0.013571225106716156,
"logps/chosen": -189.8534393310547,
"logps/rejected": -187.626708984375,
"loss": 0.6878,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": 0.018732454627752304,
"rewards/margins": 0.011271494440734386,
"rewards/rejected": 0.007460957858711481,
"step": 80
},
{
"epoch": 0.06144131074796262,
"grad_norm": 22.176568391543768,
"learning_rate": 6.054421768707482e-07,
"logits/chosen": 0.020383019000291824,
"logits/rejected": 0.02592673897743225,
"logps/chosen": -186.662841796875,
"logps/rejected": -189.3004608154297,
"loss": 0.6876,
"rewards/accuracies": 0.582812488079071,
"rewards/chosen": 0.027650414034724236,
"rewards/margins": 0.011809633113443851,
"rewards/rejected": 0.01584078185260296,
"step": 90
},
{
"epoch": 0.0682681230532918,
"grad_norm": 20.53234701755388,
"learning_rate": 6.734693877551019e-07,
"logits/chosen": 0.02966993674635887,
"logits/rejected": 0.05219441279768944,
"logps/chosen": -190.25782775878906,
"logps/rejected": -189.80935668945312,
"loss": 0.6858,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": 0.040990687906742096,
"rewards/margins": 0.01583397202193737,
"rewards/rejected": 0.025156717747449875,
"step": 100
},
{
"epoch": 0.07509493535862098,
"grad_norm": 21.19602898096358,
"learning_rate": 7.414965986394558e-07,
"logits/chosen": -0.007384412921965122,
"logits/rejected": -0.016086794435977936,
"logps/chosen": -189.52395629882812,
"logps/rejected": -192.64816284179688,
"loss": 0.6817,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.05572628974914551,
"rewards/margins": 0.024477079510688782,
"rewards/rejected": 0.031249215826392174,
"step": 110
},
{
"epoch": 0.08192174766395016,
"grad_norm": 20.08862529877448,
"learning_rate": 8.095238095238095e-07,
"logits/chosen": -0.04889947175979614,
"logits/rejected": -0.049361489713191986,
"logps/chosen": -197.39492797851562,
"logps/rejected": -192.8791046142578,
"loss": 0.6828,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": 0.059998854994773865,
"rewards/margins": 0.023517701774835587,
"rewards/rejected": 0.03648114949464798,
"step": 120
},
{
"epoch": 0.08874855996927934,
"grad_norm": 19.78186965312465,
"learning_rate": 8.775510204081632e-07,
"logits/chosen": -0.022162066772580147,
"logits/rejected": -0.02603471651673317,
"logps/chosen": -192.2538604736328,
"logps/rejected": -190.6973876953125,
"loss": 0.6782,
"rewards/accuracies": 0.651562511920929,
"rewards/chosen": 0.07047584652900696,
"rewards/margins": 0.03453099727630615,
"rewards/rejected": 0.035944852977991104,
"step": 130
},
{
"epoch": 0.09557537227460852,
"grad_norm": 21.72668562860521,
"learning_rate": 9.45578231292517e-07,
"logits/chosen": -0.028122998774051666,
"logits/rejected": -0.0023567965254187584,
"logps/chosen": -193.58602905273438,
"logps/rejected": -189.49517822265625,
"loss": 0.6721,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.08927410840988159,
"rewards/margins": 0.04811044782400131,
"rewards/rejected": 0.04116365686058998,
"step": 140
},
{
"epoch": 0.1024021845799377,
"grad_norm": 20.630914226397604,
"learning_rate": 9.98482549317147e-07,
"logits/chosen": -0.07732997089624405,
"logits/rejected": -0.08366119861602783,
"logps/chosen": -203.80441284179688,
"logps/rejected": -202.51812744140625,
"loss": 0.666,
"rewards/accuracies": 0.6500000357627869,
"rewards/chosen": 0.10389578342437744,
"rewards/margins": 0.06255247443914413,
"rewards/rejected": 0.04134330898523331,
"step": 150
},
{
"epoch": 0.10922899688526688,
"grad_norm": 20.25669433495337,
"learning_rate": 9.908952959028832e-07,
"logits/chosen": -0.09441889822483063,
"logits/rejected": -0.08870529383420944,
"logps/chosen": -185.63307189941406,
"logps/rejected": -186.53253173828125,
"loss": 0.6654,
"rewards/accuracies": 0.6500000357627869,
"rewards/chosen": 0.08515263348817825,
"rewards/margins": 0.0660884901881218,
"rewards/rejected": 0.019064147025346756,
"step": 160
},
{
"epoch": 0.11605580919059606,
"grad_norm": 20.384593980794733,
"learning_rate": 9.833080424886191e-07,
"logits/chosen": -0.08715031296014786,
"logits/rejected": -0.05636933073401451,
"logps/chosen": -188.3374481201172,
"logps/rejected": -190.37437438964844,
"loss": 0.659,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": 0.04690036177635193,
"rewards/margins": 0.08634677529335022,
"rewards/rejected": -0.03944641351699829,
"step": 170
},
{
"epoch": 0.12288262149592524,
"grad_norm": 21.86056528276187,
"learning_rate": 9.75720789074355e-07,
"logits/chosen": -0.07912790030241013,
"logits/rejected": -0.07271625846624374,
"logps/chosen": -197.11959838867188,
"logps/rejected": -197.41287231445312,
"loss": 0.6528,
"rewards/accuracies": 0.6749999523162842,
"rewards/chosen": 0.04622086510062218,
"rewards/margins": 0.10496747493743896,
"rewards/rejected": -0.058746613562107086,
"step": 180
},
{
"epoch": 0.12970943380125444,
"grad_norm": 22.24802422589698,
"learning_rate": 9.68133535660091e-07,
"logits/chosen": -0.07506565004587173,
"logits/rejected": -0.05108420550823212,
"logps/chosen": -190.35340881347656,
"logps/rejected": -195.009521484375,
"loss": 0.6441,
"rewards/accuracies": 0.6812500357627869,
"rewards/chosen": 0.052541881799697876,
"rewards/margins": 0.12386594712734222,
"rewards/rejected": -0.07132406532764435,
"step": 190
},
{
"epoch": 0.1365362461065836,
"grad_norm": 22.419822765649933,
"learning_rate": 9.60546282245827e-07,
"logits/chosen": -0.11874101310968399,
"logits/rejected": -0.08336825668811798,
"logps/chosen": -193.62611389160156,
"logps/rejected": -196.01084899902344,
"loss": 0.6249,
"rewards/accuracies": 0.7046875357627869,
"rewards/chosen": 0.03949081152677536,
"rewards/margins": 0.17370560765266418,
"rewards/rejected": -0.13421478867530823,
"step": 200
},
{
"epoch": 0.1433630584119128,
"grad_norm": 22.915739502006815,
"learning_rate": 9.52959028831563e-07,
"logits/chosen": -0.17365601658821106,
"logits/rejected": -0.15520283579826355,
"logps/chosen": -203.1890869140625,
"logps/rejected": -200.14974975585938,
"loss": 0.6287,
"rewards/accuracies": 0.6687500476837158,
"rewards/chosen": -0.01979774236679077,
"rewards/margins": 0.18479280173778534,
"rewards/rejected": -0.2045905441045761,
"step": 210
},
{
"epoch": 0.15018987071724196,
"grad_norm": 20.769969852017695,
"learning_rate": 9.453717754172988e-07,
"logits/chosen": -0.1847243756055832,
"logits/rejected": -0.15192236006259918,
"logps/chosen": -198.33010864257812,
"logps/rejected": -200.56228637695312,
"loss": 0.6015,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.03269830346107483,
"rewards/margins": 0.25339096784591675,
"rewards/rejected": -0.2860892415046692,
"step": 220
},
{
"epoch": 0.15701668302257116,
"grad_norm": 21.597574913870996,
"learning_rate": 9.377845220030348e-07,
"logits/chosen": -0.21274694800376892,
"logits/rejected": -0.19206659495830536,
"logps/chosen": -197.59228515625,
"logps/rejected": -200.42283630371094,
"loss": 0.611,
"rewards/accuracies": 0.6734374761581421,
"rewards/chosen": -0.09015801548957825,
"rewards/margins": 0.24926723539829254,
"rewards/rejected": -0.3394252359867096,
"step": 230
},
{
"epoch": 0.16384349532790032,
"grad_norm": 24.09497342960952,
"learning_rate": 9.301972685887707e-07,
"logits/chosen": -0.2293986827135086,
"logits/rejected": -0.19997453689575195,
"logps/chosen": -191.1751251220703,
"logps/rejected": -196.63511657714844,
"loss": 0.6125,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.15094764530658722,
"rewards/margins": 0.24523335695266724,
"rewards/rejected": -0.39618098735809326,
"step": 240
},
{
"epoch": 0.17067030763322952,
"grad_norm": 22.186402685803138,
"learning_rate": 9.226100151745068e-07,
"logits/chosen": -0.23599499464035034,
"logits/rejected": -0.20987126231193542,
"logps/chosen": -191.61639404296875,
"logps/rejected": -197.80091857910156,
"loss": 0.6205,
"rewards/accuracies": 0.6546875238418579,
"rewards/chosen": -0.22373469173908234,
"rewards/margins": 0.2635762691497803,
"rewards/rejected": -0.4873109459877014,
"step": 250
},
{
"epoch": 0.17749711993855868,
"grad_norm": 23.30196457741843,
"learning_rate": 9.150227617602428e-07,
"logits/chosen": -0.2195354700088501,
"logits/rejected": -0.19019638001918793,
"logps/chosen": -190.50746154785156,
"logps/rejected": -195.74331665039062,
"loss": 0.6056,
"rewards/accuracies": 0.7046875357627869,
"rewards/chosen": -0.2523514926433563,
"rewards/margins": 0.29894089698791504,
"rewards/rejected": -0.5512923002243042,
"step": 260
},
{
"epoch": 0.18432393224388788,
"grad_norm": 23.437160399579792,
"learning_rate": 9.074355083459787e-07,
"logits/chosen": -0.2144363671541214,
"logits/rejected": -0.19538246095180511,
"logps/chosen": -194.883056640625,
"logps/rejected": -202.83575439453125,
"loss": 0.595,
"rewards/accuracies": 0.7078125476837158,
"rewards/chosen": -0.27382633090019226,
"rewards/margins": 0.3095867931842804,
"rewards/rejected": -0.5834130644798279,
"step": 270
},
{
"epoch": 0.19115074454921704,
"grad_norm": 23.67928529051871,
"learning_rate": 8.998482549317147e-07,
"logits/chosen": -0.2671777606010437,
"logits/rejected": -0.23835715651512146,
"logps/chosen": -189.7034912109375,
"logps/rejected": -194.55117797851562,
"loss": 0.589,
"rewards/accuracies": 0.7000000476837158,
"rewards/chosen": -0.2815781235694885,
"rewards/margins": 0.34006255865097046,
"rewards/rejected": -0.621640682220459,
"step": 280
},
{
"epoch": 0.19797755685454624,
"grad_norm": 26.3785919721159,
"learning_rate": 8.922610015174506e-07,
"logits/chosen": -0.2851921319961548,
"logits/rejected": -0.2668570280075073,
"logps/chosen": -202.77801513671875,
"logps/rejected": -207.8894805908203,
"loss": 0.59,
"rewards/accuracies": 0.7046875357627869,
"rewards/chosen": -0.33676964044570923,
"rewards/margins": 0.35969871282577515,
"rewards/rejected": -0.6964683532714844,
"step": 290
},
{
"epoch": 0.2048043691598754,
"grad_norm": 23.715391013722297,
"learning_rate": 8.846737481031866e-07,
"logits/chosen": -0.2776036262512207,
"logits/rejected": -0.24332435429096222,
"logps/chosen": -201.10296630859375,
"logps/rejected": -203.72195434570312,
"loss": 0.6111,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.3781723380088806,
"rewards/margins": 0.3227519989013672,
"rewards/rejected": -0.700924277305603,
"step": 300
},
{
"epoch": 0.2116311814652046,
"grad_norm": 21.57268816738927,
"learning_rate": 8.770864946889226e-07,
"logits/chosen": -0.29242080450057983,
"logits/rejected": -0.2669425308704376,
"logps/chosen": -204.4817352294922,
"logps/rejected": -214.0943603515625,
"loss": 0.5794,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.3647349178791046,
"rewards/margins": 0.4395143985748291,
"rewards/rejected": -0.8042493462562561,
"step": 310
},
{
"epoch": 0.21845799377053376,
"grad_norm": 25.227342019618998,
"learning_rate": 8.694992412746586e-07,
"logits/chosen": -0.27386438846588135,
"logits/rejected": -0.2711098790168762,
"logps/chosen": -198.40101623535156,
"logps/rejected": -204.6220703125,
"loss": 0.5727,
"rewards/accuracies": 0.7281250357627869,
"rewards/chosen": -0.3862449824810028,
"rewards/margins": 0.41143903136253357,
"rewards/rejected": -0.7976840734481812,
"step": 320
},
{
"epoch": 0.22528480607586296,
"grad_norm": 24.00522520700325,
"learning_rate": 8.619119878603945e-07,
"logits/chosen": -0.3334537744522095,
"logits/rejected": -0.3187546730041504,
"logps/chosen": -208.01986694335938,
"logps/rejected": -212.91488647460938,
"loss": 0.5913,
"rewards/accuracies": 0.6703125238418579,
"rewards/chosen": -0.4798099398612976,
"rewards/margins": 0.37955817580223083,
"rewards/rejected": -0.8593681454658508,
"step": 330
},
{
"epoch": 0.23211161838119213,
"grad_norm": 23.49360024665317,
"learning_rate": 8.543247344461305e-07,
"logits/chosen": -0.30438894033432007,
"logits/rejected": -0.28073978424072266,
"logps/chosen": -203.7110595703125,
"logps/rejected": -211.83615112304688,
"loss": 0.56,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.3902357518672943,
"rewards/margins": 0.5086088180541992,
"rewards/rejected": -0.8988445401191711,
"step": 340
},
{
"epoch": 0.23893843068652132,
"grad_norm": 23.086500001623612,
"learning_rate": 8.467374810318663e-07,
"logits/chosen": -0.3257724940776825,
"logits/rejected": -0.2853447198867798,
"logps/chosen": -204.09765625,
"logps/rejected": -212.38494873046875,
"loss": 0.5515,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.4673992991447449,
"rewards/margins": 0.5267953872680664,
"rewards/rejected": -0.9941946864128113,
"step": 350
},
{
"epoch": 0.24576524299185049,
"grad_norm": 24.60129579583855,
"learning_rate": 8.391502276176023e-07,
"logits/chosen": -0.3029869794845581,
"logits/rejected": -0.2718327045440674,
"logps/chosen": -196.5174560546875,
"logps/rejected": -204.4929656982422,
"loss": 0.5809,
"rewards/accuracies": 0.7046875357627869,
"rewards/chosen": -0.4800136089324951,
"rewards/margins": 0.43177759647369385,
"rewards/rejected": -0.9117912650108337,
"step": 360
},
{
"epoch": 0.25259205529717965,
"grad_norm": 23.03353178121409,
"learning_rate": 8.315629742033384e-07,
"logits/chosen": -0.28175657987594604,
"logits/rejected": -0.2525416612625122,
"logps/chosen": -197.58517456054688,
"logps/rejected": -210.83853149414062,
"loss": 0.5675,
"rewards/accuracies": 0.7234375476837158,
"rewards/chosen": -0.5489044785499573,
"rewards/margins": 0.4759043753147125,
"rewards/rejected": -1.0248088836669922,
"step": 370
},
{
"epoch": 0.2594188676025089,
"grad_norm": 21.702116754195792,
"learning_rate": 8.239757207890743e-07,
"logits/chosen": -0.3090224266052246,
"logits/rejected": -0.2872709333896637,
"logps/chosen": -204.044921875,
"logps/rejected": -214.3769989013672,
"loss": 0.5414,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.501671552658081,
"rewards/margins": 0.5782625675201416,
"rewards/rejected": -1.0799341201782227,
"step": 380
},
{
"epoch": 0.26624567990783804,
"grad_norm": 22.690534272455945,
"learning_rate": 8.163884673748103e-07,
"logits/chosen": -0.2652078866958618,
"logits/rejected": -0.22916777431964874,
"logps/chosen": -206.28855895996094,
"logps/rejected": -217.3023681640625,
"loss": 0.532,
"rewards/accuracies": 0.7343750596046448,
"rewards/chosen": -0.47486239671707153,
"rewards/margins": 0.6135950684547424,
"rewards/rejected": -1.088457465171814,
"step": 390
},
{
"epoch": 0.2730724922131672,
"grad_norm": 24.587498727216616,
"learning_rate": 8.088012139605462e-07,
"logits/chosen": -0.28489071130752563,
"logits/rejected": -0.23875750601291656,
"logps/chosen": -202.77565002441406,
"logps/rejected": -216.6030731201172,
"loss": 0.5272,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.5735213756561279,
"rewards/margins": 0.6210550665855408,
"rewards/rejected": -1.194576382637024,
"step": 400
},
{
"epoch": 0.2798993045184964,
"grad_norm": 24.707605897401567,
"learning_rate": 8.012139605462822e-07,
"logits/chosen": -0.3593894839286804,
"logits/rejected": -0.3138624429702759,
"logps/chosen": -202.06204223632812,
"logps/rejected": -208.73065185546875,
"loss": 0.5575,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.604642391204834,
"rewards/margins": 0.5522481203079224,
"rewards/rejected": -1.156890630722046,
"step": 410
},
{
"epoch": 0.2867261168238256,
"grad_norm": 24.754070000277498,
"learning_rate": 7.936267071320181e-07,
"logits/chosen": -0.3502323627471924,
"logits/rejected": -0.3173756003379822,
"logps/chosen": -207.6633758544922,
"logps/rejected": -216.3917236328125,
"loss": 0.5265,
"rewards/accuracies": 0.7484375238418579,
"rewards/chosen": -0.6551162004470825,
"rewards/margins": 0.6169639229774475,
"rewards/rejected": -1.2720801830291748,
"step": 420
},
{
"epoch": 0.29355292912915476,
"grad_norm": 23.564476771066985,
"learning_rate": 7.860394537177542e-07,
"logits/chosen": -0.3500007092952728,
"logits/rejected": -0.32545575499534607,
"logps/chosen": -211.29928588867188,
"logps/rejected": -227.12037658691406,
"loss": 0.5223,
"rewards/accuracies": 0.7421875596046448,
"rewards/chosen": -0.7528213262557983,
"rewards/margins": 0.739406943321228,
"rewards/rejected": -1.492228388786316,
"step": 430
},
{
"epoch": 0.3003797414344839,
"grad_norm": 21.091018091079327,
"learning_rate": 7.784522003034901e-07,
"logits/chosen": -0.35516998171806335,
"logits/rejected": -0.3074837327003479,
"logps/chosen": -203.1188507080078,
"logps/rejected": -212.15496826171875,
"loss": 0.5055,
"rewards/accuracies": 0.7765625715255737,
"rewards/chosen": -0.6801650524139404,
"rewards/margins": 0.7159599661827087,
"rewards/rejected": -1.396125078201294,
"step": 440
},
{
"epoch": 0.3072065537398131,
"grad_norm": 30.178688833532316,
"learning_rate": 7.708649468892261e-07,
"logits/chosen": -0.3771928548812866,
"logits/rejected": -0.34754854440689087,
"logps/chosen": -208.95216369628906,
"logps/rejected": -225.38938903808594,
"loss": 0.5226,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.7856850624084473,
"rewards/margins": 0.6984450817108154,
"rewards/rejected": -1.4841301441192627,
"step": 450
},
{
"epoch": 0.3140333660451423,
"grad_norm": 22.73508892423378,
"learning_rate": 7.632776934749621e-07,
"logits/chosen": -0.40090760588645935,
"logits/rejected": -0.3806273937225342,
"logps/chosen": -208.29766845703125,
"logps/rejected": -223.73020935058594,
"loss": 0.5013,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.7431963086128235,
"rewards/margins": 0.8224382400512695,
"rewards/rejected": -1.5656344890594482,
"step": 460
},
{
"epoch": 0.3208601783504715,
"grad_norm": 24.65367082247547,
"learning_rate": 7.55690440060698e-07,
"logits/chosen": -0.41392359137535095,
"logits/rejected": -0.3990693688392639,
"logps/chosen": -211.69845581054688,
"logps/rejected": -222.681884765625,
"loss": 0.4896,
"rewards/accuracies": 0.7671874761581421,
"rewards/chosen": -0.7812504768371582,
"rewards/margins": 0.8228715062141418,
"rewards/rejected": -1.6041220426559448,
"step": 470
},
{
"epoch": 0.32768699065580065,
"grad_norm": 26.060565630616303,
"learning_rate": 7.481031866464339e-07,
"logits/chosen": -0.4470677673816681,
"logits/rejected": -0.4043146073818207,
"logps/chosen": -201.87158203125,
"logps/rejected": -216.65240478515625,
"loss": 0.5178,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.8349807858467102,
"rewards/margins": 0.7298619151115417,
"rewards/rejected": -1.564842700958252,
"step": 480
},
{
"epoch": 0.3345138029611298,
"grad_norm": 24.867787006387463,
"learning_rate": 7.405159332321699e-07,
"logits/chosen": -0.4602758288383484,
"logits/rejected": -0.4031441807746887,
"logps/chosen": -215.20541381835938,
"logps/rejected": -234.6583251953125,
"loss": 0.5155,
"rewards/accuracies": 0.7484375238418579,
"rewards/chosen": -0.9265861511230469,
"rewards/margins": 0.8055697679519653,
"rewards/rejected": -1.7321559190750122,
"step": 490
},
{
"epoch": 0.34134061526645904,
"grad_norm": 32.86790243336268,
"learning_rate": 7.329286798179059e-07,
"logits/chosen": -0.4144153594970703,
"logits/rejected": -0.3892706036567688,
"logps/chosen": -216.45887756347656,
"logps/rejected": -225.97056579589844,
"loss": 0.5274,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.9314414262771606,
"rewards/margins": 0.7752954363822937,
"rewards/rejected": -1.7067368030548096,
"step": 500
},
{
"epoch": 0.3481674275717882,
"grad_norm": 29.0406209714796,
"learning_rate": 7.253414264036418e-07,
"logits/chosen": -0.4518946707248688,
"logits/rejected": -0.4360005855560303,
"logps/chosen": -210.40875244140625,
"logps/rejected": -227.6586456298828,
"loss": 0.4918,
"rewards/accuracies": 0.7640624642372131,
"rewards/chosen": -0.7644888162612915,
"rewards/margins": 0.8264600038528442,
"rewards/rejected": -1.5909489393234253,
"step": 510
},
{
"epoch": 0.35499423987711737,
"grad_norm": 29.792037648827193,
"learning_rate": 7.177541729893778e-07,
"logits/chosen": -0.46055272221565247,
"logits/rejected": -0.41955289244651794,
"logps/chosen": -203.9451904296875,
"logps/rejected": -225.48402404785156,
"loss": 0.5137,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.7920026779174805,
"rewards/margins": 0.7750235795974731,
"rewards/rejected": -1.5670262575149536,
"step": 520
},
{
"epoch": 0.36182105218244653,
"grad_norm": 28.48324275582042,
"learning_rate": 7.101669195751137e-07,
"logits/chosen": -0.44266417622566223,
"logits/rejected": -0.4136849045753479,
"logps/chosen": -217.11045837402344,
"logps/rejected": -232.384521484375,
"loss": 0.5059,
"rewards/accuracies": 0.7500000596046448,
"rewards/chosen": -0.9079422950744629,
"rewards/margins": 0.8452929258346558,
"rewards/rejected": -1.7532353401184082,
"step": 530
},
{
"epoch": 0.36864786448777576,
"grad_norm": 24.346858846505146,
"learning_rate": 7.025796661608497e-07,
"logits/chosen": -0.4453073740005493,
"logits/rejected": -0.39773428440093994,
"logps/chosen": -199.64686584472656,
"logps/rejected": -217.36294555664062,
"loss": 0.5282,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.7881425023078918,
"rewards/margins": 0.745051383972168,
"rewards/rejected": -1.533193826675415,
"step": 540
},
{
"epoch": 0.3754746767931049,
"grad_norm": 23.88017645464549,
"learning_rate": 6.949924127465857e-07,
"logits/chosen": -0.4227825701236725,
"logits/rejected": -0.3899107873439789,
"logps/chosen": -218.3785400390625,
"logps/rejected": -230.14222717285156,
"loss": 0.5021,
"rewards/accuracies": 0.7703125476837158,
"rewards/chosen": -0.704402506351471,
"rewards/margins": 0.8275265693664551,
"rewards/rejected": -1.5319291353225708,
"step": 550
},
{
"epoch": 0.3823014890984341,
"grad_norm": 23.672046628232867,
"learning_rate": 6.874051593323217e-07,
"logits/chosen": -0.42757853865623474,
"logits/rejected": -0.394180566072464,
"logps/chosen": -208.079345703125,
"logps/rejected": -228.22598266601562,
"loss": 0.4667,
"rewards/accuracies": 0.770312488079071,
"rewards/chosen": -0.8188365697860718,
"rewards/margins": 0.9388971328735352,
"rewards/rejected": -1.7577338218688965,
"step": 560
},
{
"epoch": 0.38912830140376325,
"grad_norm": 27.539677366232738,
"learning_rate": 6.798179059180577e-07,
"logits/chosen": -0.4404156506061554,
"logits/rejected": -0.3975413739681244,
"logps/chosen": -208.03125,
"logps/rejected": -224.20956420898438,
"loss": 0.5004,
"rewards/accuracies": 0.7593750357627869,
"rewards/chosen": -0.8374041318893433,
"rewards/margins": 0.7886074781417847,
"rewards/rejected": -1.6260114908218384,
"step": 570
},
{
"epoch": 0.3959551137090925,
"grad_norm": 25.29375987198196,
"learning_rate": 6.722306525037936e-07,
"logits/chosen": -0.4404994249343872,
"logits/rejected": -0.40123340487480164,
"logps/chosen": -213.8634490966797,
"logps/rejected": -234.7059326171875,
"loss": 0.497,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.8731653094291687,
"rewards/margins": 0.9025252461433411,
"rewards/rejected": -1.7756905555725098,
"step": 580
},
{
"epoch": 0.40278192601442164,
"grad_norm": 23.196272876570017,
"learning_rate": 6.646433990895296e-07,
"logits/chosen": -0.42805609107017517,
"logits/rejected": -0.3933747410774231,
"logps/chosen": -210.49766540527344,
"logps/rejected": -230.8019256591797,
"loss": 0.472,
"rewards/accuracies": 0.7718750238418579,
"rewards/chosen": -0.9004274606704712,
"rewards/margins": 0.9456923604011536,
"rewards/rejected": -1.84611976146698,
"step": 590
},
{
"epoch": 0.4096087383197508,
"grad_norm": 26.112729497646914,
"learning_rate": 6.570561456752655e-07,
"logits/chosen": -0.419676810503006,
"logits/rejected": -0.3932231068611145,
"logps/chosen": -212.6820831298828,
"logps/rejected": -230.1705322265625,
"loss": 0.4551,
"rewards/accuracies": 0.776562511920929,
"rewards/chosen": -0.9198075532913208,
"rewards/margins": 1.0145457983016968,
"rewards/rejected": -1.9343533515930176,
"step": 600
},
{
"epoch": 0.41643555062508,
"grad_norm": 26.539025702964505,
"learning_rate": 6.494688922610015e-07,
"logits/chosen": -0.47971057891845703,
"logits/rejected": -0.43692541122436523,
"logps/chosen": -212.31594848632812,
"logps/rejected": -234.5380859375,
"loss": 0.4563,
"rewards/accuracies": 0.770312488079071,
"rewards/chosen": -1.0747839212417603,
"rewards/margins": 1.078429937362671,
"rewards/rejected": -2.1532137393951416,
"step": 610
},
{
"epoch": 0.4232623629304092,
"grad_norm": 26.2859842178028,
"learning_rate": 6.418816388467374e-07,
"logits/chosen": -0.4652007818222046,
"logits/rejected": -0.4464990496635437,
"logps/chosen": -212.9930419921875,
"logps/rejected": -230.19207763671875,
"loss": 0.4778,
"rewards/accuracies": 0.7906250357627869,
"rewards/chosen": -1.1166890859603882,
"rewards/margins": 0.9617180228233337,
"rewards/rejected": -2.0784072875976562,
"step": 620
},
{
"epoch": 0.43008917523573836,
"grad_norm": 27.943160005363282,
"learning_rate": 6.342943854324734e-07,
"logits/chosen": -0.507358968257904,
"logits/rejected": -0.46083295345306396,
"logps/chosen": -211.0389404296875,
"logps/rejected": -234.06576538085938,
"loss": 0.4689,
"rewards/accuracies": 0.78125,
"rewards/chosen": -1.125166654586792,
"rewards/margins": 1.1086124181747437,
"rewards/rejected": -2.233778953552246,
"step": 630
},
{
"epoch": 0.43691598754106753,
"grad_norm": 27.031702699703523,
"learning_rate": 6.267071320182093e-07,
"logits/chosen": -0.5109987854957581,
"logits/rejected": -0.4727884531021118,
"logps/chosen": -216.13302612304688,
"logps/rejected": -241.88287353515625,
"loss": 0.4635,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -1.1550945043563843,
"rewards/margins": 1.1085400581359863,
"rewards/rejected": -2.263634443283081,
"step": 640
},
{
"epoch": 0.4437427998463967,
"grad_norm": 26.49416191451856,
"learning_rate": 6.191198786039453e-07,
"logits/chosen": -0.5049822330474854,
"logits/rejected": -0.46804797649383545,
"logps/chosen": -220.15802001953125,
"logps/rejected": -241.11386108398438,
"loss": 0.4646,
"rewards/accuracies": 0.770312488079071,
"rewards/chosen": -1.1580806970596313,
"rewards/margins": 1.064436435699463,
"rewards/rejected": -2.222517490386963,
"step": 650
},
{
"epoch": 0.4505696121517259,
"grad_norm": 28.052993928802096,
"learning_rate": 6.115326251896813e-07,
"logits/chosen": -0.5224714875221252,
"logits/rejected": -0.496852308511734,
"logps/chosen": -217.48992919921875,
"logps/rejected": -234.48318481445312,
"loss": 0.5188,
"rewards/accuracies": 0.7671874761581421,
"rewards/chosen": -1.1128088235855103,
"rewards/margins": 0.9438337087631226,
"rewards/rejected": -2.056642532348633,
"step": 660
},
{
"epoch": 0.4573964244570551,
"grad_norm": 32.11947138128127,
"learning_rate": 6.039453717754173e-07,
"logits/chosen": -0.4993141293525696,
"logits/rejected": -0.4682856798171997,
"logps/chosen": -206.40176391601562,
"logps/rejected": -231.08042907714844,
"loss": 0.4953,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -1.0732593536376953,
"rewards/margins": 1.1166470050811768,
"rewards/rejected": -2.189906358718872,
"step": 670
},
{
"epoch": 0.46422323676238425,
"grad_norm": 24.595239877463356,
"learning_rate": 5.963581183611533e-07,
"logits/chosen": -0.5414324998855591,
"logits/rejected": -0.5145028233528137,
"logps/chosen": -219.66567993164062,
"logps/rejected": -236.0765380859375,
"loss": 0.4608,
"rewards/accuracies": 0.7781250476837158,
"rewards/chosen": -0.9715930819511414,
"rewards/margins": 1.0554088354110718,
"rewards/rejected": -2.0270018577575684,
"step": 680
},
{
"epoch": 0.47105004906771347,
"grad_norm": 27.819824043736283,
"learning_rate": 5.887708649468892e-07,
"logits/chosen": -0.482106477022171,
"logits/rejected": -0.43574321269989014,
"logps/chosen": -211.92596435546875,
"logps/rejected": -234.6639862060547,
"loss": 0.4352,
"rewards/accuracies": 0.792187511920929,
"rewards/chosen": -0.9926649332046509,
"rewards/margins": 1.141036033630371,
"rewards/rejected": -2.1337008476257324,
"step": 690
},
{
"epoch": 0.47787686137304264,
"grad_norm": 27.82950606174818,
"learning_rate": 5.811836115326252e-07,
"logits/chosen": -0.491192102432251,
"logits/rejected": -0.45507892966270447,
"logps/chosen": -215.52423095703125,
"logps/rejected": -239.1810302734375,
"loss": 0.4534,
"rewards/accuracies": 0.7812500596046448,
"rewards/chosen": -1.0811206102371216,
"rewards/margins": 1.171852469444275,
"rewards/rejected": -2.2529730796813965,
"step": 700
},
{
"epoch": 0.4847036736783718,
"grad_norm": 32.40109215164061,
"learning_rate": 5.735963581183611e-07,
"logits/chosen": -0.48725226521492004,
"logits/rejected": -0.4451846480369568,
"logps/chosen": -211.22933959960938,
"logps/rejected": -236.77740478515625,
"loss": 0.4487,
"rewards/accuracies": 0.7828124761581421,
"rewards/chosen": -1.0895929336547852,
"rewards/margins": 1.1770341396331787,
"rewards/rejected": -2.2666268348693848,
"step": 710
},
{
"epoch": 0.49153048598370097,
"grad_norm": 27.259651037643604,
"learning_rate": 5.660091047040971e-07,
"logits/chosen": -0.5053711533546448,
"logits/rejected": -0.4444194436073303,
"logps/chosen": -205.80319213867188,
"logps/rejected": -230.7117919921875,
"loss": 0.4743,
"rewards/accuracies": 0.770312488079071,
"rewards/chosen": -1.1533528566360474,
"rewards/margins": 1.07535982131958,
"rewards/rejected": -2.228712797164917,
"step": 720
},
{
"epoch": 0.4983572982890302,
"grad_norm": 23.45407239305211,
"learning_rate": 5.584218512898331e-07,
"logits/chosen": -0.46755921840667725,
"logits/rejected": -0.41828638315200806,
"logps/chosen": -214.959716796875,
"logps/rejected": -237.14413452148438,
"loss": 0.4451,
"rewards/accuracies": 0.8125,
"rewards/chosen": -1.1057405471801758,
"rewards/margins": 1.1711297035217285,
"rewards/rejected": -2.2768704891204834,
"step": 730
},
{
"epoch": 0.5051841105943593,
"grad_norm": 24.513672931022274,
"learning_rate": 5.508345978755691e-07,
"logits/chosen": -0.5107758045196533,
"logits/rejected": -0.47158223390579224,
"logps/chosen": -214.1978759765625,
"logps/rejected": -236.34100341796875,
"loss": 0.4356,
"rewards/accuracies": 0.796875,
"rewards/chosen": -1.0896263122558594,
"rewards/margins": 1.132210612297058,
"rewards/rejected": -2.221837043762207,
"step": 740
},
{
"epoch": 0.5120109228996885,
"grad_norm": 27.557361902005226,
"learning_rate": 5.432473444613049e-07,
"logits/chosen": -0.47495898604393005,
"logits/rejected": -0.42891502380371094,
"logps/chosen": -215.3628692626953,
"logps/rejected": -240.29644775390625,
"loss": 0.4433,
"rewards/accuracies": 0.8046875596046448,
"rewards/chosen": -1.1231842041015625,
"rewards/margins": 1.1870129108428955,
"rewards/rejected": -2.310196876525879,
"step": 750
},
{
"epoch": 0.5188377352050177,
"grad_norm": 25.763088367806024,
"learning_rate": 5.356600910470409e-07,
"logits/chosen": -0.5234218835830688,
"logits/rejected": -0.46476346254348755,
"logps/chosen": -214.0421142578125,
"logps/rejected": -238.0985565185547,
"loss": 0.4236,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -1.1129274368286133,
"rewards/margins": 1.2772108316421509,
"rewards/rejected": -2.3901383876800537,
"step": 760
},
{
"epoch": 0.5256645475103469,
"grad_norm": 27.345063991868273,
"learning_rate": 5.280728376327769e-07,
"logits/chosen": -0.5037857294082642,
"logits/rejected": -0.4784386157989502,
"logps/chosen": -210.1291046142578,
"logps/rejected": -236.04969787597656,
"loss": 0.4347,
"rewards/accuracies": 0.8109375238418579,
"rewards/chosen": -1.1615896224975586,
"rewards/margins": 1.2552942037582397,
"rewards/rejected": -2.416883945465088,
"step": 770
},
{
"epoch": 0.5324913598156761,
"grad_norm": 23.559487104074414,
"learning_rate": 5.204855842185128e-07,
"logits/chosen": -0.5264319777488708,
"logits/rejected": -0.47137507796287537,
"logps/chosen": -218.16024780273438,
"logps/rejected": -245.5438995361328,
"loss": 0.4609,
"rewards/accuracies": 0.7703125476837158,
"rewards/chosen": -1.2951855659484863,
"rewards/margins": 1.170878291130066,
"rewards/rejected": -2.4660637378692627,
"step": 780
},
{
"epoch": 0.5393181721210053,
"grad_norm": 30.437623350555043,
"learning_rate": 5.128983308042489e-07,
"logits/chosen": -0.4954899251461029,
"logits/rejected": -0.45233067870140076,
"logps/chosen": -213.85757446289062,
"logps/rejected": -242.7041473388672,
"loss": 0.4193,
"rewards/accuracies": 0.8093750476837158,
"rewards/chosen": -1.2700811624526978,
"rewards/margins": 1.2533843517303467,
"rewards/rejected": -2.523465394973755,
"step": 790
},
{
"epoch": 0.5461449844263344,
"grad_norm": 25.96035380580991,
"learning_rate": 5.053110773899848e-07,
"logits/chosen": -0.49867063760757446,
"logits/rejected": -0.44984591007232666,
"logps/chosen": -218.67074584960938,
"logps/rejected": -247.30982971191406,
"loss": 0.424,
"rewards/accuracies": 0.8046875,
"rewards/chosen": -1.237367868423462,
"rewards/margins": 1.278685212135315,
"rewards/rejected": -2.5160531997680664,
"step": 800
},
{
"epoch": 0.5529717967316636,
"grad_norm": 27.066709483078917,
"learning_rate": 4.977238239757208e-07,
"logits/chosen": -0.4714178144931793,
"logits/rejected": -0.4372885823249817,
"logps/chosen": -218.98892211914062,
"logps/rejected": -242.98770141601562,
"loss": 0.4266,
"rewards/accuracies": 0.7984375357627869,
"rewards/chosen": -1.298151969909668,
"rewards/margins": 1.222092866897583,
"rewards/rejected": -2.520244836807251,
"step": 810
},
{
"epoch": 0.5597986090369927,
"grad_norm": 28.230804755745105,
"learning_rate": 4.901365705614567e-07,
"logits/chosen": -0.45390385389328003,
"logits/rejected": -0.43030381202697754,
"logps/chosen": -220.013427734375,
"logps/rejected": -241.9390411376953,
"loss": 0.4526,
"rewards/accuracies": 0.796875,
"rewards/chosen": -1.354661464691162,
"rewards/margins": 1.215053677558899,
"rewards/rejected": -2.5697154998779297,
"step": 820
},
{
"epoch": 0.566625421342322,
"grad_norm": 32.13534664184047,
"learning_rate": 4.825493171471927e-07,
"logits/chosen": -0.475396066904068,
"logits/rejected": -0.43329310417175293,
"logps/chosen": -210.43185424804688,
"logps/rejected": -236.67987060546875,
"loss": 0.4189,
"rewards/accuracies": 0.815625011920929,
"rewards/chosen": -1.264033317565918,
"rewards/margins": 1.377021074295044,
"rewards/rejected": -2.641054153442383,
"step": 830
},
{
"epoch": 0.5734522336476512,
"grad_norm": 22.262860568714245,
"learning_rate": 4.7496206373292864e-07,
"logits/chosen": -0.4692656993865967,
"logits/rejected": -0.4306912422180176,
"logps/chosen": -211.5372772216797,
"logps/rejected": -246.39736938476562,
"loss": 0.3916,
"rewards/accuracies": 0.8375000357627869,
"rewards/chosen": -1.1525495052337646,
"rewards/margins": 1.4558607339859009,
"rewards/rejected": -2.608410358428955,
"step": 840
},
{
"epoch": 0.5802790459529803,
"grad_norm": 22.80617456340079,
"learning_rate": 4.673748103186646e-07,
"logits/chosen": -0.46342021226882935,
"logits/rejected": -0.41512057185173035,
"logps/chosen": -221.32496643066406,
"logps/rejected": -251.7954864501953,
"loss": 0.394,
"rewards/accuracies": 0.8218750357627869,
"rewards/chosen": -1.2910584211349487,
"rewards/margins": 1.4217520952224731,
"rewards/rejected": -2.712810516357422,
"step": 850
},
{
"epoch": 0.5871058582583095,
"grad_norm": 24.868191575194487,
"learning_rate": 4.597875569044006e-07,
"logits/chosen": -0.48653626441955566,
"logits/rejected": -0.4366312623023987,
"logps/chosen": -217.47422790527344,
"logps/rejected": -241.48968505859375,
"loss": 0.4269,
"rewards/accuracies": 0.801562488079071,
"rewards/chosen": -1.3257293701171875,
"rewards/margins": 1.3266490697860718,
"rewards/rejected": -2.652378559112549,
"step": 860
},
{
"epoch": 0.5939326705636387,
"grad_norm": 27.035059402616938,
"learning_rate": 4.5220030349013654e-07,
"logits/chosen": -0.5033361911773682,
"logits/rejected": -0.4694429039955139,
"logps/chosen": -214.79815673828125,
"logps/rejected": -237.64102172851562,
"loss": 0.4296,
"rewards/accuracies": 0.7921874523162842,
"rewards/chosen": -1.3357491493225098,
"rewards/margins": 1.2649694681167603,
"rewards/rejected": -2.6007187366485596,
"step": 870
},
{
"epoch": 0.6007594828689679,
"grad_norm": 27.746278145893346,
"learning_rate": 4.446130500758725e-07,
"logits/chosen": -0.5227242708206177,
"logits/rejected": -0.4751604497432709,
"logps/chosen": -218.23658752441406,
"logps/rejected": -249.3454132080078,
"loss": 0.4233,
"rewards/accuracies": 0.817187488079071,
"rewards/chosen": -1.3457627296447754,
"rewards/margins": 1.428666591644287,
"rewards/rejected": -2.7744295597076416,
"step": 880
},
{
"epoch": 0.6075862951742971,
"grad_norm": 26.892931653503698,
"learning_rate": 4.370257966616085e-07,
"logits/chosen": -0.5066260099411011,
"logits/rejected": -0.47855502367019653,
"logps/chosen": -214.84915161132812,
"logps/rejected": -240.56436157226562,
"loss": 0.4612,
"rewards/accuracies": 0.7812500596046448,
"rewards/chosen": -1.4467679262161255,
"rewards/margins": 1.3007091283798218,
"rewards/rejected": -2.7474770545959473,
"step": 890
},
{
"epoch": 0.6144131074796262,
"grad_norm": 32.793455771900234,
"learning_rate": 4.2943854324734444e-07,
"logits/chosen": -0.4987248182296753,
"logits/rejected": -0.4517776668071747,
"logps/chosen": -218.49545288085938,
"logps/rejected": -252.3199462890625,
"loss": 0.4007,
"rewards/accuracies": 0.828125,
"rewards/chosen": -1.4036812782287598,
"rewards/margins": 1.497314453125,
"rewards/rejected": -2.9009957313537598,
"step": 900
},
{
"epoch": 0.6212399197849554,
"grad_norm": 30.14977908240741,
"learning_rate": 4.2185128983308036e-07,
"logits/chosen": -0.5123909711837769,
"logits/rejected": -0.456384539604187,
"logps/chosen": -221.94183349609375,
"logps/rejected": -250.5224151611328,
"loss": 0.4185,
"rewards/accuracies": 0.8265625238418579,
"rewards/chosen": -1.3800506591796875,
"rewards/margins": 1.4040327072143555,
"rewards/rejected": -2.784083366394043,
"step": 910
},
{
"epoch": 0.6280667320902846,
"grad_norm": 23.187149506889586,
"learning_rate": 4.142640364188164e-07,
"logits/chosen": -0.5007960200309753,
"logits/rejected": -0.4656420350074768,
"logps/chosen": -224.66000366210938,
"logps/rejected": -250.5994873046875,
"loss": 0.4194,
"rewards/accuracies": 0.817187488079071,
"rewards/chosen": -1.4466440677642822,
"rewards/margins": 1.3647561073303223,
"rewards/rejected": -2.8114004135131836,
"step": 920
},
{
"epoch": 0.6348935443956137,
"grad_norm": 26.465496977643166,
"learning_rate": 4.0667678300455234e-07,
"logits/chosen": -0.5095345973968506,
"logits/rejected": -0.44781219959259033,
"logps/chosen": -219.541259765625,
"logps/rejected": -253.14544677734375,
"loss": 0.3631,
"rewards/accuracies": 0.8765624761581421,
"rewards/chosen": -1.3718998432159424,
"rewards/margins": 1.6033210754394531,
"rewards/rejected": -2.9752209186553955,
"step": 930
},
{
"epoch": 0.641720356700943,
"grad_norm": 21.651167586614733,
"learning_rate": 3.990895295902883e-07,
"logits/chosen": -0.5611530542373657,
"logits/rejected": -0.5065969824790955,
"logps/chosen": -222.84457397460938,
"logps/rejected": -251.35067749023438,
"loss": 0.397,
"rewards/accuracies": 0.8250000476837158,
"rewards/chosen": -1.4304229021072388,
"rewards/margins": 1.4556035995483398,
"rewards/rejected": -2.886026620864868,
"step": 940
},
{
"epoch": 0.6485471690062722,
"grad_norm": 21.56653990852637,
"learning_rate": 3.915022761760243e-07,
"logits/chosen": -0.575349748134613,
"logits/rejected": -0.5415146350860596,
"logps/chosen": -209.71266174316406,
"logps/rejected": -239.22946166992188,
"loss": 0.4001,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": -1.521388053894043,
"rewards/margins": 1.4791213274002075,
"rewards/rejected": -3.000509262084961,
"step": 950
},
{
"epoch": 0.6553739813116013,
"grad_norm": 23.31036794244746,
"learning_rate": 3.8391502276176024e-07,
"logits/chosen": -0.5698951482772827,
"logits/rejected": -0.5178714394569397,
"logps/chosen": -228.25030517578125,
"logps/rejected": -261.415771484375,
"loss": 0.3891,
"rewards/accuracies": 0.817187488079071,
"rewards/chosen": -1.6143665313720703,
"rewards/margins": 1.645197868347168,
"rewards/rejected": -3.2595643997192383,
"step": 960
},
{
"epoch": 0.6622007936169305,
"grad_norm": 26.214223596010875,
"learning_rate": 3.763277693474962e-07,
"logits/chosen": -0.5214463472366333,
"logits/rejected": -0.46749287843704224,
"logps/chosen": -218.10549926757812,
"logps/rejected": -251.87442016601562,
"loss": 0.4196,
"rewards/accuracies": 0.8125,
"rewards/chosen": -1.63547945022583,
"rewards/margins": 1.4419658184051514,
"rewards/rejected": -3.0774452686309814,
"step": 970
},
{
"epoch": 0.6690276059222596,
"grad_norm": 24.89349466924626,
"learning_rate": 3.687405159332321e-07,
"logits/chosen": -0.5419428944587708,
"logits/rejected": -0.5022714734077454,
"logps/chosen": -223.1068115234375,
"logps/rejected": -255.94949340820312,
"loss": 0.4144,
"rewards/accuracies": 0.815625011920929,
"rewards/chosen": -1.7430050373077393,
"rewards/margins": 1.4413095712661743,
"rewards/rejected": -3.184314489364624,
"step": 980
},
{
"epoch": 0.6758544182275888,
"grad_norm": 25.914909518247867,
"learning_rate": 3.611532625189681e-07,
"logits/chosen": -0.5115488767623901,
"logits/rejected": -0.4625004827976227,
"logps/chosen": -229.49105834960938,
"logps/rejected": -265.0625,
"loss": 0.3983,
"rewards/accuracies": 0.8218750357627869,
"rewards/chosen": -1.6320453882217407,
"rewards/margins": 1.5734854936599731,
"rewards/rejected": -3.2055306434631348,
"step": 990
},
{
"epoch": 0.6826812305329181,
"grad_norm": 31.456143694319483,
"learning_rate": 3.5356600910470406e-07,
"logits/chosen": -0.5371730327606201,
"logits/rejected": -0.4974362850189209,
"logps/chosen": -236.7477569580078,
"logps/rejected": -264.0472106933594,
"loss": 0.3827,
"rewards/accuracies": 0.8437500596046448,
"rewards/chosen": -1.620214819908142,
"rewards/margins": 1.5398459434509277,
"rewards/rejected": -3.1600606441497803,
"step": 1000
},
{
"epoch": 0.6895080428382472,
"grad_norm": 29.661159656571126,
"learning_rate": 3.459787556904401e-07,
"logits/chosen": -0.5440015196800232,
"logits/rejected": -0.49301889538764954,
"logps/chosen": -224.03494262695312,
"logps/rejected": -254.42193603515625,
"loss": 0.4033,
"rewards/accuracies": 0.8296875357627869,
"rewards/chosen": -1.5924382209777832,
"rewards/margins": 1.5776193141937256,
"rewards/rejected": -3.170057773590088,
"step": 1010
},
{
"epoch": 0.6963348551435764,
"grad_norm": 38.12069128333079,
"learning_rate": 3.3839150227617604e-07,
"logits/chosen": -0.5860447883605957,
"logits/rejected": -0.543270468711853,
"logps/chosen": -228.84930419921875,
"logps/rejected": -262.8966064453125,
"loss": 0.3898,
"rewards/accuracies": 0.8406250476837158,
"rewards/chosen": -1.6053173542022705,
"rewards/margins": 1.590077519416809,
"rewards/rejected": -3.19539475440979,
"step": 1020
},
{
"epoch": 0.7031616674489056,
"grad_norm": 32.08364090632609,
"learning_rate": 3.30804248861912e-07,
"logits/chosen": -0.6051906943321228,
"logits/rejected": -0.5597983598709106,
"logps/chosen": -224.02899169921875,
"logps/rejected": -258.93511962890625,
"loss": 0.396,
"rewards/accuracies": 0.8171875476837158,
"rewards/chosen": -1.7182796001434326,
"rewards/margins": 1.5724890232086182,
"rewards/rejected": -3.290768623352051,
"step": 1030
},
{
"epoch": 0.7099884797542347,
"grad_norm": 25.599680429412086,
"learning_rate": 3.232169954476479e-07,
"logits/chosen": -0.6112679243087769,
"logits/rejected": -0.5801026821136475,
"logps/chosen": -225.71258544921875,
"logps/rejected": -264.3663330078125,
"loss": 0.3637,
"rewards/accuracies": 0.8421875238418579,
"rewards/chosen": -1.4613301753997803,
"rewards/margins": 1.712023138999939,
"rewards/rejected": -3.1733531951904297,
"step": 1040
},
{
"epoch": 0.716815292059564,
"grad_norm": 26.325121380352627,
"learning_rate": 3.156297420333839e-07,
"logits/chosen": -0.6216264963150024,
"logits/rejected": -0.5548665523529053,
"logps/chosen": -226.58059692382812,
"logps/rejected": -263.7754821777344,
"loss": 0.3636,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -1.6483052968978882,
"rewards/margins": 1.7705044746398926,
"rewards/rejected": -3.4188098907470703,
"step": 1050
},
{
"epoch": 0.7236421043648931,
"grad_norm": 23.347203569226366,
"learning_rate": 3.0804248861911986e-07,
"logits/chosen": -0.5403355360031128,
"logits/rejected": -0.49409806728363037,
"logps/chosen": -225.88253784179688,
"logps/rejected": -256.93182373046875,
"loss": 0.393,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -1.6747300624847412,
"rewards/margins": 1.6634035110473633,
"rewards/rejected": -3.3381335735321045,
"step": 1060
},
{
"epoch": 0.7304689166702223,
"grad_norm": 26.591582696664684,
"learning_rate": 3.004552352048558e-07,
"logits/chosen": -0.60378497838974,
"logits/rejected": -0.5446761250495911,
"logps/chosen": -222.86285400390625,
"logps/rejected": -254.32901000976562,
"loss": 0.3562,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -1.373286247253418,
"rewards/margins": 1.5952813625335693,
"rewards/rejected": -2.9685676097869873,
"step": 1070
},
{
"epoch": 0.7372957289755515,
"grad_norm": 26.301256433411677,
"learning_rate": 2.928679817905918e-07,
"logits/chosen": -0.575655996799469,
"logits/rejected": -0.5388238430023193,
"logps/chosen": -226.25411987304688,
"logps/rejected": -257.7029724121094,
"loss": 0.3889,
"rewards/accuracies": 0.832812488079071,
"rewards/chosen": -1.5678967237472534,
"rewards/margins": 1.5496362447738647,
"rewards/rejected": -3.1175332069396973,
"step": 1080
},
{
"epoch": 0.7441225412808806,
"grad_norm": 29.1969544488184,
"learning_rate": 2.8528072837632776e-07,
"logits/chosen": -0.563581109046936,
"logits/rejected": -0.4889605939388275,
"logps/chosen": -215.546630859375,
"logps/rejected": -251.0224609375,
"loss": 0.3594,
"rewards/accuracies": 0.854687511920929,
"rewards/chosen": -1.5211578607559204,
"rewards/margins": 1.6970359086990356,
"rewards/rejected": -3.218193531036377,
"step": 1090
},
{
"epoch": 0.7509493535862098,
"grad_norm": 28.75255873182244,
"learning_rate": 2.776934749620637e-07,
"logits/chosen": -0.5607287883758545,
"logits/rejected": -0.5297821760177612,
"logps/chosen": -213.63365173339844,
"logps/rejected": -240.619384765625,
"loss": 0.4057,
"rewards/accuracies": 0.8203125,
"rewards/chosen": -1.6024796962738037,
"rewards/margins": 1.537475347518921,
"rewards/rejected": -3.1399548053741455,
"step": 1100
},
{
"epoch": 0.7577761658915391,
"grad_norm": 28.027697277996715,
"learning_rate": 2.7010622154779964e-07,
"logits/chosen": -0.5775099992752075,
"logits/rejected": -0.5231542587280273,
"logps/chosen": -224.80667114257812,
"logps/rejected": -259.0721435546875,
"loss": 0.4044,
"rewards/accuracies": 0.8140624761581421,
"rewards/chosen": -1.598193883895874,
"rewards/margins": 1.5613579750061035,
"rewards/rejected": -3.1595516204833984,
"step": 1110
},
{
"epoch": 0.7646029781968682,
"grad_norm": 19.772049611357087,
"learning_rate": 2.6251896813353566e-07,
"logits/chosen": -0.5745671987533569,
"logits/rejected": -0.5299438834190369,
"logps/chosen": -225.1347198486328,
"logps/rejected": -255.4309539794922,
"loss": 0.3858,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": -1.4894109964370728,
"rewards/margins": 1.691686987876892,
"rewards/rejected": -3.181097984313965,
"step": 1120
},
{
"epoch": 0.7714297905021974,
"grad_norm": 23.20450105175028,
"learning_rate": 2.549317147192716e-07,
"logits/chosen": -0.5600322484970093,
"logits/rejected": -0.5002347230911255,
"logps/chosen": -219.86434936523438,
"logps/rejected": -253.78878784179688,
"loss": 0.3663,
"rewards/accuracies": 0.8453125357627869,
"rewards/chosen": -1.4636483192443848,
"rewards/margins": 1.6817249059677124,
"rewards/rejected": -3.1453733444213867,
"step": 1130
},
{
"epoch": 0.7782566028075265,
"grad_norm": 28.72150866508454,
"learning_rate": 2.473444613050076e-07,
"logits/chosen": -0.6041327118873596,
"logits/rejected": -0.5645285844802856,
"logps/chosen": -216.68939208984375,
"logps/rejected": -247.66275024414062,
"loss": 0.3806,
"rewards/accuracies": 0.8328125476837158,
"rewards/chosen": -1.5338340997695923,
"rewards/margins": 1.5916988849639893,
"rewards/rejected": -3.125532865524292,
"step": 1140
},
{
"epoch": 0.7850834151128557,
"grad_norm": 29.858461214238897,
"learning_rate": 2.3975720789074356e-07,
"logits/chosen": -0.6299252510070801,
"logits/rejected": -0.586955189704895,
"logps/chosen": -231.401611328125,
"logps/rejected": -263.02197265625,
"loss": 0.3998,
"rewards/accuracies": 0.8328125476837158,
"rewards/chosen": -1.6045633554458618,
"rewards/margins": 1.6497775316238403,
"rewards/rejected": -3.2543411254882812,
"step": 1150
},
{
"epoch": 0.791910227418185,
"grad_norm": 26.24413163476253,
"learning_rate": 2.321699544764795e-07,
"logits/chosen": -0.5830259919166565,
"logits/rejected": -0.5397896766662598,
"logps/chosen": -213.19375610351562,
"logps/rejected": -249.24717712402344,
"loss": 0.3717,
"rewards/accuracies": 0.8250000476837158,
"rewards/chosen": -1.6501479148864746,
"rewards/margins": 1.6961115598678589,
"rewards/rejected": -3.346259593963623,
"step": 1160
},
{
"epoch": 0.7987370397235141,
"grad_norm": 31.016581977192125,
"learning_rate": 2.2458270106221546e-07,
"logits/chosen": -0.5983390808105469,
"logits/rejected": -0.5455670952796936,
"logps/chosen": -224.10618591308594,
"logps/rejected": -254.94383239746094,
"loss": 0.3732,
"rewards/accuracies": 0.8296875357627869,
"rewards/chosen": -1.5914267301559448,
"rewards/margins": 1.632917046546936,
"rewards/rejected": -3.2243435382843018,
"step": 1170
},
{
"epoch": 0.8055638520288433,
"grad_norm": 82.84012389678055,
"learning_rate": 2.1699544764795143e-07,
"logits/chosen": -0.6019859910011292,
"logits/rejected": -0.5678104758262634,
"logps/chosen": -222.878662109375,
"logps/rejected": -253.78060913085938,
"loss": 0.4147,
"rewards/accuracies": 0.828125,
"rewards/chosen": -1.6500358581542969,
"rewards/margins": 1.5844390392303467,
"rewards/rejected": -3.2344746589660645,
"step": 1180
},
{
"epoch": 0.8123906643341725,
"grad_norm": 22.55342908794488,
"learning_rate": 2.094081942336874e-07,
"logits/chosen": -0.5819066762924194,
"logits/rejected": -0.5418481826782227,
"logps/chosen": -221.70608520507812,
"logps/rejected": -254.09922790527344,
"loss": 0.3354,
"rewards/accuracies": 0.8609375357627869,
"rewards/chosen": -1.6243677139282227,
"rewards/margins": 1.7402938604354858,
"rewards/rejected": -3.364661455154419,
"step": 1190
},
{
"epoch": 0.8192174766395016,
"grad_norm": 21.249823285036445,
"learning_rate": 2.0182094081942336e-07,
"logits/chosen": -0.5841631889343262,
"logits/rejected": -0.5415323972702026,
"logps/chosen": -225.88800048828125,
"logps/rejected": -254.038818359375,
"loss": 0.3821,
"rewards/accuracies": 0.839062511920929,
"rewards/chosen": -1.6330121755599976,
"rewards/margins": 1.732587456703186,
"rewards/rejected": -3.3655996322631836,
"step": 1200
},
{
"epoch": 0.8260442889448308,
"grad_norm": 22.957761561567523,
"learning_rate": 1.9423368740515933e-07,
"logits/chosen": -0.5876274704933167,
"logits/rejected": -0.5527446866035461,
"logps/chosen": -237.04470825195312,
"logps/rejected": -263.58868408203125,
"loss": 0.3658,
"rewards/accuracies": 0.8531250357627869,
"rewards/chosen": -1.6271567344665527,
"rewards/margins": 1.6703208684921265,
"rewards/rejected": -3.297477960586548,
"step": 1210
},
{
"epoch": 0.83287110125016,
"grad_norm": 26.3109466733547,
"learning_rate": 1.8664643399089527e-07,
"logits/chosen": -0.5855602622032166,
"logits/rejected": -0.5348464846611023,
"logps/chosen": -220.74581909179688,
"logps/rejected": -259.97076416015625,
"loss": 0.392,
"rewards/accuracies": 0.8234375715255737,
"rewards/chosen": -1.666372299194336,
"rewards/margins": 1.7341811656951904,
"rewards/rejected": -3.4005534648895264,
"step": 1220
},
{
"epoch": 0.8396979135554892,
"grad_norm": 32.86005475979103,
"learning_rate": 1.7905918057663124e-07,
"logits/chosen": -0.6146824359893799,
"logits/rejected": -0.5769205093383789,
"logps/chosen": -223.04859924316406,
"logps/rejected": -259.2931213378906,
"loss": 0.3747,
"rewards/accuracies": 0.8484375476837158,
"rewards/chosen": -1.6388548612594604,
"rewards/margins": 1.6829884052276611,
"rewards/rejected": -3.321843147277832,
"step": 1230
},
{
"epoch": 0.8465247258608184,
"grad_norm": 27.824013672905682,
"learning_rate": 1.7147192716236723e-07,
"logits/chosen": -0.5848041772842407,
"logits/rejected": -0.5365484356880188,
"logps/chosen": -224.9688262939453,
"logps/rejected": -253.75857543945312,
"loss": 0.374,
"rewards/accuracies": 0.8343750238418579,
"rewards/chosen": -1.633022427558899,
"rewards/margins": 1.6262296438217163,
"rewards/rejected": -3.2592520713806152,
"step": 1240
},
{
"epoch": 0.8533515381661475,
"grad_norm": 28.870976428951412,
"learning_rate": 1.638846737481032e-07,
"logits/chosen": -0.6266176700592041,
"logits/rejected": -0.5750494003295898,
"logps/chosen": -225.53489685058594,
"logps/rejected": -251.16812133789062,
"loss": 0.3643,
"rewards/accuracies": 0.8421875238418579,
"rewards/chosen": -1.6029326915740967,
"rewards/margins": 1.637751817703247,
"rewards/rejected": -3.2406845092773438,
"step": 1250
},
{
"epoch": 0.8601783504714767,
"grad_norm": 28.44671682958466,
"learning_rate": 1.5629742033383914e-07,
"logits/chosen": -0.5748768448829651,
"logits/rejected": -0.5039246082305908,
"logps/chosen": -229.083740234375,
"logps/rejected": -265.5872802734375,
"loss": 0.3464,
"rewards/accuracies": 0.8515625,
"rewards/chosen": -1.6385741233825684,
"rewards/margins": 2.006284713745117,
"rewards/rejected": -3.6448588371276855,
"step": 1260
},
{
"epoch": 0.867005162776806,
"grad_norm": 26.03554320093484,
"learning_rate": 1.487101669195751e-07,
"logits/chosen": -0.580173671245575,
"logits/rejected": -0.5294475555419922,
"logps/chosen": -225.72938537597656,
"logps/rejected": -262.03546142578125,
"loss": 0.3718,
"rewards/accuracies": 0.8359375,
"rewards/chosen": -1.6143238544464111,
"rewards/margins": 1.8427155017852783,
"rewards/rejected": -3.4570393562316895,
"step": 1270
},
{
"epoch": 0.8738319750821351,
"grad_norm": 22.97729500897279,
"learning_rate": 1.4112291350531107e-07,
"logits/chosen": -0.6003884673118591,
"logits/rejected": -0.5561665296554565,
"logps/chosen": -221.987548828125,
"logps/rejected": -258.51727294921875,
"loss": 0.3686,
"rewards/accuracies": 0.8359375,
"rewards/chosen": -1.4877190589904785,
"rewards/margins": 1.6500287055969238,
"rewards/rejected": -3.1377477645874023,
"step": 1280
},
{
"epoch": 0.8806587873874643,
"grad_norm": 31.37447822214391,
"learning_rate": 1.3353566009104704e-07,
"logits/chosen": -0.6364210844039917,
"logits/rejected": -0.575194239616394,
"logps/chosen": -225.1094207763672,
"logps/rejected": -260.13885498046875,
"loss": 0.3534,
"rewards/accuracies": 0.864062488079071,
"rewards/chosen": -1.6206319332122803,
"rewards/margins": 1.7905977964401245,
"rewards/rejected": -3.4112298488616943,
"step": 1290
},
{
"epoch": 0.8874855996927934,
"grad_norm": 22.936789815076953,
"learning_rate": 1.25948406676783e-07,
"logits/chosen": -0.6323338747024536,
"logits/rejected": -0.6003640294075012,
"logps/chosen": -227.20034790039062,
"logps/rejected": -259.46502685546875,
"loss": 0.3575,
"rewards/accuracies": 0.8406250476837158,
"rewards/chosen": -1.6749684810638428,
"rewards/margins": 1.7170754671096802,
"rewards/rejected": -3.3920438289642334,
"step": 1300
},
{
"epoch": 0.8943124119981226,
"grad_norm": 22.489511604558004,
"learning_rate": 1.1836115326251896e-07,
"logits/chosen": -0.6401182413101196,
"logits/rejected": -0.5833394527435303,
"logps/chosen": -223.30029296875,
"logps/rejected": -262.72998046875,
"loss": 0.3353,
"rewards/accuracies": 0.8593750596046448,
"rewards/chosen": -1.560599446296692,
"rewards/margins": 1.906503677368164,
"rewards/rejected": -3.4671034812927246,
"step": 1310
},
{
"epoch": 0.9011392243034518,
"grad_norm": 37.43162732034228,
"learning_rate": 1.1077389984825493e-07,
"logits/chosen": -0.5761069059371948,
"logits/rejected": -0.5430048108100891,
"logps/chosen": -237.7594757080078,
"logps/rejected": -275.5934753417969,
"loss": 0.3514,
"rewards/accuracies": 0.859375,
"rewards/chosen": -1.6714935302734375,
"rewards/margins": 1.8643473386764526,
"rewards/rejected": -3.5358407497406006,
"step": 1320
},
{
"epoch": 0.907966036608781,
"grad_norm": 22.988879587386872,
"learning_rate": 1.0318664643399089e-07,
"logits/chosen": -0.5806565284729004,
"logits/rejected": -0.5450279116630554,
"logps/chosen": -221.33053588867188,
"logps/rejected": -256.5147705078125,
"loss": 0.3729,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -1.6562050580978394,
"rewards/margins": 1.747424840927124,
"rewards/rejected": -3.403630018234253,
"step": 1330
},
{
"epoch": 0.9147928489141102,
"grad_norm": 19.80848176554877,
"learning_rate": 9.559939301972686e-08,
"logits/chosen": -0.6481366157531738,
"logits/rejected": -0.6148696541786194,
"logps/chosen": -224.6954803466797,
"logps/rejected": -256.4845275878906,
"loss": 0.3775,
"rewards/accuracies": 0.8421875238418579,
"rewards/chosen": -1.7428375482559204,
"rewards/margins": 1.636692762374878,
"rewards/rejected": -3.379530191421509,
"step": 1340
},
{
"epoch": 0.9216196612194394,
"grad_norm": 25.8470434123946,
"learning_rate": 8.801213960546281e-08,
"logits/chosen": -0.6496397852897644,
"logits/rejected": -0.5912147164344788,
"logps/chosen": -223.9413299560547,
"logps/rejected": -259.1372375488281,
"loss": 0.3461,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -1.6729114055633545,
"rewards/margins": 1.7730145454406738,
"rewards/rejected": -3.445925712585449,
"step": 1350
},
{
"epoch": 0.9284464735247685,
"grad_norm": 33.2201336722171,
"learning_rate": 8.042488619119878e-08,
"logits/chosen": -0.645717203617096,
"logits/rejected": -0.6112032532691956,
"logps/chosen": -225.99624633789062,
"logps/rejected": -257.4811706542969,
"loss": 0.4065,
"rewards/accuracies": 0.8218750357627869,
"rewards/chosen": -1.761589527130127,
"rewards/margins": 1.6280558109283447,
"rewards/rejected": -3.389645576477051,
"step": 1360
},
{
"epoch": 0.9352732858300977,
"grad_norm": 27.005710517490183,
"learning_rate": 7.283763277693475e-08,
"logits/chosen": -0.573918342590332,
"logits/rejected": -0.5335432291030884,
"logps/chosen": -225.52552795410156,
"logps/rejected": -255.49449157714844,
"loss": 0.3465,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -1.7273519039154053,
"rewards/margins": 1.7527152299880981,
"rewards/rejected": -3.480067253112793,
"step": 1370
},
{
"epoch": 0.9421000981354269,
"grad_norm": 32.140399259495645,
"learning_rate": 6.525037936267071e-08,
"logits/chosen": -0.6214314103126526,
"logits/rejected": -0.570462167263031,
"logps/chosen": -224.70672607421875,
"logps/rejected": -264.4761962890625,
"loss": 0.3218,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -1.6792542934417725,
"rewards/margins": 1.915861964225769,
"rewards/rejected": -3.595116138458252,
"step": 1380
},
{
"epoch": 0.948926910440756,
"grad_norm": 31.250154294424732,
"learning_rate": 5.766312594840667e-08,
"logits/chosen": -0.6339004635810852,
"logits/rejected": -0.5892723798751831,
"logps/chosen": -220.26611328125,
"logps/rejected": -252.96212768554688,
"loss": 0.3864,
"rewards/accuracies": 0.8312499523162842,
"rewards/chosen": -1.6645467281341553,
"rewards/margins": 1.5790960788726807,
"rewards/rejected": -3.243642807006836,
"step": 1390
},
{
"epoch": 0.9557537227460853,
"grad_norm": 30.068762957187783,
"learning_rate": 5.007587253414264e-08,
"logits/chosen": -0.678811252117157,
"logits/rejected": -0.6359538435935974,
"logps/chosen": -224.49069213867188,
"logps/rejected": -258.3272705078125,
"loss": 0.3447,
"rewards/accuracies": 0.8531250357627869,
"rewards/chosen": -1.575748085975647,
"rewards/margins": 1.9220972061157227,
"rewards/rejected": -3.49784517288208,
"step": 1400
},
{
"epoch": 0.9625805350514144,
"grad_norm": 22.16371068962549,
"learning_rate": 4.2488619119878606e-08,
"logits/chosen": -0.6366287469863892,
"logits/rejected": -0.5852836966514587,
"logps/chosen": -227.71780395507812,
"logps/rejected": -267.0358581542969,
"loss": 0.3718,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -1.6289258003234863,
"rewards/margins": 1.7643526792526245,
"rewards/rejected": -3.3932785987854004,
"step": 1410
},
{
"epoch": 0.9694073473567436,
"grad_norm": 31.032456565988113,
"learning_rate": 3.4901365705614566e-08,
"logits/chosen": -0.6306103467941284,
"logits/rejected": -0.5921708345413208,
"logps/chosen": -221.66065979003906,
"logps/rejected": -254.41958618164062,
"loss": 0.3678,
"rewards/accuracies": 0.823437511920929,
"rewards/chosen": -1.5656054019927979,
"rewards/margins": 1.682039499282837,
"rewards/rejected": -3.2476449012756348,
"step": 1420
},
{
"epoch": 0.9762341596620728,
"grad_norm": 26.873435878225383,
"learning_rate": 2.731411229135053e-08,
"logits/chosen": -0.6624563336372375,
"logits/rejected": -0.6294071078300476,
"logps/chosen": -224.36407470703125,
"logps/rejected": -263.2255859375,
"loss": 0.3681,
"rewards/accuracies": 0.8484375476837158,
"rewards/chosen": -1.7730777263641357,
"rewards/margins": 1.7527307271957397,
"rewards/rejected": -3.525808334350586,
"step": 1430
},
{
"epoch": 0.9830609719674019,
"grad_norm": 28.36352572432148,
"learning_rate": 1.9726858877086493e-08,
"logits/chosen": -0.6402366161346436,
"logits/rejected": -0.5960521697998047,
"logps/chosen": -225.24977111816406,
"logps/rejected": -257.8275451660156,
"loss": 0.3734,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -1.7362611293792725,
"rewards/margins": 1.7620372772216797,
"rewards/rejected": -3.498298168182373,
"step": 1440
},
{
"epoch": 0.9898877842727312,
"grad_norm": 30.66526971215358,
"learning_rate": 1.2139605462822458e-08,
"logits/chosen": -0.6005350351333618,
"logits/rejected": -0.5661831498146057,
"logps/chosen": -227.962158203125,
"logps/rejected": -261.6782531738281,
"loss": 0.3924,
"rewards/accuracies": 0.8328125476837158,
"rewards/chosen": -1.720937728881836,
"rewards/margins": 1.5882391929626465,
"rewards/rejected": -3.3091769218444824,
"step": 1450
},
{
"epoch": 0.9967145965780604,
"grad_norm": 36.64240487573334,
"learning_rate": 4.552352048558422e-09,
"logits/chosen": -0.6393886804580688,
"logits/rejected": -0.6115251183509827,
"logps/chosen": -229.70652770996094,
"logps/rejected": -268.06982421875,
"loss": 0.3379,
"rewards/accuracies": 0.8734375238418579,
"rewards/chosen": -1.651149034500122,
"rewards/margins": 1.8959904909133911,
"rewards/rejected": -3.5471396446228027,
"step": 1460
},
{
"epoch": 1.0,
"step": 1465,
"total_flos": 161167907028992.0,
"train_loss": 0.47723283336431094,
"train_runtime": 14257.9418,
"train_samples_per_second": 6.575,
"train_steps_per_second": 0.103
}
],
"logging_steps": 10,
"max_steps": 1465,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 161167907028992.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}