|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1465, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00682681230532918, |
|
"grad_norm": 20.491548678692148, |
|
"learning_rate": 6.122448979591837e-08, |
|
"logits/chosen": 0.03672148287296295, |
|
"logits/rejected": 0.041521187871694565, |
|
"logps/chosen": -191.74862670898438, |
|
"logps/rejected": -189.4052276611328, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": 0.0014678842853754759, |
|
"rewards/margins": 0.0024292597081512213, |
|
"rewards/rejected": -0.0009613755391910672, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01365362461065836, |
|
"grad_norm": 21.860852469835415, |
|
"learning_rate": 1.2925170068027211e-07, |
|
"logits/chosen": 0.04523754119873047, |
|
"logits/rejected": 0.05510401353240013, |
|
"logps/chosen": -187.8703155517578, |
|
"logps/rejected": -187.6009979248047, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.4859375059604645, |
|
"rewards/chosen": -0.0003124059294350445, |
|
"rewards/margins": -0.0007655444787815213, |
|
"rewards/rejected": 0.0004531386948656291, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02048043691598754, |
|
"grad_norm": 20.278529512570657, |
|
"learning_rate": 1.9727891156462583e-07, |
|
"logits/chosen": 0.020983930677175522, |
|
"logits/rejected": 0.04532231390476227, |
|
"logps/chosen": -185.85728454589844, |
|
"logps/rejected": -188.9866180419922, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.00064073596149683, |
|
"rewards/margins": -0.0005829028668813407, |
|
"rewards/rejected": 0.001223638653755188, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02730724922131672, |
|
"grad_norm": 19.626379046619967, |
|
"learning_rate": 2.653061224489796e-07, |
|
"logits/chosen": 0.03043345920741558, |
|
"logits/rejected": 0.032446593046188354, |
|
"logps/chosen": -193.6338653564453, |
|
"logps/rejected": -190.4232635498047, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.002521326532587409, |
|
"rewards/margins": 0.004052319563925266, |
|
"rewards/rejected": -0.0015309930313378572, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0341340615266459, |
|
"grad_norm": 21.08295374738999, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": 0.04947035759687424, |
|
"logits/rejected": 0.06372452527284622, |
|
"logps/chosen": -188.39315795898438, |
|
"logps/rejected": -190.05992126464844, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.46406251192092896, |
|
"rewards/chosen": 0.0021625806111842394, |
|
"rewards/margins": -0.0017312343697994947, |
|
"rewards/rejected": 0.003893814980983734, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04096087383197508, |
|
"grad_norm": 20.25039554823623, |
|
"learning_rate": 4.0136054421768705e-07, |
|
"logits/chosen": 0.053825099021196365, |
|
"logits/rejected": 0.0521962009370327, |
|
"logps/chosen": -189.28480529785156, |
|
"logps/rejected": -184.31430053710938, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.5015624761581421, |
|
"rewards/chosen": 0.004196351859718561, |
|
"rewards/margins": -0.0006979627651162446, |
|
"rewards/rejected": 0.0048943147994577885, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04778768613730426, |
|
"grad_norm": 22.505298366939336, |
|
"learning_rate": 4.693877551020408e-07, |
|
"logits/chosen": 0.03855639323592186, |
|
"logits/rejected": 0.041457682847976685, |
|
"logps/chosen": -189.49111938476562, |
|
"logps/rejected": -190.42034912109375, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4937499761581421, |
|
"rewards/chosen": 0.008006598800420761, |
|
"rewards/margins": 4.7756126150488853e-05, |
|
"rewards/rejected": 0.007958842441439629, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05461449844263344, |
|
"grad_norm": 19.99809543741437, |
|
"learning_rate": 5.374149659863945e-07, |
|
"logits/chosen": 0.026321567595005035, |
|
"logits/rejected": 0.013571225106716156, |
|
"logps/chosen": -189.8534393310547, |
|
"logps/rejected": -187.626708984375, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": 0.018732454627752304, |
|
"rewards/margins": 0.011271494440734386, |
|
"rewards/rejected": 0.007460957858711481, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06144131074796262, |
|
"grad_norm": 22.176568391543768, |
|
"learning_rate": 6.054421768707482e-07, |
|
"logits/chosen": 0.020383019000291824, |
|
"logits/rejected": 0.02592673897743225, |
|
"logps/chosen": -186.662841796875, |
|
"logps/rejected": -189.3004608154297, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.582812488079071, |
|
"rewards/chosen": 0.027650414034724236, |
|
"rewards/margins": 0.011809633113443851, |
|
"rewards/rejected": 0.01584078185260296, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0682681230532918, |
|
"grad_norm": 20.53234701755388, |
|
"learning_rate": 6.734693877551019e-07, |
|
"logits/chosen": 0.02966993674635887, |
|
"logits/rejected": 0.05219441279768944, |
|
"logps/chosen": -190.25782775878906, |
|
"logps/rejected": -189.80935668945312, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": 0.040990687906742096, |
|
"rewards/margins": 0.01583397202193737, |
|
"rewards/rejected": 0.025156717747449875, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07509493535862098, |
|
"grad_norm": 21.19602898096358, |
|
"learning_rate": 7.414965986394558e-07, |
|
"logits/chosen": -0.007384412921965122, |
|
"logits/rejected": -0.016086794435977936, |
|
"logps/chosen": -189.52395629882812, |
|
"logps/rejected": -192.64816284179688, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.05572628974914551, |
|
"rewards/margins": 0.024477079510688782, |
|
"rewards/rejected": 0.031249215826392174, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08192174766395016, |
|
"grad_norm": 20.08862529877448, |
|
"learning_rate": 8.095238095238095e-07, |
|
"logits/chosen": -0.04889947175979614, |
|
"logits/rejected": -0.049361489713191986, |
|
"logps/chosen": -197.39492797851562, |
|
"logps/rejected": -192.8791046142578, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 0.059998854994773865, |
|
"rewards/margins": 0.023517701774835587, |
|
"rewards/rejected": 0.03648114949464798, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08874855996927934, |
|
"grad_norm": 19.78186965312465, |
|
"learning_rate": 8.775510204081632e-07, |
|
"logits/chosen": -0.022162066772580147, |
|
"logits/rejected": -0.02603471651673317, |
|
"logps/chosen": -192.2538604736328, |
|
"logps/rejected": -190.6973876953125, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.651562511920929, |
|
"rewards/chosen": 0.07047584652900696, |
|
"rewards/margins": 0.03453099727630615, |
|
"rewards/rejected": 0.035944852977991104, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09557537227460852, |
|
"grad_norm": 21.72668562860521, |
|
"learning_rate": 9.45578231292517e-07, |
|
"logits/chosen": -0.028122998774051666, |
|
"logits/rejected": -0.0023567965254187584, |
|
"logps/chosen": -193.58602905273438, |
|
"logps/rejected": -189.49517822265625, |
|
"loss": 0.6721, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.08927410840988159, |
|
"rewards/margins": 0.04811044782400131, |
|
"rewards/rejected": 0.04116365686058998, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1024021845799377, |
|
"grad_norm": 20.630914226397604, |
|
"learning_rate": 9.98482549317147e-07, |
|
"logits/chosen": -0.07732997089624405, |
|
"logits/rejected": -0.08366119861602783, |
|
"logps/chosen": -203.80441284179688, |
|
"logps/rejected": -202.51812744140625, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": 0.10389578342437744, |
|
"rewards/margins": 0.06255247443914413, |
|
"rewards/rejected": 0.04134330898523331, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.10922899688526688, |
|
"grad_norm": 20.25669433495337, |
|
"learning_rate": 9.908952959028832e-07, |
|
"logits/chosen": -0.09441889822483063, |
|
"logits/rejected": -0.08870529383420944, |
|
"logps/chosen": -185.63307189941406, |
|
"logps/rejected": -186.53253173828125, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": 0.08515263348817825, |
|
"rewards/margins": 0.0660884901881218, |
|
"rewards/rejected": 0.019064147025346756, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11605580919059606, |
|
"grad_norm": 20.384593980794733, |
|
"learning_rate": 9.833080424886191e-07, |
|
"logits/chosen": -0.08715031296014786, |
|
"logits/rejected": -0.05636933073401451, |
|
"logps/chosen": -188.3374481201172, |
|
"logps/rejected": -190.37437438964844, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": 0.04690036177635193, |
|
"rewards/margins": 0.08634677529335022, |
|
"rewards/rejected": -0.03944641351699829, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12288262149592524, |
|
"grad_norm": 21.86056528276187, |
|
"learning_rate": 9.75720789074355e-07, |
|
"logits/chosen": -0.07912790030241013, |
|
"logits/rejected": -0.07271625846624374, |
|
"logps/chosen": -197.11959838867188, |
|
"logps/rejected": -197.41287231445312, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.6749999523162842, |
|
"rewards/chosen": 0.04622086510062218, |
|
"rewards/margins": 0.10496747493743896, |
|
"rewards/rejected": -0.058746613562107086, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12970943380125444, |
|
"grad_norm": 22.24802422589698, |
|
"learning_rate": 9.68133535660091e-07, |
|
"logits/chosen": -0.07506565004587173, |
|
"logits/rejected": -0.05108420550823212, |
|
"logps/chosen": -190.35340881347656, |
|
"logps/rejected": -195.009521484375, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.6812500357627869, |
|
"rewards/chosen": 0.052541881799697876, |
|
"rewards/margins": 0.12386594712734222, |
|
"rewards/rejected": -0.07132406532764435, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1365362461065836, |
|
"grad_norm": 22.419822765649933, |
|
"learning_rate": 9.60546282245827e-07, |
|
"logits/chosen": -0.11874101310968399, |
|
"logits/rejected": -0.08336825668811798, |
|
"logps/chosen": -193.62611389160156, |
|
"logps/rejected": -196.01084899902344, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.7046875357627869, |
|
"rewards/chosen": 0.03949081152677536, |
|
"rewards/margins": 0.17370560765266418, |
|
"rewards/rejected": -0.13421478867530823, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1433630584119128, |
|
"grad_norm": 22.915739502006815, |
|
"learning_rate": 9.52959028831563e-07, |
|
"logits/chosen": -0.17365601658821106, |
|
"logits/rejected": -0.15520283579826355, |
|
"logps/chosen": -203.1890869140625, |
|
"logps/rejected": -200.14974975585938, |
|
"loss": 0.6287, |
|
"rewards/accuracies": 0.6687500476837158, |
|
"rewards/chosen": -0.01979774236679077, |
|
"rewards/margins": 0.18479280173778534, |
|
"rewards/rejected": -0.2045905441045761, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15018987071724196, |
|
"grad_norm": 20.769969852017695, |
|
"learning_rate": 9.453717754172988e-07, |
|
"logits/chosen": -0.1847243756055832, |
|
"logits/rejected": -0.15192236006259918, |
|
"logps/chosen": -198.33010864257812, |
|
"logps/rejected": -200.56228637695312, |
|
"loss": 0.6015, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.03269830346107483, |
|
"rewards/margins": 0.25339096784591675, |
|
"rewards/rejected": -0.2860892415046692, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15701668302257116, |
|
"grad_norm": 21.597574913870996, |
|
"learning_rate": 9.377845220030348e-07, |
|
"logits/chosen": -0.21274694800376892, |
|
"logits/rejected": -0.19206659495830536, |
|
"logps/chosen": -197.59228515625, |
|
"logps/rejected": -200.42283630371094, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.6734374761581421, |
|
"rewards/chosen": -0.09015801548957825, |
|
"rewards/margins": 0.24926723539829254, |
|
"rewards/rejected": -0.3394252359867096, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.16384349532790032, |
|
"grad_norm": 24.09497342960952, |
|
"learning_rate": 9.301972685887707e-07, |
|
"logits/chosen": -0.2293986827135086, |
|
"logits/rejected": -0.19997453689575195, |
|
"logps/chosen": -191.1751251220703, |
|
"logps/rejected": -196.63511657714844, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.15094764530658722, |
|
"rewards/margins": 0.24523335695266724, |
|
"rewards/rejected": -0.39618098735809326, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.17067030763322952, |
|
"grad_norm": 22.186402685803138, |
|
"learning_rate": 9.226100151745068e-07, |
|
"logits/chosen": -0.23599499464035034, |
|
"logits/rejected": -0.20987126231193542, |
|
"logps/chosen": -191.61639404296875, |
|
"logps/rejected": -197.80091857910156, |
|
"loss": 0.6205, |
|
"rewards/accuracies": 0.6546875238418579, |
|
"rewards/chosen": -0.22373469173908234, |
|
"rewards/margins": 0.2635762691497803, |
|
"rewards/rejected": -0.4873109459877014, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17749711993855868, |
|
"grad_norm": 23.30196457741843, |
|
"learning_rate": 9.150227617602428e-07, |
|
"logits/chosen": -0.2195354700088501, |
|
"logits/rejected": -0.19019638001918793, |
|
"logps/chosen": -190.50746154785156, |
|
"logps/rejected": -195.74331665039062, |
|
"loss": 0.6056, |
|
"rewards/accuracies": 0.7046875357627869, |
|
"rewards/chosen": -0.2523514926433563, |
|
"rewards/margins": 0.29894089698791504, |
|
"rewards/rejected": -0.5512923002243042, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.18432393224388788, |
|
"grad_norm": 23.437160399579792, |
|
"learning_rate": 9.074355083459787e-07, |
|
"logits/chosen": -0.2144363671541214, |
|
"logits/rejected": -0.19538246095180511, |
|
"logps/chosen": -194.883056640625, |
|
"logps/rejected": -202.83575439453125, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.7078125476837158, |
|
"rewards/chosen": -0.27382633090019226, |
|
"rewards/margins": 0.3095867931842804, |
|
"rewards/rejected": -0.5834130644798279, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.19115074454921704, |
|
"grad_norm": 23.67928529051871, |
|
"learning_rate": 8.998482549317147e-07, |
|
"logits/chosen": -0.2671777606010437, |
|
"logits/rejected": -0.23835715651512146, |
|
"logps/chosen": -189.7034912109375, |
|
"logps/rejected": -194.55117797851562, |
|
"loss": 0.589, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -0.2815781235694885, |
|
"rewards/margins": 0.34006255865097046, |
|
"rewards/rejected": -0.621640682220459, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19797755685454624, |
|
"grad_norm": 26.3785919721159, |
|
"learning_rate": 8.922610015174506e-07, |
|
"logits/chosen": -0.2851921319961548, |
|
"logits/rejected": -0.2668570280075073, |
|
"logps/chosen": -202.77801513671875, |
|
"logps/rejected": -207.8894805908203, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.7046875357627869, |
|
"rewards/chosen": -0.33676964044570923, |
|
"rewards/margins": 0.35969871282577515, |
|
"rewards/rejected": -0.6964683532714844, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2048043691598754, |
|
"grad_norm": 23.715391013722297, |
|
"learning_rate": 8.846737481031866e-07, |
|
"logits/chosen": -0.2776036262512207, |
|
"logits/rejected": -0.24332435429096222, |
|
"logps/chosen": -201.10296630859375, |
|
"logps/rejected": -203.72195434570312, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.3781723380088806, |
|
"rewards/margins": 0.3227519989013672, |
|
"rewards/rejected": -0.700924277305603, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2116311814652046, |
|
"grad_norm": 21.57268816738927, |
|
"learning_rate": 8.770864946889226e-07, |
|
"logits/chosen": -0.29242080450057983, |
|
"logits/rejected": -0.2669425308704376, |
|
"logps/chosen": -204.4817352294922, |
|
"logps/rejected": -214.0943603515625, |
|
"loss": 0.5794, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.3647349178791046, |
|
"rewards/margins": 0.4395143985748291, |
|
"rewards/rejected": -0.8042493462562561, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.21845799377053376, |
|
"grad_norm": 25.227342019618998, |
|
"learning_rate": 8.694992412746586e-07, |
|
"logits/chosen": -0.27386438846588135, |
|
"logits/rejected": -0.2711098790168762, |
|
"logps/chosen": -198.40101623535156, |
|
"logps/rejected": -204.6220703125, |
|
"loss": 0.5727, |
|
"rewards/accuracies": 0.7281250357627869, |
|
"rewards/chosen": -0.3862449824810028, |
|
"rewards/margins": 0.41143903136253357, |
|
"rewards/rejected": -0.7976840734481812, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.22528480607586296, |
|
"grad_norm": 24.00522520700325, |
|
"learning_rate": 8.619119878603945e-07, |
|
"logits/chosen": -0.3334537744522095, |
|
"logits/rejected": -0.3187546730041504, |
|
"logps/chosen": -208.01986694335938, |
|
"logps/rejected": -212.91488647460938, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.6703125238418579, |
|
"rewards/chosen": -0.4798099398612976, |
|
"rewards/margins": 0.37955817580223083, |
|
"rewards/rejected": -0.8593681454658508, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.23211161838119213, |
|
"grad_norm": 23.49360024665317, |
|
"learning_rate": 8.543247344461305e-07, |
|
"logits/chosen": -0.30438894033432007, |
|
"logits/rejected": -0.28073978424072266, |
|
"logps/chosen": -203.7110595703125, |
|
"logps/rejected": -211.83615112304688, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.3902357518672943, |
|
"rewards/margins": 0.5086088180541992, |
|
"rewards/rejected": -0.8988445401191711, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.23893843068652132, |
|
"grad_norm": 23.086500001623612, |
|
"learning_rate": 8.467374810318663e-07, |
|
"logits/chosen": -0.3257724940776825, |
|
"logits/rejected": -0.2853447198867798, |
|
"logps/chosen": -204.09765625, |
|
"logps/rejected": -212.38494873046875, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.4673992991447449, |
|
"rewards/margins": 0.5267953872680664, |
|
"rewards/rejected": -0.9941946864128113, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.24576524299185049, |
|
"grad_norm": 24.60129579583855, |
|
"learning_rate": 8.391502276176023e-07, |
|
"logits/chosen": -0.3029869794845581, |
|
"logits/rejected": -0.2718327045440674, |
|
"logps/chosen": -196.5174560546875, |
|
"logps/rejected": -204.4929656982422, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.7046875357627869, |
|
"rewards/chosen": -0.4800136089324951, |
|
"rewards/margins": 0.43177759647369385, |
|
"rewards/rejected": -0.9117912650108337, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.25259205529717965, |
|
"grad_norm": 23.03353178121409, |
|
"learning_rate": 8.315629742033384e-07, |
|
"logits/chosen": -0.28175657987594604, |
|
"logits/rejected": -0.2525416612625122, |
|
"logps/chosen": -197.58517456054688, |
|
"logps/rejected": -210.83853149414062, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.7234375476837158, |
|
"rewards/chosen": -0.5489044785499573, |
|
"rewards/margins": 0.4759043753147125, |
|
"rewards/rejected": -1.0248088836669922, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2594188676025089, |
|
"grad_norm": 21.702116754195792, |
|
"learning_rate": 8.239757207890743e-07, |
|
"logits/chosen": -0.3090224266052246, |
|
"logits/rejected": -0.2872709333896637, |
|
"logps/chosen": -204.044921875, |
|
"logps/rejected": -214.3769989013672, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.501671552658081, |
|
"rewards/margins": 0.5782625675201416, |
|
"rewards/rejected": -1.0799341201782227, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.26624567990783804, |
|
"grad_norm": 22.690534272455945, |
|
"learning_rate": 8.163884673748103e-07, |
|
"logits/chosen": -0.2652078866958618, |
|
"logits/rejected": -0.22916777431964874, |
|
"logps/chosen": -206.28855895996094, |
|
"logps/rejected": -217.3023681640625, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.7343750596046448, |
|
"rewards/chosen": -0.47486239671707153, |
|
"rewards/margins": 0.6135950684547424, |
|
"rewards/rejected": -1.088457465171814, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2730724922131672, |
|
"grad_norm": 24.587498727216616, |
|
"learning_rate": 8.088012139605462e-07, |
|
"logits/chosen": -0.28489071130752563, |
|
"logits/rejected": -0.23875750601291656, |
|
"logps/chosen": -202.77565002441406, |
|
"logps/rejected": -216.6030731201172, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5735213756561279, |
|
"rewards/margins": 0.6210550665855408, |
|
"rewards/rejected": -1.194576382637024, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2798993045184964, |
|
"grad_norm": 24.707605897401567, |
|
"learning_rate": 8.012139605462822e-07, |
|
"logits/chosen": -0.3593894839286804, |
|
"logits/rejected": -0.3138624429702759, |
|
"logps/chosen": -202.06204223632812, |
|
"logps/rejected": -208.73065185546875, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.604642391204834, |
|
"rewards/margins": 0.5522481203079224, |
|
"rewards/rejected": -1.156890630722046, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2867261168238256, |
|
"grad_norm": 24.754070000277498, |
|
"learning_rate": 7.936267071320181e-07, |
|
"logits/chosen": -0.3502323627471924, |
|
"logits/rejected": -0.3173756003379822, |
|
"logps/chosen": -207.6633758544922, |
|
"logps/rejected": -216.3917236328125, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": -0.6551162004470825, |
|
"rewards/margins": 0.6169639229774475, |
|
"rewards/rejected": -1.2720801830291748, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.29355292912915476, |
|
"grad_norm": 23.564476771066985, |
|
"learning_rate": 7.860394537177542e-07, |
|
"logits/chosen": -0.3500007092952728, |
|
"logits/rejected": -0.32545575499534607, |
|
"logps/chosen": -211.29928588867188, |
|
"logps/rejected": -227.12037658691406, |
|
"loss": 0.5223, |
|
"rewards/accuracies": 0.7421875596046448, |
|
"rewards/chosen": -0.7528213262557983, |
|
"rewards/margins": 0.739406943321228, |
|
"rewards/rejected": -1.492228388786316, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3003797414344839, |
|
"grad_norm": 21.091018091079327, |
|
"learning_rate": 7.784522003034901e-07, |
|
"logits/chosen": -0.35516998171806335, |
|
"logits/rejected": -0.3074837327003479, |
|
"logps/chosen": -203.1188507080078, |
|
"logps/rejected": -212.15496826171875, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.7765625715255737, |
|
"rewards/chosen": -0.6801650524139404, |
|
"rewards/margins": 0.7159599661827087, |
|
"rewards/rejected": -1.396125078201294, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3072065537398131, |
|
"grad_norm": 30.178688833532316, |
|
"learning_rate": 7.708649468892261e-07, |
|
"logits/chosen": -0.3771928548812866, |
|
"logits/rejected": -0.34754854440689087, |
|
"logps/chosen": -208.95216369628906, |
|
"logps/rejected": -225.38938903808594, |
|
"loss": 0.5226, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.7856850624084473, |
|
"rewards/margins": 0.6984450817108154, |
|
"rewards/rejected": -1.4841301441192627, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3140333660451423, |
|
"grad_norm": 22.73508892423378, |
|
"learning_rate": 7.632776934749621e-07, |
|
"logits/chosen": -0.40090760588645935, |
|
"logits/rejected": -0.3806273937225342, |
|
"logps/chosen": -208.29766845703125, |
|
"logps/rejected": -223.73020935058594, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.7431963086128235, |
|
"rewards/margins": 0.8224382400512695, |
|
"rewards/rejected": -1.5656344890594482, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3208601783504715, |
|
"grad_norm": 24.65367082247547, |
|
"learning_rate": 7.55690440060698e-07, |
|
"logits/chosen": -0.41392359137535095, |
|
"logits/rejected": -0.3990693688392639, |
|
"logps/chosen": -211.69845581054688, |
|
"logps/rejected": -222.681884765625, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.7671874761581421, |
|
"rewards/chosen": -0.7812504768371582, |
|
"rewards/margins": 0.8228715062141418, |
|
"rewards/rejected": -1.6041220426559448, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.32768699065580065, |
|
"grad_norm": 26.060565630616303, |
|
"learning_rate": 7.481031866464339e-07, |
|
"logits/chosen": -0.4470677673816681, |
|
"logits/rejected": -0.4043146073818207, |
|
"logps/chosen": -201.87158203125, |
|
"logps/rejected": -216.65240478515625, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.8349807858467102, |
|
"rewards/margins": 0.7298619151115417, |
|
"rewards/rejected": -1.564842700958252, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3345138029611298, |
|
"grad_norm": 24.867787006387463, |
|
"learning_rate": 7.405159332321699e-07, |
|
"logits/chosen": -0.4602758288383484, |
|
"logits/rejected": -0.4031441807746887, |
|
"logps/chosen": -215.20541381835938, |
|
"logps/rejected": -234.6583251953125, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": -0.9265861511230469, |
|
"rewards/margins": 0.8055697679519653, |
|
"rewards/rejected": -1.7321559190750122, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.34134061526645904, |
|
"grad_norm": 32.86790243336268, |
|
"learning_rate": 7.329286798179059e-07, |
|
"logits/chosen": -0.4144153594970703, |
|
"logits/rejected": -0.3892706036567688, |
|
"logps/chosen": -216.45887756347656, |
|
"logps/rejected": -225.97056579589844, |
|
"loss": 0.5274, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.9314414262771606, |
|
"rewards/margins": 0.7752954363822937, |
|
"rewards/rejected": -1.7067368030548096, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3481674275717882, |
|
"grad_norm": 29.0406209714796, |
|
"learning_rate": 7.253414264036418e-07, |
|
"logits/chosen": -0.4518946707248688, |
|
"logits/rejected": -0.4360005855560303, |
|
"logps/chosen": -210.40875244140625, |
|
"logps/rejected": -227.6586456298828, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.7640624642372131, |
|
"rewards/chosen": -0.7644888162612915, |
|
"rewards/margins": 0.8264600038528442, |
|
"rewards/rejected": -1.5909489393234253, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.35499423987711737, |
|
"grad_norm": 29.792037648827193, |
|
"learning_rate": 7.177541729893778e-07, |
|
"logits/chosen": -0.46055272221565247, |
|
"logits/rejected": -0.41955289244651794, |
|
"logps/chosen": -203.9451904296875, |
|
"logps/rejected": -225.48402404785156, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7920026779174805, |
|
"rewards/margins": 0.7750235795974731, |
|
"rewards/rejected": -1.5670262575149536, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.36182105218244653, |
|
"grad_norm": 28.48324275582042, |
|
"learning_rate": 7.101669195751137e-07, |
|
"logits/chosen": -0.44266417622566223, |
|
"logits/rejected": -0.4136849045753479, |
|
"logps/chosen": -217.11045837402344, |
|
"logps/rejected": -232.384521484375, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -0.9079422950744629, |
|
"rewards/margins": 0.8452929258346558, |
|
"rewards/rejected": -1.7532353401184082, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.36864786448777576, |
|
"grad_norm": 24.346858846505146, |
|
"learning_rate": 7.025796661608497e-07, |
|
"logits/chosen": -0.4453073740005493, |
|
"logits/rejected": -0.39773428440093994, |
|
"logps/chosen": -199.64686584472656, |
|
"logps/rejected": -217.36294555664062, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.7881425023078918, |
|
"rewards/margins": 0.745051383972168, |
|
"rewards/rejected": -1.533193826675415, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3754746767931049, |
|
"grad_norm": 23.88017645464549, |
|
"learning_rate": 6.949924127465857e-07, |
|
"logits/chosen": -0.4227825701236725, |
|
"logits/rejected": -0.3899107873439789, |
|
"logps/chosen": -218.3785400390625, |
|
"logps/rejected": -230.14222717285156, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.7703125476837158, |
|
"rewards/chosen": -0.704402506351471, |
|
"rewards/margins": 0.8275265693664551, |
|
"rewards/rejected": -1.5319291353225708, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3823014890984341, |
|
"grad_norm": 23.672046628232867, |
|
"learning_rate": 6.874051593323217e-07, |
|
"logits/chosen": -0.42757853865623474, |
|
"logits/rejected": -0.394180566072464, |
|
"logps/chosen": -208.079345703125, |
|
"logps/rejected": -228.22598266601562, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.770312488079071, |
|
"rewards/chosen": -0.8188365697860718, |
|
"rewards/margins": 0.9388971328735352, |
|
"rewards/rejected": -1.7577338218688965, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.38912830140376325, |
|
"grad_norm": 27.539677366232738, |
|
"learning_rate": 6.798179059180577e-07, |
|
"logits/chosen": -0.4404156506061554, |
|
"logits/rejected": -0.3975413739681244, |
|
"logps/chosen": -208.03125, |
|
"logps/rejected": -224.20956420898438, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.7593750357627869, |
|
"rewards/chosen": -0.8374041318893433, |
|
"rewards/margins": 0.7886074781417847, |
|
"rewards/rejected": -1.6260114908218384, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3959551137090925, |
|
"grad_norm": 25.29375987198196, |
|
"learning_rate": 6.722306525037936e-07, |
|
"logits/chosen": -0.4404994249343872, |
|
"logits/rejected": -0.40123340487480164, |
|
"logps/chosen": -213.8634490966797, |
|
"logps/rejected": -234.7059326171875, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.8731653094291687, |
|
"rewards/margins": 0.9025252461433411, |
|
"rewards/rejected": -1.7756905555725098, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.40278192601442164, |
|
"grad_norm": 23.196272876570017, |
|
"learning_rate": 6.646433990895296e-07, |
|
"logits/chosen": -0.42805609107017517, |
|
"logits/rejected": -0.3933747410774231, |
|
"logps/chosen": -210.49766540527344, |
|
"logps/rejected": -230.8019256591797, |
|
"loss": 0.472, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.9004274606704712, |
|
"rewards/margins": 0.9456923604011536, |
|
"rewards/rejected": -1.84611976146698, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4096087383197508, |
|
"grad_norm": 26.112729497646914, |
|
"learning_rate": 6.570561456752655e-07, |
|
"logits/chosen": -0.419676810503006, |
|
"logits/rejected": -0.3932231068611145, |
|
"logps/chosen": -212.6820831298828, |
|
"logps/rejected": -230.1705322265625, |
|
"loss": 0.4551, |
|
"rewards/accuracies": 0.776562511920929, |
|
"rewards/chosen": -0.9198075532913208, |
|
"rewards/margins": 1.0145457983016968, |
|
"rewards/rejected": -1.9343533515930176, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.41643555062508, |
|
"grad_norm": 26.539025702964505, |
|
"learning_rate": 6.494688922610015e-07, |
|
"logits/chosen": -0.47971057891845703, |
|
"logits/rejected": -0.43692541122436523, |
|
"logps/chosen": -212.31594848632812, |
|
"logps/rejected": -234.5380859375, |
|
"loss": 0.4563, |
|
"rewards/accuracies": 0.770312488079071, |
|
"rewards/chosen": -1.0747839212417603, |
|
"rewards/margins": 1.078429937362671, |
|
"rewards/rejected": -2.1532137393951416, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4232623629304092, |
|
"grad_norm": 26.2859842178028, |
|
"learning_rate": 6.418816388467374e-07, |
|
"logits/chosen": -0.4652007818222046, |
|
"logits/rejected": -0.4464990496635437, |
|
"logps/chosen": -212.9930419921875, |
|
"logps/rejected": -230.19207763671875, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.7906250357627869, |
|
"rewards/chosen": -1.1166890859603882, |
|
"rewards/margins": 0.9617180228233337, |
|
"rewards/rejected": -2.0784072875976562, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.43008917523573836, |
|
"grad_norm": 27.943160005363282, |
|
"learning_rate": 6.342943854324734e-07, |
|
"logits/chosen": -0.507358968257904, |
|
"logits/rejected": -0.46083295345306396, |
|
"logps/chosen": -211.0389404296875, |
|
"logps/rejected": -234.06576538085938, |
|
"loss": 0.4689, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.125166654586792, |
|
"rewards/margins": 1.1086124181747437, |
|
"rewards/rejected": -2.233778953552246, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.43691598754106753, |
|
"grad_norm": 27.031702699703523, |
|
"learning_rate": 6.267071320182093e-07, |
|
"logits/chosen": -0.5109987854957581, |
|
"logits/rejected": -0.4727884531021118, |
|
"logps/chosen": -216.13302612304688, |
|
"logps/rejected": -241.88287353515625, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.1550945043563843, |
|
"rewards/margins": 1.1085400581359863, |
|
"rewards/rejected": -2.263634443283081, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4437427998463967, |
|
"grad_norm": 26.49416191451856, |
|
"learning_rate": 6.191198786039453e-07, |
|
"logits/chosen": -0.5049822330474854, |
|
"logits/rejected": -0.46804797649383545, |
|
"logps/chosen": -220.15802001953125, |
|
"logps/rejected": -241.11386108398438, |
|
"loss": 0.4646, |
|
"rewards/accuracies": 0.770312488079071, |
|
"rewards/chosen": -1.1580806970596313, |
|
"rewards/margins": 1.064436435699463, |
|
"rewards/rejected": -2.222517490386963, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4505696121517259, |
|
"grad_norm": 28.052993928802096, |
|
"learning_rate": 6.115326251896813e-07, |
|
"logits/chosen": -0.5224714875221252, |
|
"logits/rejected": -0.496852308511734, |
|
"logps/chosen": -217.48992919921875, |
|
"logps/rejected": -234.48318481445312, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.7671874761581421, |
|
"rewards/chosen": -1.1128088235855103, |
|
"rewards/margins": 0.9438337087631226, |
|
"rewards/rejected": -2.056642532348633, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4573964244570551, |
|
"grad_norm": 32.11947138128127, |
|
"learning_rate": 6.039453717754173e-07, |
|
"logits/chosen": -0.4993141293525696, |
|
"logits/rejected": -0.4682856798171997, |
|
"logps/chosen": -206.40176391601562, |
|
"logps/rejected": -231.08042907714844, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.0732593536376953, |
|
"rewards/margins": 1.1166470050811768, |
|
"rewards/rejected": -2.189906358718872, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.46422323676238425, |
|
"grad_norm": 24.595239877463356, |
|
"learning_rate": 5.963581183611533e-07, |
|
"logits/chosen": -0.5414324998855591, |
|
"logits/rejected": -0.5145028233528137, |
|
"logps/chosen": -219.66567993164062, |
|
"logps/rejected": -236.0765380859375, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.7781250476837158, |
|
"rewards/chosen": -0.9715930819511414, |
|
"rewards/margins": 1.0554088354110718, |
|
"rewards/rejected": -2.0270018577575684, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.47105004906771347, |
|
"grad_norm": 27.819824043736283, |
|
"learning_rate": 5.887708649468892e-07, |
|
"logits/chosen": -0.482106477022171, |
|
"logits/rejected": -0.43574321269989014, |
|
"logps/chosen": -211.92596435546875, |
|
"logps/rejected": -234.6639862060547, |
|
"loss": 0.4352, |
|
"rewards/accuracies": 0.792187511920929, |
|
"rewards/chosen": -0.9926649332046509, |
|
"rewards/margins": 1.141036033630371, |
|
"rewards/rejected": -2.1337008476257324, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.47787686137304264, |
|
"grad_norm": 27.82950606174818, |
|
"learning_rate": 5.811836115326252e-07, |
|
"logits/chosen": -0.491192102432251, |
|
"logits/rejected": -0.45507892966270447, |
|
"logps/chosen": -215.52423095703125, |
|
"logps/rejected": -239.1810302734375, |
|
"loss": 0.4534, |
|
"rewards/accuracies": 0.7812500596046448, |
|
"rewards/chosen": -1.0811206102371216, |
|
"rewards/margins": 1.171852469444275, |
|
"rewards/rejected": -2.2529730796813965, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4847036736783718, |
|
"grad_norm": 32.40109215164061, |
|
"learning_rate": 5.735963581183611e-07, |
|
"logits/chosen": -0.48725226521492004, |
|
"logits/rejected": -0.4451846480369568, |
|
"logps/chosen": -211.22933959960938, |
|
"logps/rejected": -236.77740478515625, |
|
"loss": 0.4487, |
|
"rewards/accuracies": 0.7828124761581421, |
|
"rewards/chosen": -1.0895929336547852, |
|
"rewards/margins": 1.1770341396331787, |
|
"rewards/rejected": -2.2666268348693848, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.49153048598370097, |
|
"grad_norm": 27.259651037643604, |
|
"learning_rate": 5.660091047040971e-07, |
|
"logits/chosen": -0.5053711533546448, |
|
"logits/rejected": -0.4444194436073303, |
|
"logps/chosen": -205.80319213867188, |
|
"logps/rejected": -230.7117919921875, |
|
"loss": 0.4743, |
|
"rewards/accuracies": 0.770312488079071, |
|
"rewards/chosen": -1.1533528566360474, |
|
"rewards/margins": 1.07535982131958, |
|
"rewards/rejected": -2.228712797164917, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.4983572982890302, |
|
"grad_norm": 23.45407239305211, |
|
"learning_rate": 5.584218512898331e-07, |
|
"logits/chosen": -0.46755921840667725, |
|
"logits/rejected": -0.41828638315200806, |
|
"logps/chosen": -214.959716796875, |
|
"logps/rejected": -237.14413452148438, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1057405471801758, |
|
"rewards/margins": 1.1711297035217285, |
|
"rewards/rejected": -2.2768704891204834, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5051841105943593, |
|
"grad_norm": 24.513672931022274, |
|
"learning_rate": 5.508345978755691e-07, |
|
"logits/chosen": -0.5107758045196533, |
|
"logits/rejected": -0.47158223390579224, |
|
"logps/chosen": -214.1978759765625, |
|
"logps/rejected": -236.34100341796875, |
|
"loss": 0.4356, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.0896263122558594, |
|
"rewards/margins": 1.132210612297058, |
|
"rewards/rejected": -2.221837043762207, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5120109228996885, |
|
"grad_norm": 27.557361902005226, |
|
"learning_rate": 5.432473444613049e-07, |
|
"logits/chosen": -0.47495898604393005, |
|
"logits/rejected": -0.42891502380371094, |
|
"logps/chosen": -215.3628692626953, |
|
"logps/rejected": -240.29644775390625, |
|
"loss": 0.4433, |
|
"rewards/accuracies": 0.8046875596046448, |
|
"rewards/chosen": -1.1231842041015625, |
|
"rewards/margins": 1.1870129108428955, |
|
"rewards/rejected": -2.310196876525879, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5188377352050177, |
|
"grad_norm": 25.763088367806024, |
|
"learning_rate": 5.356600910470409e-07, |
|
"logits/chosen": -0.5234218835830688, |
|
"logits/rejected": -0.46476346254348755, |
|
"logps/chosen": -214.0421142578125, |
|
"logps/rejected": -238.0985565185547, |
|
"loss": 0.4236, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1129274368286133, |
|
"rewards/margins": 1.2772108316421509, |
|
"rewards/rejected": -2.3901383876800537, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5256645475103469, |
|
"grad_norm": 27.345063991868273, |
|
"learning_rate": 5.280728376327769e-07, |
|
"logits/chosen": -0.5037857294082642, |
|
"logits/rejected": -0.4784386157989502, |
|
"logps/chosen": -210.1291046142578, |
|
"logps/rejected": -236.04969787597656, |
|
"loss": 0.4347, |
|
"rewards/accuracies": 0.8109375238418579, |
|
"rewards/chosen": -1.1615896224975586, |
|
"rewards/margins": 1.2552942037582397, |
|
"rewards/rejected": -2.416883945465088, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5324913598156761, |
|
"grad_norm": 23.559487104074414, |
|
"learning_rate": 5.204855842185128e-07, |
|
"logits/chosen": -0.5264319777488708, |
|
"logits/rejected": -0.47137507796287537, |
|
"logps/chosen": -218.16024780273438, |
|
"logps/rejected": -245.5438995361328, |
|
"loss": 0.4609, |
|
"rewards/accuracies": 0.7703125476837158, |
|
"rewards/chosen": -1.2951855659484863, |
|
"rewards/margins": 1.170878291130066, |
|
"rewards/rejected": -2.4660637378692627, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5393181721210053, |
|
"grad_norm": 30.437623350555043, |
|
"learning_rate": 5.128983308042489e-07, |
|
"logits/chosen": -0.4954899251461029, |
|
"logits/rejected": -0.45233067870140076, |
|
"logps/chosen": -213.85757446289062, |
|
"logps/rejected": -242.7041473388672, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 0.8093750476837158, |
|
"rewards/chosen": -1.2700811624526978, |
|
"rewards/margins": 1.2533843517303467, |
|
"rewards/rejected": -2.523465394973755, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5461449844263344, |
|
"grad_norm": 25.96035380580991, |
|
"learning_rate": 5.053110773899848e-07, |
|
"logits/chosen": -0.49867063760757446, |
|
"logits/rejected": -0.44984591007232666, |
|
"logps/chosen": -218.67074584960938, |
|
"logps/rejected": -247.30982971191406, |
|
"loss": 0.424, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -1.237367868423462, |
|
"rewards/margins": 1.278685212135315, |
|
"rewards/rejected": -2.5160531997680664, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5529717967316636, |
|
"grad_norm": 27.066709483078917, |
|
"learning_rate": 4.977238239757208e-07, |
|
"logits/chosen": -0.4714178144931793, |
|
"logits/rejected": -0.4372885823249817, |
|
"logps/chosen": -218.98892211914062, |
|
"logps/rejected": -242.98770141601562, |
|
"loss": 0.4266, |
|
"rewards/accuracies": 0.7984375357627869, |
|
"rewards/chosen": -1.298151969909668, |
|
"rewards/margins": 1.222092866897583, |
|
"rewards/rejected": -2.520244836807251, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5597986090369927, |
|
"grad_norm": 28.230804755745105, |
|
"learning_rate": 4.901365705614567e-07, |
|
"logits/chosen": -0.45390385389328003, |
|
"logits/rejected": -0.43030381202697754, |
|
"logps/chosen": -220.013427734375, |
|
"logps/rejected": -241.9390411376953, |
|
"loss": 0.4526, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.354661464691162, |
|
"rewards/margins": 1.215053677558899, |
|
"rewards/rejected": -2.5697154998779297, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.566625421342322, |
|
"grad_norm": 32.13534664184047, |
|
"learning_rate": 4.825493171471927e-07, |
|
"logits/chosen": -0.475396066904068, |
|
"logits/rejected": -0.43329310417175293, |
|
"logps/chosen": -210.43185424804688, |
|
"logps/rejected": -236.67987060546875, |
|
"loss": 0.4189, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -1.264033317565918, |
|
"rewards/margins": 1.377021074295044, |
|
"rewards/rejected": -2.641054153442383, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5734522336476512, |
|
"grad_norm": 22.262860568714245, |
|
"learning_rate": 4.7496206373292864e-07, |
|
"logits/chosen": -0.4692656993865967, |
|
"logits/rejected": -0.4306912422180176, |
|
"logps/chosen": -211.5372772216797, |
|
"logps/rejected": -246.39736938476562, |
|
"loss": 0.3916, |
|
"rewards/accuracies": 0.8375000357627869, |
|
"rewards/chosen": -1.1525495052337646, |
|
"rewards/margins": 1.4558607339859009, |
|
"rewards/rejected": -2.608410358428955, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5802790459529803, |
|
"grad_norm": 22.80617456340079, |
|
"learning_rate": 4.673748103186646e-07, |
|
"logits/chosen": -0.46342021226882935, |
|
"logits/rejected": -0.41512057185173035, |
|
"logps/chosen": -221.32496643066406, |
|
"logps/rejected": -251.7954864501953, |
|
"loss": 0.394, |
|
"rewards/accuracies": 0.8218750357627869, |
|
"rewards/chosen": -1.2910584211349487, |
|
"rewards/margins": 1.4217520952224731, |
|
"rewards/rejected": -2.712810516357422, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5871058582583095, |
|
"grad_norm": 24.868191575194487, |
|
"learning_rate": 4.597875569044006e-07, |
|
"logits/chosen": -0.48653626441955566, |
|
"logits/rejected": -0.4366312623023987, |
|
"logps/chosen": -217.47422790527344, |
|
"logps/rejected": -241.48968505859375, |
|
"loss": 0.4269, |
|
"rewards/accuracies": 0.801562488079071, |
|
"rewards/chosen": -1.3257293701171875, |
|
"rewards/margins": 1.3266490697860718, |
|
"rewards/rejected": -2.652378559112549, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5939326705636387, |
|
"grad_norm": 27.035059402616938, |
|
"learning_rate": 4.5220030349013654e-07, |
|
"logits/chosen": -0.5033361911773682, |
|
"logits/rejected": -0.4694429039955139, |
|
"logps/chosen": -214.79815673828125, |
|
"logps/rejected": -237.64102172851562, |
|
"loss": 0.4296, |
|
"rewards/accuracies": 0.7921874523162842, |
|
"rewards/chosen": -1.3357491493225098, |
|
"rewards/margins": 1.2649694681167603, |
|
"rewards/rejected": -2.6007187366485596, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6007594828689679, |
|
"grad_norm": 27.746278145893346, |
|
"learning_rate": 4.446130500758725e-07, |
|
"logits/chosen": -0.5227242708206177, |
|
"logits/rejected": -0.4751604497432709, |
|
"logps/chosen": -218.23658752441406, |
|
"logps/rejected": -249.3454132080078, |
|
"loss": 0.4233, |
|
"rewards/accuracies": 0.817187488079071, |
|
"rewards/chosen": -1.3457627296447754, |
|
"rewards/margins": 1.428666591644287, |
|
"rewards/rejected": -2.7744295597076416, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6075862951742971, |
|
"grad_norm": 26.892931653503698, |
|
"learning_rate": 4.370257966616085e-07, |
|
"logits/chosen": -0.5066260099411011, |
|
"logits/rejected": -0.47855502367019653, |
|
"logps/chosen": -214.84915161132812, |
|
"logps/rejected": -240.56436157226562, |
|
"loss": 0.4612, |
|
"rewards/accuracies": 0.7812500596046448, |
|
"rewards/chosen": -1.4467679262161255, |
|
"rewards/margins": 1.3007091283798218, |
|
"rewards/rejected": -2.7474770545959473, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6144131074796262, |
|
"grad_norm": 32.793455771900234, |
|
"learning_rate": 4.2943854324734444e-07, |
|
"logits/chosen": -0.4987248182296753, |
|
"logits/rejected": -0.4517776668071747, |
|
"logps/chosen": -218.49545288085938, |
|
"logps/rejected": -252.3199462890625, |
|
"loss": 0.4007, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.4036812782287598, |
|
"rewards/margins": 1.497314453125, |
|
"rewards/rejected": -2.9009957313537598, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6212399197849554, |
|
"grad_norm": 30.14977908240741, |
|
"learning_rate": 4.2185128983308036e-07, |
|
"logits/chosen": -0.5123909711837769, |
|
"logits/rejected": -0.456384539604187, |
|
"logps/chosen": -221.94183349609375, |
|
"logps/rejected": -250.5224151611328, |
|
"loss": 0.4185, |
|
"rewards/accuracies": 0.8265625238418579, |
|
"rewards/chosen": -1.3800506591796875, |
|
"rewards/margins": 1.4040327072143555, |
|
"rewards/rejected": -2.784083366394043, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6280667320902846, |
|
"grad_norm": 23.187149506889586, |
|
"learning_rate": 4.142640364188164e-07, |
|
"logits/chosen": -0.5007960200309753, |
|
"logits/rejected": -0.4656420350074768, |
|
"logps/chosen": -224.66000366210938, |
|
"logps/rejected": -250.5994873046875, |
|
"loss": 0.4194, |
|
"rewards/accuracies": 0.817187488079071, |
|
"rewards/chosen": -1.4466440677642822, |
|
"rewards/margins": 1.3647561073303223, |
|
"rewards/rejected": -2.8114004135131836, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6348935443956137, |
|
"grad_norm": 26.465496977643166, |
|
"learning_rate": 4.0667678300455234e-07, |
|
"logits/chosen": -0.5095345973968506, |
|
"logits/rejected": -0.44781219959259033, |
|
"logps/chosen": -219.541259765625, |
|
"logps/rejected": -253.14544677734375, |
|
"loss": 0.3631, |
|
"rewards/accuracies": 0.8765624761581421, |
|
"rewards/chosen": -1.3718998432159424, |
|
"rewards/margins": 1.6033210754394531, |
|
"rewards/rejected": -2.9752209186553955, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.641720356700943, |
|
"grad_norm": 21.651167586614733, |
|
"learning_rate": 3.990895295902883e-07, |
|
"logits/chosen": -0.5611530542373657, |
|
"logits/rejected": -0.5065969824790955, |
|
"logps/chosen": -222.84457397460938, |
|
"logps/rejected": -251.35067749023438, |
|
"loss": 0.397, |
|
"rewards/accuracies": 0.8250000476837158, |
|
"rewards/chosen": -1.4304229021072388, |
|
"rewards/margins": 1.4556035995483398, |
|
"rewards/rejected": -2.886026620864868, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6485471690062722, |
|
"grad_norm": 21.56653990852637, |
|
"learning_rate": 3.915022761760243e-07, |
|
"logits/chosen": -0.575349748134613, |
|
"logits/rejected": -0.5415146350860596, |
|
"logps/chosen": -209.71266174316406, |
|
"logps/rejected": -239.22946166992188, |
|
"loss": 0.4001, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.521388053894043, |
|
"rewards/margins": 1.4791213274002075, |
|
"rewards/rejected": -3.000509262084961, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6553739813116013, |
|
"grad_norm": 23.31036794244746, |
|
"learning_rate": 3.8391502276176024e-07, |
|
"logits/chosen": -0.5698951482772827, |
|
"logits/rejected": -0.5178714394569397, |
|
"logps/chosen": -228.25030517578125, |
|
"logps/rejected": -261.415771484375, |
|
"loss": 0.3891, |
|
"rewards/accuracies": 0.817187488079071, |
|
"rewards/chosen": -1.6143665313720703, |
|
"rewards/margins": 1.645197868347168, |
|
"rewards/rejected": -3.2595643997192383, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6622007936169305, |
|
"grad_norm": 26.214223596010875, |
|
"learning_rate": 3.763277693474962e-07, |
|
"logits/chosen": -0.5214463472366333, |
|
"logits/rejected": -0.46749287843704224, |
|
"logps/chosen": -218.10549926757812, |
|
"logps/rejected": -251.87442016601562, |
|
"loss": 0.4196, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.63547945022583, |
|
"rewards/margins": 1.4419658184051514, |
|
"rewards/rejected": -3.0774452686309814, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.6690276059222596, |
|
"grad_norm": 24.89349466924626, |
|
"learning_rate": 3.687405159332321e-07, |
|
"logits/chosen": -0.5419428944587708, |
|
"logits/rejected": -0.5022714734077454, |
|
"logps/chosen": -223.1068115234375, |
|
"logps/rejected": -255.94949340820312, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -1.7430050373077393, |
|
"rewards/margins": 1.4413095712661743, |
|
"rewards/rejected": -3.184314489364624, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.6758544182275888, |
|
"grad_norm": 25.914909518247867, |
|
"learning_rate": 3.611532625189681e-07, |
|
"logits/chosen": -0.5115488767623901, |
|
"logits/rejected": -0.4625004827976227, |
|
"logps/chosen": -229.49105834960938, |
|
"logps/rejected": -265.0625, |
|
"loss": 0.3983, |
|
"rewards/accuracies": 0.8218750357627869, |
|
"rewards/chosen": -1.6320453882217407, |
|
"rewards/margins": 1.5734854936599731, |
|
"rewards/rejected": -3.2055306434631348, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6826812305329181, |
|
"grad_norm": 31.456143694319483, |
|
"learning_rate": 3.5356600910470406e-07, |
|
"logits/chosen": -0.5371730327606201, |
|
"logits/rejected": -0.4974362850189209, |
|
"logps/chosen": -236.7477569580078, |
|
"logps/rejected": -264.0472106933594, |
|
"loss": 0.3827, |
|
"rewards/accuracies": 0.8437500596046448, |
|
"rewards/chosen": -1.620214819908142, |
|
"rewards/margins": 1.5398459434509277, |
|
"rewards/rejected": -3.1600606441497803, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6895080428382472, |
|
"grad_norm": 29.661159656571126, |
|
"learning_rate": 3.459787556904401e-07, |
|
"logits/chosen": -0.5440015196800232, |
|
"logits/rejected": -0.49301889538764954, |
|
"logps/chosen": -224.03494262695312, |
|
"logps/rejected": -254.42193603515625, |
|
"loss": 0.4033, |
|
"rewards/accuracies": 0.8296875357627869, |
|
"rewards/chosen": -1.5924382209777832, |
|
"rewards/margins": 1.5776193141937256, |
|
"rewards/rejected": -3.170057773590088, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6963348551435764, |
|
"grad_norm": 38.12069128333079, |
|
"learning_rate": 3.3839150227617604e-07, |
|
"logits/chosen": -0.5860447883605957, |
|
"logits/rejected": -0.543270468711853, |
|
"logps/chosen": -228.84930419921875, |
|
"logps/rejected": -262.8966064453125, |
|
"loss": 0.3898, |
|
"rewards/accuracies": 0.8406250476837158, |
|
"rewards/chosen": -1.6053173542022705, |
|
"rewards/margins": 1.590077519416809, |
|
"rewards/rejected": -3.19539475440979, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7031616674489056, |
|
"grad_norm": 32.08364090632609, |
|
"learning_rate": 3.30804248861912e-07, |
|
"logits/chosen": -0.6051906943321228, |
|
"logits/rejected": -0.5597983598709106, |
|
"logps/chosen": -224.02899169921875, |
|
"logps/rejected": -258.93511962890625, |
|
"loss": 0.396, |
|
"rewards/accuracies": 0.8171875476837158, |
|
"rewards/chosen": -1.7182796001434326, |
|
"rewards/margins": 1.5724890232086182, |
|
"rewards/rejected": -3.290768623352051, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7099884797542347, |
|
"grad_norm": 25.599680429412086, |
|
"learning_rate": 3.232169954476479e-07, |
|
"logits/chosen": -0.6112679243087769, |
|
"logits/rejected": -0.5801026821136475, |
|
"logps/chosen": -225.71258544921875, |
|
"logps/rejected": -264.3663330078125, |
|
"loss": 0.3637, |
|
"rewards/accuracies": 0.8421875238418579, |
|
"rewards/chosen": -1.4613301753997803, |
|
"rewards/margins": 1.712023138999939, |
|
"rewards/rejected": -3.1733531951904297, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.716815292059564, |
|
"grad_norm": 26.325121380352627, |
|
"learning_rate": 3.156297420333839e-07, |
|
"logits/chosen": -0.6216264963150024, |
|
"logits/rejected": -0.5548665523529053, |
|
"logps/chosen": -226.58059692382812, |
|
"logps/rejected": -263.7754821777344, |
|
"loss": 0.3636, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.6483052968978882, |
|
"rewards/margins": 1.7705044746398926, |
|
"rewards/rejected": -3.4188098907470703, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7236421043648931, |
|
"grad_norm": 23.347203569226366, |
|
"learning_rate": 3.0804248861911986e-07, |
|
"logits/chosen": -0.5403355360031128, |
|
"logits/rejected": -0.49409806728363037, |
|
"logps/chosen": -225.88253784179688, |
|
"logps/rejected": -256.93182373046875, |
|
"loss": 0.393, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.6747300624847412, |
|
"rewards/margins": 1.6634035110473633, |
|
"rewards/rejected": -3.3381335735321045, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7304689166702223, |
|
"grad_norm": 26.591582696664684, |
|
"learning_rate": 3.004552352048558e-07, |
|
"logits/chosen": -0.60378497838974, |
|
"logits/rejected": -0.5446761250495911, |
|
"logps/chosen": -222.86285400390625, |
|
"logps/rejected": -254.32901000976562, |
|
"loss": 0.3562, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.373286247253418, |
|
"rewards/margins": 1.5952813625335693, |
|
"rewards/rejected": -2.9685676097869873, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7372957289755515, |
|
"grad_norm": 26.301256433411677, |
|
"learning_rate": 2.928679817905918e-07, |
|
"logits/chosen": -0.575655996799469, |
|
"logits/rejected": -0.5388238430023193, |
|
"logps/chosen": -226.25411987304688, |
|
"logps/rejected": -257.7029724121094, |
|
"loss": 0.3889, |
|
"rewards/accuracies": 0.832812488079071, |
|
"rewards/chosen": -1.5678967237472534, |
|
"rewards/margins": 1.5496362447738647, |
|
"rewards/rejected": -3.1175332069396973, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7441225412808806, |
|
"grad_norm": 29.1969544488184, |
|
"learning_rate": 2.8528072837632776e-07, |
|
"logits/chosen": -0.563581109046936, |
|
"logits/rejected": -0.4889605939388275, |
|
"logps/chosen": -215.546630859375, |
|
"logps/rejected": -251.0224609375, |
|
"loss": 0.3594, |
|
"rewards/accuracies": 0.854687511920929, |
|
"rewards/chosen": -1.5211578607559204, |
|
"rewards/margins": 1.6970359086990356, |
|
"rewards/rejected": -3.218193531036377, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7509493535862098, |
|
"grad_norm": 28.75255873182244, |
|
"learning_rate": 2.776934749620637e-07, |
|
"logits/chosen": -0.5607287883758545, |
|
"logits/rejected": -0.5297821760177612, |
|
"logps/chosen": -213.63365173339844, |
|
"logps/rejected": -240.619384765625, |
|
"loss": 0.4057, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -1.6024796962738037, |
|
"rewards/margins": 1.537475347518921, |
|
"rewards/rejected": -3.1399548053741455, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7577761658915391, |
|
"grad_norm": 28.027697277996715, |
|
"learning_rate": 2.7010622154779964e-07, |
|
"logits/chosen": -0.5775099992752075, |
|
"logits/rejected": -0.5231542587280273, |
|
"logps/chosen": -224.80667114257812, |
|
"logps/rejected": -259.0721435546875, |
|
"loss": 0.4044, |
|
"rewards/accuracies": 0.8140624761581421, |
|
"rewards/chosen": -1.598193883895874, |
|
"rewards/margins": 1.5613579750061035, |
|
"rewards/rejected": -3.1595516204833984, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.7646029781968682, |
|
"grad_norm": 19.772049611357087, |
|
"learning_rate": 2.6251896813353566e-07, |
|
"logits/chosen": -0.5745671987533569, |
|
"logits/rejected": -0.5299438834190369, |
|
"logps/chosen": -225.1347198486328, |
|
"logps/rejected": -255.4309539794922, |
|
"loss": 0.3858, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.4894109964370728, |
|
"rewards/margins": 1.691686987876892, |
|
"rewards/rejected": -3.181097984313965, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.7714297905021974, |
|
"grad_norm": 23.20450105175028, |
|
"learning_rate": 2.549317147192716e-07, |
|
"logits/chosen": -0.5600322484970093, |
|
"logits/rejected": -0.5002347230911255, |
|
"logps/chosen": -219.86434936523438, |
|
"logps/rejected": -253.78878784179688, |
|
"loss": 0.3663, |
|
"rewards/accuracies": 0.8453125357627869, |
|
"rewards/chosen": -1.4636483192443848, |
|
"rewards/margins": 1.6817249059677124, |
|
"rewards/rejected": -3.1453733444213867, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.7782566028075265, |
|
"grad_norm": 28.72150866508454, |
|
"learning_rate": 2.473444613050076e-07, |
|
"logits/chosen": -0.6041327118873596, |
|
"logits/rejected": -0.5645285844802856, |
|
"logps/chosen": -216.68939208984375, |
|
"logps/rejected": -247.66275024414062, |
|
"loss": 0.3806, |
|
"rewards/accuracies": 0.8328125476837158, |
|
"rewards/chosen": -1.5338340997695923, |
|
"rewards/margins": 1.5916988849639893, |
|
"rewards/rejected": -3.125532865524292, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.7850834151128557, |
|
"grad_norm": 29.858461214238897, |
|
"learning_rate": 2.3975720789074356e-07, |
|
"logits/chosen": -0.6299252510070801, |
|
"logits/rejected": -0.586955189704895, |
|
"logps/chosen": -231.401611328125, |
|
"logps/rejected": -263.02197265625, |
|
"loss": 0.3998, |
|
"rewards/accuracies": 0.8328125476837158, |
|
"rewards/chosen": -1.6045633554458618, |
|
"rewards/margins": 1.6497775316238403, |
|
"rewards/rejected": -3.2543411254882812, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.791910227418185, |
|
"grad_norm": 26.24413163476253, |
|
"learning_rate": 2.321699544764795e-07, |
|
"logits/chosen": -0.5830259919166565, |
|
"logits/rejected": -0.5397896766662598, |
|
"logps/chosen": -213.19375610351562, |
|
"logps/rejected": -249.24717712402344, |
|
"loss": 0.3717, |
|
"rewards/accuracies": 0.8250000476837158, |
|
"rewards/chosen": -1.6501479148864746, |
|
"rewards/margins": 1.6961115598678589, |
|
"rewards/rejected": -3.346259593963623, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.7987370397235141, |
|
"grad_norm": 31.016581977192125, |
|
"learning_rate": 2.2458270106221546e-07, |
|
"logits/chosen": -0.5983390808105469, |
|
"logits/rejected": -0.5455670952796936, |
|
"logps/chosen": -224.10618591308594, |
|
"logps/rejected": -254.94383239746094, |
|
"loss": 0.3732, |
|
"rewards/accuracies": 0.8296875357627869, |
|
"rewards/chosen": -1.5914267301559448, |
|
"rewards/margins": 1.632917046546936, |
|
"rewards/rejected": -3.2243435382843018, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8055638520288433, |
|
"grad_norm": 82.84012389678055, |
|
"learning_rate": 2.1699544764795143e-07, |
|
"logits/chosen": -0.6019859910011292, |
|
"logits/rejected": -0.5678104758262634, |
|
"logps/chosen": -222.878662109375, |
|
"logps/rejected": -253.78060913085938, |
|
"loss": 0.4147, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.6500358581542969, |
|
"rewards/margins": 1.5844390392303467, |
|
"rewards/rejected": -3.2344746589660645, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8123906643341725, |
|
"grad_norm": 22.55342908794488, |
|
"learning_rate": 2.094081942336874e-07, |
|
"logits/chosen": -0.5819066762924194, |
|
"logits/rejected": -0.5418481826782227, |
|
"logps/chosen": -221.70608520507812, |
|
"logps/rejected": -254.09922790527344, |
|
"loss": 0.3354, |
|
"rewards/accuracies": 0.8609375357627869, |
|
"rewards/chosen": -1.6243677139282227, |
|
"rewards/margins": 1.7402938604354858, |
|
"rewards/rejected": -3.364661455154419, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8192174766395016, |
|
"grad_norm": 21.249823285036445, |
|
"learning_rate": 2.0182094081942336e-07, |
|
"logits/chosen": -0.5841631889343262, |
|
"logits/rejected": -0.5415323972702026, |
|
"logps/chosen": -225.88800048828125, |
|
"logps/rejected": -254.038818359375, |
|
"loss": 0.3821, |
|
"rewards/accuracies": 0.839062511920929, |
|
"rewards/chosen": -1.6330121755599976, |
|
"rewards/margins": 1.732587456703186, |
|
"rewards/rejected": -3.3655996322631836, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8260442889448308, |
|
"grad_norm": 22.957761561567523, |
|
"learning_rate": 1.9423368740515933e-07, |
|
"logits/chosen": -0.5876274704933167, |
|
"logits/rejected": -0.5527446866035461, |
|
"logps/chosen": -237.04470825195312, |
|
"logps/rejected": -263.58868408203125, |
|
"loss": 0.3658, |
|
"rewards/accuracies": 0.8531250357627869, |
|
"rewards/chosen": -1.6271567344665527, |
|
"rewards/margins": 1.6703208684921265, |
|
"rewards/rejected": -3.297477960586548, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.83287110125016, |
|
"grad_norm": 26.3109466733547, |
|
"learning_rate": 1.8664643399089527e-07, |
|
"logits/chosen": -0.5855602622032166, |
|
"logits/rejected": -0.5348464846611023, |
|
"logps/chosen": -220.74581909179688, |
|
"logps/rejected": -259.97076416015625, |
|
"loss": 0.392, |
|
"rewards/accuracies": 0.8234375715255737, |
|
"rewards/chosen": -1.666372299194336, |
|
"rewards/margins": 1.7341811656951904, |
|
"rewards/rejected": -3.4005534648895264, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8396979135554892, |
|
"grad_norm": 32.86005475979103, |
|
"learning_rate": 1.7905918057663124e-07, |
|
"logits/chosen": -0.6146824359893799, |
|
"logits/rejected": -0.5769205093383789, |
|
"logps/chosen": -223.04859924316406, |
|
"logps/rejected": -259.2931213378906, |
|
"loss": 0.3747, |
|
"rewards/accuracies": 0.8484375476837158, |
|
"rewards/chosen": -1.6388548612594604, |
|
"rewards/margins": 1.6829884052276611, |
|
"rewards/rejected": -3.321843147277832, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8465247258608184, |
|
"grad_norm": 27.824013672905682, |
|
"learning_rate": 1.7147192716236723e-07, |
|
"logits/chosen": -0.5848041772842407, |
|
"logits/rejected": -0.5365484356880188, |
|
"logps/chosen": -224.9688262939453, |
|
"logps/rejected": -253.75857543945312, |
|
"loss": 0.374, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -1.633022427558899, |
|
"rewards/margins": 1.6262296438217163, |
|
"rewards/rejected": -3.2592520713806152, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8533515381661475, |
|
"grad_norm": 28.870976428951412, |
|
"learning_rate": 1.638846737481032e-07, |
|
"logits/chosen": -0.6266176700592041, |
|
"logits/rejected": -0.5750494003295898, |
|
"logps/chosen": -225.53489685058594, |
|
"logps/rejected": -251.16812133789062, |
|
"loss": 0.3643, |
|
"rewards/accuracies": 0.8421875238418579, |
|
"rewards/chosen": -1.6029326915740967, |
|
"rewards/margins": 1.637751817703247, |
|
"rewards/rejected": -3.2406845092773438, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8601783504714767, |
|
"grad_norm": 28.44671682958466, |
|
"learning_rate": 1.5629742033383914e-07, |
|
"logits/chosen": -0.5748768448829651, |
|
"logits/rejected": -0.5039246082305908, |
|
"logps/chosen": -229.083740234375, |
|
"logps/rejected": -265.5872802734375, |
|
"loss": 0.3464, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -1.6385741233825684, |
|
"rewards/margins": 2.006284713745117, |
|
"rewards/rejected": -3.6448588371276855, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.867005162776806, |
|
"grad_norm": 26.03554320093484, |
|
"learning_rate": 1.487101669195751e-07, |
|
"logits/chosen": -0.580173671245575, |
|
"logits/rejected": -0.5294475555419922, |
|
"logps/chosen": -225.72938537597656, |
|
"logps/rejected": -262.03546142578125, |
|
"loss": 0.3718, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -1.6143238544464111, |
|
"rewards/margins": 1.8427155017852783, |
|
"rewards/rejected": -3.4570393562316895, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.8738319750821351, |
|
"grad_norm": 22.97729500897279, |
|
"learning_rate": 1.4112291350531107e-07, |
|
"logits/chosen": -0.6003884673118591, |
|
"logits/rejected": -0.5561665296554565, |
|
"logps/chosen": -221.987548828125, |
|
"logps/rejected": -258.51727294921875, |
|
"loss": 0.3686, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -1.4877190589904785, |
|
"rewards/margins": 1.6500287055969238, |
|
"rewards/rejected": -3.1377477645874023, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.8806587873874643, |
|
"grad_norm": 31.37447822214391, |
|
"learning_rate": 1.3353566009104704e-07, |
|
"logits/chosen": -0.6364210844039917, |
|
"logits/rejected": -0.575194239616394, |
|
"logps/chosen": -225.1094207763672, |
|
"logps/rejected": -260.13885498046875, |
|
"loss": 0.3534, |
|
"rewards/accuracies": 0.864062488079071, |
|
"rewards/chosen": -1.6206319332122803, |
|
"rewards/margins": 1.7905977964401245, |
|
"rewards/rejected": -3.4112298488616943, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.8874855996927934, |
|
"grad_norm": 22.936789815076953, |
|
"learning_rate": 1.25948406676783e-07, |
|
"logits/chosen": -0.6323338747024536, |
|
"logits/rejected": -0.6003640294075012, |
|
"logps/chosen": -227.20034790039062, |
|
"logps/rejected": -259.46502685546875, |
|
"loss": 0.3575, |
|
"rewards/accuracies": 0.8406250476837158, |
|
"rewards/chosen": -1.6749684810638428, |
|
"rewards/margins": 1.7170754671096802, |
|
"rewards/rejected": -3.3920438289642334, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.8943124119981226, |
|
"grad_norm": 22.489511604558004, |
|
"learning_rate": 1.1836115326251896e-07, |
|
"logits/chosen": -0.6401182413101196, |
|
"logits/rejected": -0.5833394527435303, |
|
"logps/chosen": -223.30029296875, |
|
"logps/rejected": -262.72998046875, |
|
"loss": 0.3353, |
|
"rewards/accuracies": 0.8593750596046448, |
|
"rewards/chosen": -1.560599446296692, |
|
"rewards/margins": 1.906503677368164, |
|
"rewards/rejected": -3.4671034812927246, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9011392243034518, |
|
"grad_norm": 37.43162732034228, |
|
"learning_rate": 1.1077389984825493e-07, |
|
"logits/chosen": -0.5761069059371948, |
|
"logits/rejected": -0.5430048108100891, |
|
"logps/chosen": -237.7594757080078, |
|
"logps/rejected": -275.5934753417969, |
|
"loss": 0.3514, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -1.6714935302734375, |
|
"rewards/margins": 1.8643473386764526, |
|
"rewards/rejected": -3.5358407497406006, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.907966036608781, |
|
"grad_norm": 22.988879587386872, |
|
"learning_rate": 1.0318664643399089e-07, |
|
"logits/chosen": -0.5806565284729004, |
|
"logits/rejected": -0.5450279116630554, |
|
"logps/chosen": -221.33053588867188, |
|
"logps/rejected": -256.5147705078125, |
|
"loss": 0.3729, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -1.6562050580978394, |
|
"rewards/margins": 1.747424840927124, |
|
"rewards/rejected": -3.403630018234253, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9147928489141102, |
|
"grad_norm": 19.80848176554877, |
|
"learning_rate": 9.559939301972686e-08, |
|
"logits/chosen": -0.6481366157531738, |
|
"logits/rejected": -0.6148696541786194, |
|
"logps/chosen": -224.6954803466797, |
|
"logps/rejected": -256.4845275878906, |
|
"loss": 0.3775, |
|
"rewards/accuracies": 0.8421875238418579, |
|
"rewards/chosen": -1.7428375482559204, |
|
"rewards/margins": 1.636692762374878, |
|
"rewards/rejected": -3.379530191421509, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9216196612194394, |
|
"grad_norm": 25.8470434123946, |
|
"learning_rate": 8.801213960546281e-08, |
|
"logits/chosen": -0.6496397852897644, |
|
"logits/rejected": -0.5912147164344788, |
|
"logps/chosen": -223.9413299560547, |
|
"logps/rejected": -259.1372375488281, |
|
"loss": 0.3461, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -1.6729114055633545, |
|
"rewards/margins": 1.7730145454406738, |
|
"rewards/rejected": -3.445925712585449, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9284464735247685, |
|
"grad_norm": 33.2201336722171, |
|
"learning_rate": 8.042488619119878e-08, |
|
"logits/chosen": -0.645717203617096, |
|
"logits/rejected": -0.6112032532691956, |
|
"logps/chosen": -225.99624633789062, |
|
"logps/rejected": -257.4811706542969, |
|
"loss": 0.4065, |
|
"rewards/accuracies": 0.8218750357627869, |
|
"rewards/chosen": -1.761589527130127, |
|
"rewards/margins": 1.6280558109283447, |
|
"rewards/rejected": -3.389645576477051, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9352732858300977, |
|
"grad_norm": 27.005710517490183, |
|
"learning_rate": 7.283763277693475e-08, |
|
"logits/chosen": -0.573918342590332, |
|
"logits/rejected": -0.5335432291030884, |
|
"logps/chosen": -225.52552795410156, |
|
"logps/rejected": -255.49449157714844, |
|
"loss": 0.3465, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -1.7273519039154053, |
|
"rewards/margins": 1.7527152299880981, |
|
"rewards/rejected": -3.480067253112793, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9421000981354269, |
|
"grad_norm": 32.140399259495645, |
|
"learning_rate": 6.525037936267071e-08, |
|
"logits/chosen": -0.6214314103126526, |
|
"logits/rejected": -0.570462167263031, |
|
"logps/chosen": -224.70672607421875, |
|
"logps/rejected": -264.4761962890625, |
|
"loss": 0.3218, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.6792542934417725, |
|
"rewards/margins": 1.915861964225769, |
|
"rewards/rejected": -3.595116138458252, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.948926910440756, |
|
"grad_norm": 31.250154294424732, |
|
"learning_rate": 5.766312594840667e-08, |
|
"logits/chosen": -0.6339004635810852, |
|
"logits/rejected": -0.5892723798751831, |
|
"logps/chosen": -220.26611328125, |
|
"logps/rejected": -252.96212768554688, |
|
"loss": 0.3864, |
|
"rewards/accuracies": 0.8312499523162842, |
|
"rewards/chosen": -1.6645467281341553, |
|
"rewards/margins": 1.5790960788726807, |
|
"rewards/rejected": -3.243642807006836, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9557537227460853, |
|
"grad_norm": 30.068762957187783, |
|
"learning_rate": 5.007587253414264e-08, |
|
"logits/chosen": -0.678811252117157, |
|
"logits/rejected": -0.6359538435935974, |
|
"logps/chosen": -224.49069213867188, |
|
"logps/rejected": -258.3272705078125, |
|
"loss": 0.3447, |
|
"rewards/accuracies": 0.8531250357627869, |
|
"rewards/chosen": -1.575748085975647, |
|
"rewards/margins": 1.9220972061157227, |
|
"rewards/rejected": -3.49784517288208, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9625805350514144, |
|
"grad_norm": 22.16371068962549, |
|
"learning_rate": 4.2488619119878606e-08, |
|
"logits/chosen": -0.6366287469863892, |
|
"logits/rejected": -0.5852836966514587, |
|
"logps/chosen": -227.71780395507812, |
|
"logps/rejected": -267.0358581542969, |
|
"loss": 0.3718, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.6289258003234863, |
|
"rewards/margins": 1.7643526792526245, |
|
"rewards/rejected": -3.3932785987854004, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.9694073473567436, |
|
"grad_norm": 31.032456565988113, |
|
"learning_rate": 3.4901365705614566e-08, |
|
"logits/chosen": -0.6306103467941284, |
|
"logits/rejected": -0.5921708345413208, |
|
"logps/chosen": -221.66065979003906, |
|
"logps/rejected": -254.41958618164062, |
|
"loss": 0.3678, |
|
"rewards/accuracies": 0.823437511920929, |
|
"rewards/chosen": -1.5656054019927979, |
|
"rewards/margins": 1.682039499282837, |
|
"rewards/rejected": -3.2476449012756348, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.9762341596620728, |
|
"grad_norm": 26.873435878225383, |
|
"learning_rate": 2.731411229135053e-08, |
|
"logits/chosen": -0.6624563336372375, |
|
"logits/rejected": -0.6294071078300476, |
|
"logps/chosen": -224.36407470703125, |
|
"logps/rejected": -263.2255859375, |
|
"loss": 0.3681, |
|
"rewards/accuracies": 0.8484375476837158, |
|
"rewards/chosen": -1.7730777263641357, |
|
"rewards/margins": 1.7527307271957397, |
|
"rewards/rejected": -3.525808334350586, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.9830609719674019, |
|
"grad_norm": 28.36352572432148, |
|
"learning_rate": 1.9726858877086493e-08, |
|
"logits/chosen": -0.6402366161346436, |
|
"logits/rejected": -0.5960521697998047, |
|
"logps/chosen": -225.24977111816406, |
|
"logps/rejected": -257.8275451660156, |
|
"loss": 0.3734, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7362611293792725, |
|
"rewards/margins": 1.7620372772216797, |
|
"rewards/rejected": -3.498298168182373, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.9898877842727312, |
|
"grad_norm": 30.66526971215358, |
|
"learning_rate": 1.2139605462822458e-08, |
|
"logits/chosen": -0.6005350351333618, |
|
"logits/rejected": -0.5661831498146057, |
|
"logps/chosen": -227.962158203125, |
|
"logps/rejected": -261.6782531738281, |
|
"loss": 0.3924, |
|
"rewards/accuracies": 0.8328125476837158, |
|
"rewards/chosen": -1.720937728881836, |
|
"rewards/margins": 1.5882391929626465, |
|
"rewards/rejected": -3.3091769218444824, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.9967145965780604, |
|
"grad_norm": 36.64240487573334, |
|
"learning_rate": 4.552352048558422e-09, |
|
"logits/chosen": -0.6393886804580688, |
|
"logits/rejected": -0.6115251183509827, |
|
"logps/chosen": -229.70652770996094, |
|
"logps/rejected": -268.06982421875, |
|
"loss": 0.3379, |
|
"rewards/accuracies": 0.8734375238418579, |
|
"rewards/chosen": -1.651149034500122, |
|
"rewards/margins": 1.8959904909133911, |
|
"rewards/rejected": -3.5471396446228027, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1465, |
|
"total_flos": 161167907028992.0, |
|
"train_loss": 0.47723283336431094, |
|
"train_runtime": 14257.9418, |
|
"train_samples_per_second": 6.575, |
|
"train_steps_per_second": 0.103 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1465, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 161167907028992.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|