|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6379585326953748, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4e-07, |
|
"logits/chosen": -2.9071130752563477, |
|
"logits/rejected": -2.8750061988830566, |
|
"loss": 0.6931, |
|
"policy_logps/chosen": -127.82667541503906, |
|
"policy_logps/rejected": -130.1011505126953, |
|
"referece_logps/chosen": -127.82667541503906, |
|
"referece_logps/rejected": -130.1011505126953, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8e-07, |
|
"logits/chosen": -2.8690595626831055, |
|
"logits/rejected": -2.921374797821045, |
|
"loss": 0.6931, |
|
"policy_logps/chosen": -127.44483947753906, |
|
"policy_logps/rejected": -118.97954559326172, |
|
"referece_logps/chosen": -127.44483947753906, |
|
"referece_logps/rejected": -118.97954559326172, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2e-06, |
|
"logits/chosen": -2.876248359680176, |
|
"logits/rejected": -2.9442975521087646, |
|
"loss": 0.6931, |
|
"policy_logps/chosen": -151.38050842285156, |
|
"policy_logps/rejected": -104.58517456054688, |
|
"referece_logps/chosen": -151.3734893798828, |
|
"referece_logps/rejected": -104.58135986328125, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.0007032513385638595, |
|
"rewards/margins": -0.00032021405058912933, |
|
"rewards/rejected": -0.0003830373170785606, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6e-06, |
|
"logits/chosen": -2.923973560333252, |
|
"logits/rejected": -2.9425337314605713, |
|
"loss": 0.6924, |
|
"policy_logps/chosen": -121.4244613647461, |
|
"policy_logps/rejected": -117.06787109375, |
|
"referece_logps/chosen": -121.42953491210938, |
|
"referece_logps/rejected": -117.06060028076172, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0005075454828329384, |
|
"rewards/margins": 0.0012342334957793355, |
|
"rewards/rejected": -0.0007266878965310752, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2e-06, |
|
"logits/chosen": -2.896630048751831, |
|
"logits/rejected": -2.874107837677002, |
|
"loss": 0.6925, |
|
"policy_logps/chosen": -112.3115234375, |
|
"policy_logps/rejected": -117.28302764892578, |
|
"referece_logps/chosen": -112.33646392822266, |
|
"referece_logps/rejected": -117.27299499511719, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0024944781325757504, |
|
"rewards/margins": 0.003496956778690219, |
|
"rewards/rejected": -0.0010024786461144686, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.999783578606323e-06, |
|
"logits/chosen": -2.8770463466644287, |
|
"logits/rejected": -2.893326759338379, |
|
"loss": 0.6908, |
|
"policy_logps/chosen": -111.89071655273438, |
|
"policy_logps/rejected": -110.08009338378906, |
|
"referece_logps/chosen": -111.92901611328125, |
|
"referece_logps/rejected": -110.06503295898438, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.00383090996183455, |
|
"rewards/margins": 0.005336493253707886, |
|
"rewards/rejected": -0.0015055835247039795, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.999134408101731e-06, |
|
"logits/chosen": -2.8742122650146484, |
|
"logits/rejected": -2.9248428344726562, |
|
"loss": 0.6872, |
|
"policy_logps/chosen": -160.53836059570312, |
|
"policy_logps/rejected": -137.42295837402344, |
|
"referece_logps/chosen": -160.57460021972656, |
|
"referece_logps/rejected": -137.33425903320312, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.003623390104621649, |
|
"rewards/margins": 0.01249313447624445, |
|
"rewards/rejected": -0.008869742974638939, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.998052769474995e-06, |
|
"logits/chosen": -2.9328360557556152, |
|
"logits/rejected": -2.975874185562134, |
|
"loss": 0.6898, |
|
"policy_logps/chosen": -75.984619140625, |
|
"policy_logps/rejected": -74.07447814941406, |
|
"referece_logps/chosen": -76.07889556884766, |
|
"referece_logps/rejected": -74.01802825927734, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.009427506476640701, |
|
"rewards/margins": 0.015072083100676537, |
|
"rewards/rejected": -0.0056445784866809845, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9965391309055927e-06, |
|
"logits/chosen": -2.882821798324585, |
|
"logits/rejected": -2.901142120361328, |
|
"loss": 0.6773, |
|
"policy_logps/chosen": -107.12052917480469, |
|
"policy_logps/rejected": -93.86226654052734, |
|
"referece_logps/chosen": -107.38616943359375, |
|
"referece_logps/rejected": -93.67011260986328, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.026563584804534912, |
|
"rewards/margins": 0.04577912390232086, |
|
"rewards/rejected": -0.019215542823076248, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9945941475610623e-06, |
|
"logits/chosen": -2.8708529472351074, |
|
"logits/rejected": -2.9397594928741455, |
|
"loss": 0.6735, |
|
"policy_logps/chosen": -123.71336364746094, |
|
"policy_logps/rejected": -100.81864166259766, |
|
"referece_logps/chosen": -124.01136779785156, |
|
"referece_logps/rejected": -100.63360595703125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.029799818992614746, |
|
"rewards/margins": 0.048302434384822845, |
|
"rewards/rejected": -0.0185026116669178, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.992218661313415e-06, |
|
"logits/chosen": -2.8874049186706543, |
|
"logits/rejected": -2.9123711585998535, |
|
"loss": 0.6729, |
|
"policy_logps/chosen": -104.46468353271484, |
|
"policy_logps/rejected": -98.05982971191406, |
|
"referece_logps/chosen": -104.58216857910156, |
|
"referece_logps/rejected": -97.82801055908203, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.011748719029128551, |
|
"rewards/margins": 0.03493000194430351, |
|
"rewards/rejected": -0.023181283846497536, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.98941370037474e-06, |
|
"logits/chosen": -2.9089269638061523, |
|
"logits/rejected": -2.887115478515625, |
|
"loss": 0.67, |
|
"policy_logps/chosen": -125.26738739013672, |
|
"policy_logps/rejected": -118.79678344726562, |
|
"referece_logps/chosen": -125.5167007446289, |
|
"referece_logps/rejected": -118.42857360839844, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.024931641295552254, |
|
"rewards/margins": 0.06175263226032257, |
|
"rewards/rejected": -0.036820992827415466, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.986180478852149e-06, |
|
"logits/chosen": -2.8961265087127686, |
|
"logits/rejected": -2.85491943359375, |
|
"loss": 0.6635, |
|
"policy_logps/chosen": -152.51437377929688, |
|
"policy_logps/rejected": -152.42971801757812, |
|
"referece_logps/chosen": -152.56588745117188, |
|
"referece_logps/rejected": -152.2041015625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.005150413140654564, |
|
"rewards/margins": 0.027712417766451836, |
|
"rewards/rejected": -0.02256200462579727, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.982520396222257e-06, |
|
"logits/chosen": -2.8753738403320312, |
|
"logits/rejected": -2.912999153137207, |
|
"loss": 0.6472, |
|
"policy_logps/chosen": -101.14571380615234, |
|
"policy_logps/rejected": -99.60147094726562, |
|
"referece_logps/chosen": -101.51478576660156, |
|
"referece_logps/rejected": -98.9403076171875, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.03690744936466217, |
|
"rewards/margins": 0.10302485525608063, |
|
"rewards/rejected": -0.06611741334199905, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.978435036725432e-06, |
|
"logits/chosen": -2.9012084007263184, |
|
"logits/rejected": -2.8953423500061035, |
|
"loss": 0.6372, |
|
"policy_logps/chosen": -126.27330017089844, |
|
"policy_logps/rejected": -128.28578186035156, |
|
"referece_logps/chosen": -126.68486785888672, |
|
"referece_logps/rejected": -127.51297760009766, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04115738719701767, |
|
"rewards/margins": 0.11843809485435486, |
|
"rewards/rejected": -0.07728070020675659, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9739261686800657e-06, |
|
"logits/chosen": -2.9024605751037598, |
|
"logits/rejected": -2.8853814601898193, |
|
"loss": 0.6414, |
|
"policy_logps/chosen": -114.59141540527344, |
|
"policy_logps/rejected": -128.96331787109375, |
|
"referece_logps/chosen": -114.98219299316406, |
|
"referece_logps/rejected": -128.28781127929688, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.03907782956957817, |
|
"rewards/margins": 0.1066286489367485, |
|
"rewards/rejected": -0.06755081564188004, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.968995743717171e-06, |
|
"logits/chosen": -2.912461757659912, |
|
"logits/rejected": -2.9493064880371094, |
|
"loss": 0.6429, |
|
"policy_logps/chosen": -111.6058349609375, |
|
"policy_logps/rejected": -109.18556213378906, |
|
"referece_logps/chosen": -111.89552307128906, |
|
"referece_logps/rejected": -108.55181884765625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.028969965875148773, |
|
"rewards/margins": 0.09234414994716644, |
|
"rewards/rejected": -0.06337418407201767, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9636458959356316e-06, |
|
"logits/chosen": -2.8974556922912598, |
|
"logits/rejected": -2.89353084564209, |
|
"loss": 0.6528, |
|
"policy_logps/chosen": -138.28262329101562, |
|
"policy_logps/rejected": -128.17782592773438, |
|
"referece_logps/chosen": -138.3295440673828, |
|
"referece_logps/rejected": -127.26476287841797, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.004691671580076218, |
|
"rewards/margins": 0.09599801898002625, |
|
"rewards/rejected": -0.09130635112524033, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9578789409784727e-06, |
|
"logits/chosen": -2.8959522247314453, |
|
"logits/rejected": -2.9369704723358154, |
|
"loss": 0.6446, |
|
"policy_logps/chosen": -111.41603088378906, |
|
"policy_logps/rejected": -104.68195343017578, |
|
"referece_logps/chosen": -111.60549926757812, |
|
"referece_logps/rejected": -104.04905700683594, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.018947793170809746, |
|
"rewards/margins": 0.08223824948072433, |
|
"rewards/rejected": -0.06329045444726944, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.951697375030553e-06, |
|
"logits/chosen": -2.853641986846924, |
|
"logits/rejected": -2.8619699478149414, |
|
"loss": 0.6466, |
|
"policy_logps/chosen": -145.3450927734375, |
|
"policy_logps/rejected": -134.5753631591797, |
|
"referece_logps/chosen": -145.5601348876953, |
|
"referece_logps/rejected": -133.66200256347656, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.021503955125808716, |
|
"rewards/margins": 0.11283906549215317, |
|
"rewards/rejected": -0.09133510291576385, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9451038737381077e-06, |
|
"logits/chosen": -2.93542218208313, |
|
"logits/rejected": -2.9347047805786133, |
|
"loss": 0.6346, |
|
"policy_logps/chosen": -97.80740356445312, |
|
"policy_logps/rejected": -92.36180877685547, |
|
"referece_logps/chosen": -97.90306091308594, |
|
"referece_logps/rejected": -91.67369079589844, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.009566396474838257, |
|
"rewards/margins": 0.07837802916765213, |
|
"rewards/rejected": -0.06881163269281387, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9381012910506143e-06, |
|
"logits/chosen": -2.8574209213256836, |
|
"logits/rejected": -2.860379219055176, |
|
"loss": 0.6102, |
|
"policy_logps/chosen": -152.8387908935547, |
|
"policy_logps/rejected": -156.60391235351562, |
|
"referece_logps/chosen": -152.914306640625, |
|
"referece_logps/rejected": -155.2355499267578, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.007550956681370735, |
|
"rewards/margins": 0.14438626170158386, |
|
"rewards/rejected": -0.13683530688285828, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9306926579854817e-06, |
|
"logits/chosen": -2.8623623847961426, |
|
"logits/rejected": -2.8780465126037598, |
|
"loss": 0.6306, |
|
"policy_logps/chosen": -143.302490234375, |
|
"policy_logps/rejected": -127.38278198242188, |
|
"referece_logps/chosen": -143.29205322265625, |
|
"referece_logps/rejected": -126.24747467041016, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0010449867695569992, |
|
"rewards/margins": 0.11248550564050674, |
|
"rewards/rejected": -0.11353050917387009, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.922881181316097e-06, |
|
"logits/chosen": -2.949070930480957, |
|
"logits/rejected": -2.954594135284424, |
|
"loss": 0.6002, |
|
"policy_logps/chosen": -74.232177734375, |
|
"policy_logps/rejected": -73.76258850097656, |
|
"referece_logps/chosen": -75.20740509033203, |
|
"referece_logps/rejected": -72.94302368164062, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.09752248972654343, |
|
"rewards/margins": 0.17947959899902344, |
|
"rewards/rejected": -0.08195710927248001, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9146702421837946e-06, |
|
"logits/chosen": -2.8681116104125977, |
|
"logits/rejected": -2.8957810401916504, |
|
"loss": 0.612, |
|
"policy_logps/chosen": -127.76103210449219, |
|
"policy_logps/rejected": -125.47801208496094, |
|
"referece_logps/chosen": -128.02197265625, |
|
"referece_logps/rejected": -124.08172607421875, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.026093529537320137, |
|
"rewards/margins": 0.1657221019268036, |
|
"rewards/rejected": -0.1396285593509674, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.906063394634356e-06, |
|
"logits/chosen": -2.862689256668091, |
|
"logits/rejected": -2.877995252609253, |
|
"loss": 0.6398, |
|
"policy_logps/chosen": -120.70744323730469, |
|
"policy_logps/rejected": -113.76991271972656, |
|
"referece_logps/chosen": -120.96971130371094, |
|
"referece_logps/rejected": -112.71368408203125, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.026226602494716644, |
|
"rewards/margins": 0.13184988498687744, |
|
"rewards/rejected": -0.1056232899427414, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.897064364079664e-06, |
|
"logits/chosen": -2.9357306957244873, |
|
"logits/rejected": -2.976917028427124, |
|
"loss": 0.5904, |
|
"policy_logps/chosen": -105.41529846191406, |
|
"policy_logps/rejected": -96.86430358886719, |
|
"referece_logps/chosen": -106.42874145507812, |
|
"referece_logps/rejected": -95.49075317382812, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.10134478658437729, |
|
"rewards/margins": 0.23869961500167847, |
|
"rewards/rejected": -0.13735483586788177, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8876770456851876e-06, |
|
"logits/chosen": -2.8511600494384766, |
|
"logits/rejected": -2.888685941696167, |
|
"loss": 0.5817, |
|
"policy_logps/chosen": -135.38853454589844, |
|
"policy_logps/rejected": -136.4590301513672, |
|
"referece_logps/chosen": -135.95562744140625, |
|
"referece_logps/rejected": -134.50482177734375, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.05670913681387901, |
|
"rewards/margins": 0.2521297335624695, |
|
"rewards/rejected": -0.19542059302330017, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8779055026839868e-06, |
|
"logits/chosen": -2.9084866046905518, |
|
"logits/rejected": -2.9267258644104004, |
|
"loss": 0.6042, |
|
"policy_logps/chosen": -134.9838409423828, |
|
"policy_logps/rejected": -114.98860931396484, |
|
"referece_logps/chosen": -135.53085327148438, |
|
"referece_logps/rejected": -113.39382934570312, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.05469997972249985, |
|
"rewards/margins": 0.21417750418186188, |
|
"rewards/rejected": -0.15947751700878143, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8677539646179705e-06, |
|
"logits/chosen": -2.88179349899292, |
|
"logits/rejected": -2.929935932159424, |
|
"loss": 0.5827, |
|
"policy_logps/chosen": -163.58660888671875, |
|
"policy_logps/rejected": -131.79258728027344, |
|
"referece_logps/chosen": -163.90985107421875, |
|
"referece_logps/rejected": -129.81314086914062, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.0323248989880085, |
|
"rewards/margins": 0.23026807606220245, |
|
"rewards/rejected": -0.19794318079948425, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8572268255071718e-06, |
|
"logits/chosen": -2.9416298866271973, |
|
"logits/rejected": -2.961357831954956, |
|
"loss": 0.5977, |
|
"policy_logps/chosen": -98.88802337646484, |
|
"policy_logps/rejected": -94.90333557128906, |
|
"referece_logps/chosen": -99.61119842529297, |
|
"referece_logps/rejected": -93.1702651977539, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.07231828570365906, |
|
"rewards/margins": 0.24562585353851318, |
|
"rewards/rejected": -0.17330753803253174, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8463286419478252e-06, |
|
"logits/chosen": -2.9383907318115234, |
|
"logits/rejected": -2.880384922027588, |
|
"loss": 0.5772, |
|
"policy_logps/chosen": -118.6290283203125, |
|
"policy_logps/rejected": -126.94461822509766, |
|
"referece_logps/chosen": -118.98780822753906, |
|
"referece_logps/rejected": -125.21376037597656, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.03587843477725983, |
|
"rewards/margins": 0.20896492898464203, |
|
"rewards/rejected": -0.173086479306221, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.835064131140081e-06, |
|
"logits/chosen": -2.908090829849243, |
|
"logits/rejected": -2.9141459465026855, |
|
"loss": 0.5723, |
|
"policy_logps/chosen": -132.4810333251953, |
|
"policy_logps/rejected": -130.16603088378906, |
|
"referece_logps/chosen": -132.7151336669922, |
|
"referece_logps/rejected": -127.14649963378906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02341010421514511, |
|
"rewards/margins": 0.32536280155181885, |
|
"rewards/rejected": -0.3019527196884155, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8234381688461941e-06, |
|
"logits/chosen": -2.9611878395080566, |
|
"logits/rejected": -2.966728448867798, |
|
"loss": 0.5734, |
|
"policy_logps/chosen": -119.45679473876953, |
|
"policy_logps/rejected": -118.76419830322266, |
|
"referece_logps/chosen": -118.86337280273438, |
|
"referece_logps/rejected": -115.91516876220703, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.059341806918382645, |
|
"rewards/margins": 0.22556202113628387, |
|
"rewards/rejected": -0.28490379452705383, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8114557872800905e-06, |
|
"logits/chosen": -2.967761993408203, |
|
"logits/rejected": -2.900844097137451, |
|
"loss": 0.5633, |
|
"policy_logps/chosen": -130.78848266601562, |
|
"policy_logps/rejected": -142.6917724609375, |
|
"referece_logps/chosen": -130.37750244140625, |
|
"referece_logps/rejected": -139.14178466796875, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.041097551584243774, |
|
"rewards/margins": 0.31390050053596497, |
|
"rewards/rejected": -0.35499805212020874, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7991221729292058e-06, |
|
"logits/chosen": -2.8585638999938965, |
|
"logits/rejected": -2.90903377532959, |
|
"loss": 0.5649, |
|
"policy_logps/chosen": -137.8734893798828, |
|
"policy_logps/rejected": -127.91438293457031, |
|
"referece_logps/chosen": -137.9367218017578, |
|
"referece_logps/rejected": -125.2115478515625, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.0063229575753211975, |
|
"rewards/margins": 0.27660617232322693, |
|
"rewards/rejected": -0.27028322219848633, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7864426643095536e-06, |
|
"logits/chosen": -2.9357798099517822, |
|
"logits/rejected": -2.900618553161621, |
|
"loss": 0.5592, |
|
"policy_logps/chosen": -151.2121124267578, |
|
"policy_logps/rejected": -134.10804748535156, |
|
"referece_logps/chosen": -150.83583068847656, |
|
"referece_logps/rejected": -131.0611572265625, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.03762848675251007, |
|
"rewards/margins": 0.26705947518348694, |
|
"rewards/rejected": -0.3046879470348358, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7734227496549878e-06, |
|
"logits/chosen": -2.9032468795776367, |
|
"logits/rejected": -2.9027702808380127, |
|
"loss": 0.5684, |
|
"policy_logps/chosen": -104.47734069824219, |
|
"policy_logps/rejected": -106.55696105957031, |
|
"referece_logps/chosen": -105.3730697631836, |
|
"referece_logps/rejected": -104.14241027832031, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.0895722359418869, |
|
"rewards/margins": 0.3310272991657257, |
|
"rewards/rejected": -0.241455078125, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7600680645416582e-06, |
|
"logits/chosen": -2.9712843894958496, |
|
"logits/rejected": -2.894430637359619, |
|
"loss": 0.5499, |
|
"policy_logps/chosen": -126.83047485351562, |
|
"policy_logps/rejected": -138.54771423339844, |
|
"referece_logps/chosen": -126.56389617919922, |
|
"referece_logps/rejected": -135.42103576660156, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.026657823473215103, |
|
"rewards/margins": 0.2860097885131836, |
|
"rewards/rejected": -0.312667578458786, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7463843894486936e-06, |
|
"logits/chosen": -2.942542552947998, |
|
"logits/rejected": -2.9863080978393555, |
|
"loss": 0.5534, |
|
"policy_logps/chosen": -93.51473999023438, |
|
"policy_logps/rejected": -94.46737670898438, |
|
"referece_logps/chosen": -93.94270324707031, |
|
"referece_logps/rejected": -91.81558227539062, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.04279506206512451, |
|
"rewards/margins": 0.3079749345779419, |
|
"rewards/rejected": -0.2651798725128174, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7323776472561625e-06, |
|
"logits/chosen": -2.9191946983337402, |
|
"logits/rejected": -2.935122013092041, |
|
"loss": 0.5682, |
|
"policy_logps/chosen": -128.17813110351562, |
|
"policy_logps/rejected": -139.06613159179688, |
|
"referece_logps/chosen": -127.54570770263672, |
|
"referece_logps/rejected": -135.201904296875, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06324195861816406, |
|
"rewards/margins": 0.32317861914634705, |
|
"rewards/rejected": -0.3864205777645111, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7180539006813969e-06, |
|
"logits/chosen": -2.920085906982422, |
|
"logits/rejected": -2.906773567199707, |
|
"loss": 0.5454, |
|
"policy_logps/chosen": -126.83687591552734, |
|
"policy_logps/rejected": -114.17430877685547, |
|
"referece_logps/chosen": -126.77012634277344, |
|
"referece_logps/rejected": -110.9568862915039, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.006674099713563919, |
|
"rewards/margins": 0.31506818532943726, |
|
"rewards/rejected": -0.3217422664165497, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7034193496547902e-06, |
|
"logits/chosen": -2.8620564937591553, |
|
"logits/rejected": -2.9222469329833984, |
|
"loss": 0.534, |
|
"policy_logps/chosen": -125.01854705810547, |
|
"policy_logps/rejected": -116.95589447021484, |
|
"referece_logps/chosen": -125.00530242919922, |
|
"referece_logps/rejected": -113.22078704833984, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.0013249870389699936, |
|
"rewards/margins": 0.3721860647201538, |
|
"rewards/rejected": -0.37351107597351074, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.6884803286362e-06, |
|
"logits/chosen": -2.8868565559387207, |
|
"logits/rejected": -2.91511869430542, |
|
"loss": 0.5459, |
|
"policy_logps/chosen": -150.9369354248047, |
|
"policy_logps/rejected": -144.1173858642578, |
|
"referece_logps/chosen": -150.93002319335938, |
|
"referece_logps/rejected": -139.8741455078125, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0006910450756549835, |
|
"rewards/margins": 0.42363405227661133, |
|
"rewards/rejected": -0.4243250787258148, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.673243303873124e-06, |
|
"logits/chosen": -2.9313180446624756, |
|
"logits/rejected": -2.9371540546417236, |
|
"loss": 0.5569, |
|
"policy_logps/chosen": -122.50914764404297, |
|
"policy_logps/rejected": -110.94712829589844, |
|
"referece_logps/chosen": -122.10940551757812, |
|
"referece_logps/rejected": -108.29950714111328, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03997454792261124, |
|
"rewards/margins": 0.22478806972503662, |
|
"rewards/rejected": -0.26476261019706726, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.6577148706018328e-06, |
|
"logits/chosen": -2.894786834716797, |
|
"logits/rejected": -2.977565288543701, |
|
"loss": 0.5775, |
|
"policy_logps/chosen": -114.12425231933594, |
|
"policy_logps/rejected": -113.44467163085938, |
|
"referece_logps/chosen": -112.93356323242188, |
|
"referece_logps/rejected": -110.42334747314453, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.11906924843788147, |
|
"rewards/margins": 0.18306350708007812, |
|
"rewards/rejected": -0.3021327555179596, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6419017501926656e-06, |
|
"logits/chosen": -2.90000581741333, |
|
"logits/rejected": -2.9254188537597656, |
|
"loss": 0.5176, |
|
"policy_logps/chosen": -128.19607543945312, |
|
"policy_logps/rejected": -122.17625427246094, |
|
"referece_logps/chosen": -128.63787841796875, |
|
"referece_logps/rejected": -118.01435852050781, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.04418013244867325, |
|
"rewards/margins": 0.4603692889213562, |
|
"rewards/rejected": -0.41618919372558594, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6258107872407374e-06, |
|
"logits/chosen": -2.9072225093841553, |
|
"logits/rejected": -2.9065234661102295, |
|
"loss": 0.5243, |
|
"policy_logps/chosen": -125.48599243164062, |
|
"policy_logps/rejected": -129.7559814453125, |
|
"referece_logps/chosen": -126.17718505859375, |
|
"referece_logps/rejected": -127.1695785522461, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.06911970674991608, |
|
"rewards/margins": 0.32776087522506714, |
|
"rewards/rejected": -0.25864115357398987, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6094489466033042e-06, |
|
"logits/chosen": -2.917146682739258, |
|
"logits/rejected": -2.9667577743530273, |
|
"loss": 0.5559, |
|
"policy_logps/chosen": -108.18141174316406, |
|
"policy_logps/rejected": -99.39623260498047, |
|
"referece_logps/chosen": -108.76286315917969, |
|
"referece_logps/rejected": -96.53907775878906, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0581454373896122, |
|
"rewards/margins": 0.34386110305786133, |
|
"rewards/rejected": -0.28571566939353943, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.5928233103850727e-06, |
|
"logits/chosen": -2.906874179840088, |
|
"logits/rejected": -2.919196844100952, |
|
"loss": 0.5364, |
|
"policy_logps/chosen": -161.1468048095703, |
|
"policy_logps/rejected": -143.97592163085938, |
|
"referece_logps/chosen": -160.62081909179688, |
|
"referece_logps/rejected": -140.28585815429688, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.052598677575588226, |
|
"rewards/margins": 0.31640806794166565, |
|
"rewards/rejected": -0.3690067529678345, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.575941074872766e-06, |
|
"logits/chosen": -2.957002878189087, |
|
"logits/rejected": -2.9672722816467285, |
|
"loss": 0.5172, |
|
"policy_logps/chosen": -119.42903900146484, |
|
"policy_logps/rejected": -116.86624145507812, |
|
"referece_logps/chosen": -120.13378143310547, |
|
"referece_logps/rejected": -113.2657241821289, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07047442346811295, |
|
"rewards/margins": 0.4305253326892853, |
|
"rewards/rejected": -0.36005088686943054, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.5588095474202594e-06, |
|
"logits/chosen": -2.8591084480285645, |
|
"logits/rejected": -2.8590025901794434, |
|
"loss": 0.4933, |
|
"policy_logps/chosen": -157.062744140625, |
|
"policy_logps/rejected": -151.7144775390625, |
|
"referece_logps/chosen": -156.53114318847656, |
|
"referece_logps/rejected": -147.15911865234375, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.05316000431776047, |
|
"rewards/margins": 0.4023759067058563, |
|
"rewards/rejected": -0.4555359482765198, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5414361432856474e-06, |
|
"logits/chosen": -2.9438529014587402, |
|
"logits/rejected": -2.9987499713897705, |
|
"loss": 0.4979, |
|
"policy_logps/chosen": -117.94309997558594, |
|
"policy_logps/rejected": -108.414306640625, |
|
"referece_logps/chosen": -118.55531311035156, |
|
"referece_logps/rejected": -104.54682922363281, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.061221349984407425, |
|
"rewards/margins": 0.4479690194129944, |
|
"rewards/rejected": -0.38674765825271606, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5238283824216013e-06, |
|
"logits/chosen": -2.9244260787963867, |
|
"logits/rejected": -2.9373199939727783, |
|
"loss": 0.4961, |
|
"policy_logps/chosen": -119.873779296875, |
|
"policy_logps/rejected": -108.0604248046875, |
|
"referece_logps/chosen": -120.80511474609375, |
|
"referece_logps/rejected": -104.79627990722656, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.09313352406024933, |
|
"rewards/margins": 0.4195476770401001, |
|
"rewards/rejected": -0.32641416788101196, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.5059938862204125e-06, |
|
"logits/chosen": -2.920050621032715, |
|
"logits/rejected": -2.9494502544403076, |
|
"loss": 0.5339, |
|
"policy_logps/chosen": -128.6884307861328, |
|
"policy_logps/rejected": -124.91862487792969, |
|
"referece_logps/chosen": -127.86734008789062, |
|
"referece_logps/rejected": -120.62930297851562, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08210951089859009, |
|
"rewards/margins": 0.34682315587997437, |
|
"rewards/rejected": -0.42893266677856445, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4879403742151283e-06, |
|
"logits/chosen": -2.907794713973999, |
|
"logits/rejected": -2.9134182929992676, |
|
"loss": 0.5389, |
|
"policy_logps/chosen": -129.15188598632812, |
|
"policy_logps/rejected": -135.58035278320312, |
|
"referece_logps/chosen": -127.82069396972656, |
|
"referece_logps/rejected": -131.39085388183594, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.13311973214149475, |
|
"rewards/margins": 0.28583019971847534, |
|
"rewards/rejected": -0.4189499020576477, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4696756607382058e-06, |
|
"logits/chosen": -2.9352166652679443, |
|
"logits/rejected": -2.9330966472625732, |
|
"loss": 0.543, |
|
"policy_logps/chosen": -125.51739501953125, |
|
"policy_logps/rejected": -128.10951232910156, |
|
"referece_logps/chosen": -124.24830627441406, |
|
"referece_logps/rejected": -123.51809692382812, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12690886855125427, |
|
"rewards/margins": 0.33223241567611694, |
|
"rewards/rejected": -0.4591412842273712, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.4512076515391374e-06, |
|
"logits/chosen": -2.941624164581299, |
|
"logits/rejected": -2.9078333377838135, |
|
"loss": 0.4758, |
|
"policy_logps/chosen": -109.23553466796875, |
|
"policy_logps/rejected": -104.69176483154297, |
|
"referece_logps/chosen": -110.22058868408203, |
|
"referece_logps/rejected": -100.0502700805664, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09850560873746872, |
|
"rewards/margins": 0.5626559257507324, |
|
"rewards/rejected": -0.4641503393650055, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.432544340362501e-06, |
|
"logits/chosen": -2.972963809967041, |
|
"logits/rejected": -2.9390883445739746, |
|
"loss": 0.4904, |
|
"policy_logps/chosen": -95.29005432128906, |
|
"policy_logps/rejected": -121.40338897705078, |
|
"referece_logps/chosen": -95.24687194824219, |
|
"referece_logps/rejected": -116.24635314941406, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.004318548366427422, |
|
"rewards/margins": 0.5113850235939026, |
|
"rewards/rejected": -0.515703558921814, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.4136938054879282e-06, |
|
"logits/chosen": -2.9395623207092285, |
|
"logits/rejected": -2.953756809234619, |
|
"loss": 0.5286, |
|
"policy_logps/chosen": -122.0515365600586, |
|
"policy_logps/rejected": -128.63021850585938, |
|
"referece_logps/chosen": -120.7651596069336, |
|
"referece_logps/rejected": -123.8788070678711, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1286384016275406, |
|
"rewards/margins": 0.34650319814682007, |
|
"rewards/rejected": -0.47514158487319946, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.3946642062334763e-06, |
|
"logits/chosen": -2.9107141494750977, |
|
"logits/rejected": -2.9332330226898193, |
|
"loss": 0.4832, |
|
"policy_logps/chosen": -121.9334716796875, |
|
"policy_logps/rejected": -119.00639343261719, |
|
"referece_logps/chosen": -122.2487564086914, |
|
"referece_logps/rejected": -114.04248809814453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03152900189161301, |
|
"rewards/margins": 0.5279202461242676, |
|
"rewards/rejected": -0.49639129638671875, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.37546377942393e-06, |
|
"logits/chosen": -2.950266122817993, |
|
"logits/rejected": -2.9494924545288086, |
|
"loss": 0.5178, |
|
"policy_logps/chosen": -104.2540283203125, |
|
"policy_logps/rejected": -125.53955841064453, |
|
"referece_logps/chosen": -104.66864776611328, |
|
"referece_logps/rejected": -121.41145324707031, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.04146187752485275, |
|
"rewards/margins": 0.45427215099334717, |
|
"rewards/rejected": -0.4128102958202362, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3561008358255469e-06, |
|
"logits/chosen": -2.917292594909668, |
|
"logits/rejected": -2.952850818634033, |
|
"loss": 0.5263, |
|
"policy_logps/chosen": -118.34769439697266, |
|
"policy_logps/rejected": -112.80188751220703, |
|
"referece_logps/chosen": -118.5436782836914, |
|
"referece_logps/rejected": -109.08131408691406, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.01959807053208351, |
|
"rewards/margins": 0.39165574312210083, |
|
"rewards/rejected": -0.3720576763153076, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3365837565488062e-06, |
|
"logits/chosen": -2.92209529876709, |
|
"logits/rejected": -2.9668972492218018, |
|
"loss": 0.4813, |
|
"policy_logps/chosen": -161.57546997070312, |
|
"policy_logps/rejected": -137.75039672851562, |
|
"referece_logps/chosen": -161.21104431152344, |
|
"referece_logps/rejected": -130.89552307128906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03644174337387085, |
|
"rewards/margins": 0.6490457057952881, |
|
"rewards/rejected": -0.6854873895645142, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3169209894207027e-06, |
|
"logits/chosen": -2.9616472721099854, |
|
"logits/rejected": -2.961239814758301, |
|
"loss": 0.4486, |
|
"policy_logps/chosen": -154.44467163085938, |
|
"policy_logps/rejected": -158.21372985839844, |
|
"referece_logps/chosen": -154.07650756835938, |
|
"referece_logps/rejected": -151.34902954101562, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.03681756183505058, |
|
"rewards/margins": 0.6496531367301941, |
|
"rewards/rejected": -0.6864707469940186, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.2971210453281673e-06, |
|
"logits/chosen": -2.892361640930176, |
|
"logits/rejected": -2.8887600898742676, |
|
"loss": 0.4939, |
|
"policy_logps/chosen": -121.81805419921875, |
|
"policy_logps/rejected": -114.29660034179688, |
|
"referece_logps/chosen": -122.0481948852539, |
|
"referece_logps/rejected": -110.4261474609375, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.023014426231384277, |
|
"rewards/margins": 0.41005975008010864, |
|
"rewards/rejected": -0.38704538345336914, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2771924945341906e-06, |
|
"logits/chosen": -2.91727352142334, |
|
"logits/rejected": -2.922391891479492, |
|
"loss": 0.4868, |
|
"policy_logps/chosen": -111.62061309814453, |
|
"policy_logps/rejected": -106.5035400390625, |
|
"referece_logps/chosen": -112.6988525390625, |
|
"referece_logps/rejected": -101.50572967529297, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10782448947429657, |
|
"rewards/margins": 0.6076046228408813, |
|
"rewards/rejected": -0.49978014826774597, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.257143962968246e-06, |
|
"logits/chosen": -2.9374663829803467, |
|
"logits/rejected": -2.9512386322021484, |
|
"loss": 0.5144, |
|
"policy_logps/chosen": -149.2052001953125, |
|
"policy_logps/rejected": -130.6556396484375, |
|
"referece_logps/chosen": -147.80715942382812, |
|
"referece_logps/rejected": -125.28959655761719, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13980263471603394, |
|
"rewards/margins": 0.39680200815200806, |
|
"rewards/rejected": -0.536604642868042, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.236984128492619e-06, |
|
"logits/chosen": -2.9574966430664062, |
|
"logits/rejected": -2.9541845321655273, |
|
"loss": 0.4811, |
|
"policy_logps/chosen": -105.58607482910156, |
|
"policy_logps/rejected": -107.0927734375, |
|
"referece_logps/chosen": -106.00244903564453, |
|
"referece_logps/rejected": -103.1064453125, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.041637033224105835, |
|
"rewards/margins": 0.44027072191238403, |
|
"rewards/rejected": -0.3986337184906006, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2167217171462566e-06, |
|
"logits/chosen": -2.96567964553833, |
|
"logits/rejected": -2.9865453243255615, |
|
"loss": 0.4782, |
|
"policy_logps/chosen": -137.23434448242188, |
|
"policy_logps/rejected": -116.43749237060547, |
|
"referece_logps/chosen": -137.0900421142578, |
|
"referece_logps/rejected": -110.79519653320312, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.014430008828639984, |
|
"rewards/margins": 0.5497984886169434, |
|
"rewards/rejected": -0.5642285346984863, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1963654993677643e-06, |
|
"logits/chosen": -2.9047865867614746, |
|
"logits/rejected": -2.9375927448272705, |
|
"loss": 0.4831, |
|
"policy_logps/chosen": -142.0609588623047, |
|
"policy_logps/rejected": -138.23785400390625, |
|
"referece_logps/chosen": -140.9311981201172, |
|
"referece_logps/rejected": -132.38949584960938, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.1129767894744873, |
|
"rewards/margins": 0.47185903787612915, |
|
"rewards/rejected": -0.5848358869552612, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1759242861991854e-06, |
|
"logits/chosen": -2.9874184131622314, |
|
"logits/rejected": -2.9638171195983887, |
|
"loss": 0.4436, |
|
"policy_logps/chosen": -111.93683624267578, |
|
"policy_logps/rejected": -113.12979888916016, |
|
"referece_logps/chosen": -111.51397705078125, |
|
"referece_logps/rejected": -107.90960693359375, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.042286355048418045, |
|
"rewards/margins": 0.4797336161136627, |
|
"rewards/rejected": -0.5220199227333069, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.155406925472205e-06, |
|
"logits/chosen": -2.927311420440674, |
|
"logits/rejected": -2.921337366104126, |
|
"loss": 0.4284, |
|
"policy_logps/chosen": -147.83377075195312, |
|
"policy_logps/rejected": -148.07089233398438, |
|
"referece_logps/chosen": -147.18499755859375, |
|
"referece_logps/rejected": -140.68211364746094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0648760199546814, |
|
"rewards/margins": 0.6740000247955322, |
|
"rewards/rejected": -0.7388760447502136, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1348222979784287e-06, |
|
"logits/chosen": -2.9232394695281982, |
|
"logits/rejected": -2.9430999755859375, |
|
"loss": 0.5002, |
|
"policy_logps/chosen": -143.42274475097656, |
|
"policy_logps/rejected": -143.17144775390625, |
|
"referece_logps/chosen": -141.336669921875, |
|
"referece_logps/rejected": -136.30712890625, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.20860746502876282, |
|
"rewards/margins": 0.4778253436088562, |
|
"rewards/rejected": -0.6864327788352966, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1141793136253986e-06, |
|
"logits/chosen": -2.8802056312561035, |
|
"logits/rejected": -2.9181809425354004, |
|
"loss": 0.5141, |
|
"policy_logps/chosen": -155.84146118164062, |
|
"policy_logps/rejected": -145.16802978515625, |
|
"referece_logps/chosen": -154.25274658203125, |
|
"referece_logps/rejected": -140.40045166015625, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.15887098014354706, |
|
"rewards/margins": 0.31788796186447144, |
|
"rewards/rejected": -0.4767589569091797, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.09348690758e-06, |
|
"logits/chosen": -2.921013355255127, |
|
"logits/rejected": -2.949887275695801, |
|
"loss": 0.4978, |
|
"policy_logps/chosen": -140.55694580078125, |
|
"policy_logps/rejected": -137.7884521484375, |
|
"referece_logps/chosen": -139.3011474609375, |
|
"referece_logps/rejected": -130.94146728515625, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.12557877600193024, |
|
"rewards/margins": 0.5591215491294861, |
|
"rewards/rejected": -0.6847003102302551, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.072754036400944e-06, |
|
"logits/chosen": -2.9696455001831055, |
|
"logits/rejected": -2.9857373237609863, |
|
"loss": 0.417, |
|
"policy_logps/chosen": -113.93096923828125, |
|
"policy_logps/rejected": -107.09919738769531, |
|
"referece_logps/chosen": -115.27886962890625, |
|
"referece_logps/rejected": -102.10234069824219, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1347896158695221, |
|
"rewards/margins": 0.6344748139381409, |
|
"rewards/rejected": -0.49968522787094116, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0519896741619803e-06, |
|
"logits/chosen": -2.919851303100586, |
|
"logits/rejected": -2.9218404293060303, |
|
"loss": 0.4688, |
|
"policy_logps/chosen": -155.71444702148438, |
|
"policy_logps/rejected": -144.62152099609375, |
|
"referece_logps/chosen": -155.02752685546875, |
|
"referece_logps/rejected": -138.43026733398438, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.0686924085021019, |
|
"rewards/margins": 0.5504311323165894, |
|
"rewards/rejected": -0.6191235184669495, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.031202808567539e-06, |
|
"logits/chosen": -2.8995673656463623, |
|
"logits/rejected": -2.930441379547119, |
|
"loss": 0.4539, |
|
"policy_logps/chosen": -155.89486694335938, |
|
"policy_logps/rejected": -133.66708374023438, |
|
"referece_logps/chosen": -154.44532775878906, |
|
"referece_logps/rejected": -126.08793640136719, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.14495408535003662, |
|
"rewards/margins": 0.6129606366157532, |
|
"rewards/rejected": -0.7579147815704346, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0104024370624642e-06, |
|
"logits/chosen": -2.9370193481445312, |
|
"logits/rejected": -2.9806201457977295, |
|
"loss": 0.4106, |
|
"policy_logps/chosen": -134.17471313476562, |
|
"policy_logps/rejected": -112.07872772216797, |
|
"referece_logps/chosen": -134.83750915527344, |
|
"referece_logps/rejected": -105.02826690673828, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.06628021597862244, |
|
"rewards/margins": 0.7713260650634766, |
|
"rewards/rejected": -0.7050458192825317, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.895975629375357e-07, |
|
"logits/chosen": -2.9489431381225586, |
|
"logits/rejected": -2.931128978729248, |
|
"loss": 0.4902, |
|
"policy_logps/chosen": -132.9930877685547, |
|
"policy_logps/rejected": -148.03472900390625, |
|
"referece_logps/chosen": -131.95632934570312, |
|
"referece_logps/rejected": -141.67227172851562, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.1036761999130249, |
|
"rewards/margins": 0.5325698852539062, |
|
"rewards/rejected": -0.6362460851669312, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.687971914324607e-07, |
|
"logits/chosen": -2.911447525024414, |
|
"logits/rejected": -2.9363858699798584, |
|
"loss": 0.4865, |
|
"policy_logps/chosen": -136.85560607910156, |
|
"policy_logps/rejected": -110.8121337890625, |
|
"referece_logps/chosen": -135.94354248046875, |
|
"referece_logps/rejected": -105.58758544921875, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.09120647609233856, |
|
"rewards/margins": 0.4312480390071869, |
|
"rewards/rejected": -0.5224545001983643, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.480103258380197e-07, |
|
"logits/chosen": -2.9371180534362793, |
|
"logits/rejected": -2.9503769874572754, |
|
"loss": 0.4557, |
|
"policy_logps/chosen": -150.19725036621094, |
|
"policy_logps/rejected": -149.049072265625, |
|
"referece_logps/chosen": -147.80014038085938, |
|
"referece_logps/rejected": -141.38333129882812, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.23970948159694672, |
|
"rewards/margins": 0.5268632173538208, |
|
"rewards/rejected": -0.7665727138519287, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.272459635990562e-07, |
|
"logits/chosen": -2.914623737335205, |
|
"logits/rejected": -2.909700393676758, |
|
"loss": 0.4753, |
|
"policy_logps/chosen": -149.44277954101562, |
|
"policy_logps/rejected": -158.23593139648438, |
|
"referece_logps/chosen": -147.42015075683594, |
|
"referece_logps/rejected": -149.85214233398438, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2022620439529419, |
|
"rewards/margins": 0.6361156702041626, |
|
"rewards/rejected": -0.838377833366394, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.065130924199998e-07, |
|
"logits/chosen": -2.964315414428711, |
|
"logits/rejected": -2.9234838485717773, |
|
"loss": 0.5092, |
|
"policy_logps/chosen": -133.36880493164062, |
|
"policy_logps/rejected": -146.08169555664062, |
|
"referece_logps/chosen": -132.2157440185547, |
|
"referece_logps/rejected": -140.08538818359375, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11530620604753494, |
|
"rewards/margins": 0.4843238890171051, |
|
"rewards/rejected": -0.5996301174163818, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.858206863746017e-07, |
|
"logits/chosen": -2.909698486328125, |
|
"logits/rejected": -2.918955087661743, |
|
"loss": 0.4525, |
|
"policy_logps/chosen": -132.2825469970703, |
|
"policy_logps/rejected": -136.70962524414062, |
|
"referece_logps/chosen": -130.87799072265625, |
|
"referece_logps/rejected": -130.4683837890625, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.1404554843902588, |
|
"rewards/margins": 0.48366838693618774, |
|
"rewards/rejected": -0.6241238713264465, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.651777020215712e-07, |
|
"logits/chosen": -2.8775806427001953, |
|
"logits/rejected": -2.876236915588379, |
|
"loss": 0.4829, |
|
"policy_logps/chosen": -138.22640991210938, |
|
"policy_logps/rejected": -135.67745971679688, |
|
"referece_logps/chosen": -137.68087768554688, |
|
"referece_logps/rejected": -129.38088989257812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05455498397350311, |
|
"rewards/margins": 0.5751017928123474, |
|
"rewards/rejected": -0.6296567916870117, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.445930745277951e-07, |
|
"logits/chosen": -2.9498825073242188, |
|
"logits/rejected": -2.9456593990325928, |
|
"loss": 0.4666, |
|
"policy_logps/chosen": -99.36846160888672, |
|
"policy_logps/rejected": -110.91441345214844, |
|
"referece_logps/chosen": -97.86239624023438, |
|
"referece_logps/rejected": -104.65141296386719, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15060700476169586, |
|
"rewards/margins": 0.47569307684898376, |
|
"rewards/rejected": -0.6263000965118408, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.240757138008148e-07, |
|
"logits/chosen": -2.9100852012634277, |
|
"logits/rejected": -2.9250974655151367, |
|
"loss": 0.4921, |
|
"policy_logps/chosen": -144.23516845703125, |
|
"policy_logps/rejected": -140.12838745117188, |
|
"referece_logps/chosen": -142.4854736328125, |
|
"referece_logps/rejected": -133.91094970703125, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17496907711029053, |
|
"rewards/margins": 0.4467761516571045, |
|
"rewards/rejected": -0.621745228767395, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.036345006322358e-07, |
|
"logits/chosen": -2.9227583408355713, |
|
"logits/rejected": -2.936587333679199, |
|
"loss": 0.4516, |
|
"policy_logps/chosen": -130.54054260253906, |
|
"policy_logps/rejected": -137.36415100097656, |
|
"referece_logps/chosen": -129.6831512451172, |
|
"referece_logps/rejected": -129.63467407226562, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.08573976159095764, |
|
"rewards/margins": 0.6872075796127319, |
|
"rewards/rejected": -0.772947371006012, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.832782828537435e-07, |
|
"logits/chosen": -2.946342706680298, |
|
"logits/rejected": -2.9744181632995605, |
|
"loss": 0.4716, |
|
"policy_logps/chosen": -155.42410278320312, |
|
"policy_logps/rejected": -155.65284729003906, |
|
"referece_logps/chosen": -153.79293823242188, |
|
"referece_logps/rejected": -145.5734100341797, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.16311725974082947, |
|
"rewards/margins": 0.8448256850242615, |
|
"rewards/rejected": -1.0079429149627686, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.630158715073812e-07, |
|
"logits/chosen": -2.9480092525482178, |
|
"logits/rejected": -2.971871852874756, |
|
"loss": 0.4588, |
|
"policy_logps/chosen": -117.96566772460938, |
|
"policy_logps/rejected": -120.06287384033203, |
|
"referece_logps/chosen": -117.35060119628906, |
|
"referece_logps/rejected": -114.15763092041016, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06150689721107483, |
|
"rewards/margins": 0.5290161371231079, |
|
"rewards/rejected": -0.5905230045318604, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.428560370317541e-07, |
|
"logits/chosen": -2.9216105937957764, |
|
"logits/rejected": -2.973964214324951, |
|
"loss": 0.4124, |
|
"policy_logps/chosen": -118.88021850585938, |
|
"policy_logps/rejected": -116.6799087524414, |
|
"referece_logps/chosen": -118.77764892578125, |
|
"referece_logps/rejected": -110.41334533691406, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.010256083682179451, |
|
"rewards/margins": 0.6163986921310425, |
|
"rewards/rejected": -0.6266547441482544, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.228075054658095e-07, |
|
"logits/chosen": -2.914088249206543, |
|
"logits/rejected": -2.949735164642334, |
|
"loss": 0.5151, |
|
"policy_logps/chosen": -156.11376953125, |
|
"policy_logps/rejected": -136.4941864013672, |
|
"referece_logps/chosen": -154.32318115234375, |
|
"referece_logps/rejected": -129.20169067382812, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17905890941619873, |
|
"rewards/margins": 0.5501898527145386, |
|
"rewards/rejected": -0.7292487621307373, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.028789546718325e-07, |
|
"logits/chosen": -2.9285836219787598, |
|
"logits/rejected": -2.9289567470550537, |
|
"loss": 0.46, |
|
"policy_logps/chosen": -138.6322021484375, |
|
"policy_logps/rejected": -140.56346130371094, |
|
"referece_logps/chosen": -137.0457763671875, |
|
"referece_logps/rejected": -134.56263732910156, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.15864019095897675, |
|
"rewards/margins": 0.44144219160079956, |
|
"rewards/rejected": -0.6000823974609375, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.830790105792973e-07, |
|
"logits/chosen": -2.9379961490631104, |
|
"logits/rejected": -2.9724767208099365, |
|
"loss": 0.4291, |
|
"policy_logps/chosen": -101.97422790527344, |
|
"policy_logps/rejected": -120.30264282226562, |
|
"referece_logps/chosen": -102.00912475585938, |
|
"referece_logps/rejected": -112.79841613769531, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.0034902598708868027, |
|
"rewards/margins": 0.7539129257202148, |
|
"rewards/rejected": -0.7504226565361023, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.634162434511938e-07, |
|
"logits/chosen": -2.9047234058380127, |
|
"logits/rejected": -2.9031195640563965, |
|
"loss": 0.4965, |
|
"policy_logps/chosen": -143.34796142578125, |
|
"policy_logps/rejected": -137.45285034179688, |
|
"referece_logps/chosen": -140.2106170654297, |
|
"referece_logps/rejected": -128.766845703125, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3137343227863312, |
|
"rewards/margins": 0.5548651814460754, |
|
"rewards/rejected": -0.868599534034729, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.43899164174453e-07, |
|
"logits/chosen": -2.9683728218078613, |
|
"logits/rejected": -2.9473183155059814, |
|
"loss": 0.4951, |
|
"policy_logps/chosen": -99.94713592529297, |
|
"policy_logps/rejected": -99.26773834228516, |
|
"referece_logps/chosen": -100.4857406616211, |
|
"referece_logps/rejected": -93.8021011352539, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.05386023223400116, |
|
"rewards/margins": 0.6004235148429871, |
|
"rewards/rejected": -0.5465632677078247, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.245362205760703e-07, |
|
"logits/chosen": -2.912815570831299, |
|
"logits/rejected": -2.941206216812134, |
|
"loss": 0.4346, |
|
"policy_logps/chosen": -149.0521240234375, |
|
"policy_logps/rejected": -140.37326049804688, |
|
"referece_logps/chosen": -146.72096252441406, |
|
"referece_logps/rejected": -131.8609161376953, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23311704397201538, |
|
"rewards/margins": 0.6181185245513916, |
|
"rewards/rejected": -0.8512355089187622, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.053357937665236e-07, |
|
"logits/chosen": -2.9297289848327637, |
|
"logits/rejected": -2.9020700454711914, |
|
"loss": 0.4962, |
|
"policy_logps/chosen": -130.0217742919922, |
|
"policy_logps/rejected": -139.19395446777344, |
|
"referece_logps/chosen": -128.82655334472656, |
|
"referece_logps/rejected": -132.361572265625, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.11952205747365952, |
|
"rewards/margins": 0.563715934753418, |
|
"rewards/rejected": -0.6832380294799805, |
|
"step": 100 |
|
} |
|
], |
|
"max_steps": 156, |
|
"num_train_epochs": 1, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|