|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9973828840617638, |
|
"eval_steps": 500, |
|
"global_step": 954, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"diff_generated": 0.0, |
|
"epoch": 0.002093692750588851, |
|
"grad_norm": 4027.4986845337753, |
|
"learning_rate": 2.083333333333333e-08, |
|
"logits/chosen": -2.1441590785980225, |
|
"logits/rejected": -2.0543735027313232, |
|
"logps/chosen": -276.82366943359375, |
|
"logps/rejected": -131.32485961914062, |
|
"loss": 140.2437, |
|
"losses_ref": -131.32485961914062, |
|
"ref_logps/chosen": -276.82366943359375, |
|
"ref_logps/rejected": -131.32485961914062, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"u": 1.4901161193847656e-08, |
|
"weight": 1.0 |
|
}, |
|
{ |
|
"diff_generated": 0.004567362368106842, |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 4012.8373505662616, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.2097952365875244, |
|
"logits/rejected": -2.1078758239746094, |
|
"logps/chosen": -280.6259460449219, |
|
"logps/rejected": -162.3510284423828, |
|
"loss": 129.4337, |
|
"losses_ref": -163.54556274414062, |
|
"ref_logps/chosen": -280.68133544921875, |
|
"ref_logps/rejected": -162.3555908203125, |
|
"rewards/accuracies": 0.43359375, |
|
"rewards/chosen": 0.000553958467207849, |
|
"rewards/margins": 0.0005082848947495222, |
|
"rewards/rejected": 4.567361975205131e-05, |
|
"step": 5, |
|
"u": 0.01998738758265972, |
|
"weight": 1.0011132955551147 |
|
}, |
|
{ |
|
"diff_generated": -0.883712887763977, |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 3617.405413942307, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.355677843093872, |
|
"logits/rejected": -2.1583828926086426, |
|
"logps/chosen": -302.09747314453125, |
|
"logps/rejected": -169.69467163085938, |
|
"loss": 157.3847, |
|
"losses_ref": -137.87350463867188, |
|
"ref_logps/chosen": -302.58917236328125, |
|
"ref_logps/rejected": -168.81094360351562, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.004917326383292675, |
|
"rewards/margins": 0.01375445444136858, |
|
"rewards/rejected": -0.00883712898939848, |
|
"step": 10, |
|
"u": -0.573723316192627, |
|
"weight": 0.8237913250923157 |
|
}, |
|
{ |
|
"diff_generated": -3.757080078125, |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 3487.9086553330885, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.285557270050049, |
|
"logits/rejected": -2.1396851539611816, |
|
"logps/chosen": -299.9487609863281, |
|
"logps/rejected": -166.72817993164062, |
|
"loss": 215.9423, |
|
"losses_ref": -61.32612991333008, |
|
"ref_logps/chosen": -304.54766845703125, |
|
"ref_logps/rejected": -162.97108459472656, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 0.0459887757897377, |
|
"rewards/margins": 0.08355957269668579, |
|
"rewards/rejected": -0.03757079690694809, |
|
"step": 15, |
|
"u": -1.074953317642212, |
|
"weight": 0.4649723172187805 |
|
}, |
|
{ |
|
"diff_generated": -13.927907943725586, |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 2892.3287332168984, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.246702194213867, |
|
"logits/rejected": -2.1279449462890625, |
|
"logps/chosen": -267.1871337890625, |
|
"logps/rejected": -170.03897094726562, |
|
"loss": 233.0012, |
|
"losses_ref": -32.27024459838867, |
|
"ref_logps/chosen": -283.3597106933594, |
|
"ref_logps/rejected": -156.11105346679688, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.16172581911087036, |
|
"rewards/margins": 0.30100491642951965, |
|
"rewards/rejected": -0.1392790973186493, |
|
"step": 20, |
|
"u": -0.5478723049163818, |
|
"weight": 0.3134520649909973 |
|
}, |
|
{ |
|
"diff_generated": -26.503625869750977, |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 2024.707131865886, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -2.209564447402954, |
|
"logits/rejected": -2.0659689903259277, |
|
"logps/chosen": -255.67092895507812, |
|
"logps/rejected": -183.784423828125, |
|
"loss": 225.1278, |
|
"losses_ref": -30.188289642333984, |
|
"ref_logps/chosen": -280.2396545410156, |
|
"ref_logps/rejected": -157.2808074951172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.245687335729599, |
|
"rewards/margins": 0.5107235908508301, |
|
"rewards/rejected": -0.2650362551212311, |
|
"step": 25, |
|
"u": 0.18020522594451904, |
|
"weight": 0.2832922041416168 |
|
}, |
|
{ |
|
"diff_generated": -51.14558792114258, |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 1518.1810592262389, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": -2.2818737030029297, |
|
"logits/rejected": -2.199028968811035, |
|
"logps/chosen": -243.2410888671875, |
|
"logps/rejected": -215.5212860107422, |
|
"loss": 229.1218, |
|
"losses_ref": -20.79702377319336, |
|
"ref_logps/chosen": -273.4181823730469, |
|
"ref_logps/rejected": -164.37570190429688, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": 0.30177104473114014, |
|
"rewards/margins": 0.8132268786430359, |
|
"rewards/rejected": -0.5114558935165405, |
|
"step": 30, |
|
"u": -0.08460383862257004, |
|
"weight": 0.19152367115020752 |
|
}, |
|
{ |
|
"diff_generated": -66.31632995605469, |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 1482.6172349050332, |
|
"learning_rate": 7.291666666666666e-07, |
|
"logits/chosen": -2.2653889656066895, |
|
"logits/rejected": -2.1242835521698, |
|
"logps/chosen": -249.3292999267578, |
|
"logps/rejected": -223.139892578125, |
|
"loss": 228.9043, |
|
"losses_ref": -19.583892822265625, |
|
"ref_logps/chosen": -282.82373046875, |
|
"ref_logps/rejected": -156.8235626220703, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.3349445164203644, |
|
"rewards/margins": 0.9981077909469604, |
|
"rewards/rejected": -0.6631633043289185, |
|
"step": 35, |
|
"u": 0.06723131239414215, |
|
"weight": 0.2029893398284912 |
|
}, |
|
{ |
|
"diff_generated": -101.70452880859375, |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 1747.512023088969, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -2.109070062637329, |
|
"logits/rejected": -2.079871654510498, |
|
"logps/chosen": -237.7236328125, |
|
"logps/rejected": -262.9115905761719, |
|
"loss": 238.8995, |
|
"losses_ref": -15.8267822265625, |
|
"ref_logps/chosen": -272.7063903808594, |
|
"ref_logps/rejected": -161.20706176757812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.34982770681381226, |
|
"rewards/margins": 1.366873025894165, |
|
"rewards/rejected": -1.017045259475708, |
|
"step": 40, |
|
"u": -1.1587042808532715, |
|
"weight": 0.09851591289043427 |
|
}, |
|
{ |
|
"diff_generated": -117.0851058959961, |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 1667.7557707134451, |
|
"learning_rate": 9.374999999999999e-07, |
|
"logits/chosen": -2.20316219329834, |
|
"logits/rejected": -2.008223295211792, |
|
"logps/chosen": -257.76983642578125, |
|
"logps/rejected": -278.8745422363281, |
|
"loss": 239.9967, |
|
"losses_ref": -20.097864151000977, |
|
"ref_logps/chosen": -293.736083984375, |
|
"ref_logps/rejected": -161.78945922851562, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.3596626818180084, |
|
"rewards/margins": 1.530513882637024, |
|
"rewards/rejected": -1.1708511114120483, |
|
"step": 45, |
|
"u": -0.12792688608169556, |
|
"weight": 0.16130205988883972 |
|
}, |
|
{ |
|
"diff_generated": -126.9466781616211, |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 1521.2094097818665, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"logits/chosen": -2.1982452869415283, |
|
"logits/rejected": -2.1284544467926025, |
|
"logps/chosen": -232.5095977783203, |
|
"logps/rejected": -295.5307922363281, |
|
"loss": 224.3866, |
|
"losses_ref": -21.150318145751953, |
|
"ref_logps/chosen": -270.96405029296875, |
|
"ref_logps/rejected": -168.58413696289062, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.38454434275627136, |
|
"rewards/margins": 1.6540111303329468, |
|
"rewards/rejected": -1.2694666385650635, |
|
"step": 50, |
|
"u": 0.001223707222379744, |
|
"weight": 0.18796880543231964 |
|
}, |
|
{ |
|
"diff_generated": -141.50799560546875, |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 1612.8192197434123, |
|
"learning_rate": 1.1458333333333333e-06, |
|
"logits/chosen": -2.0737013816833496, |
|
"logits/rejected": -1.9873807430267334, |
|
"logps/chosen": -239.891357421875, |
|
"logps/rejected": -311.09619140625, |
|
"loss": 220.8677, |
|
"losses_ref": -7.660050392150879, |
|
"ref_logps/chosen": -280.08502197265625, |
|
"ref_logps/rejected": -169.58819580078125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.40193670988082886, |
|
"rewards/margins": 1.8170166015625, |
|
"rewards/rejected": -1.4150798320770264, |
|
"step": 55, |
|
"u": -0.9630683660507202, |
|
"weight": 0.08691856265068054 |
|
}, |
|
{ |
|
"diff_generated": -137.93148803710938, |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 1372.8553226775107, |
|
"learning_rate": 1.2499999999999999e-06, |
|
"logits/chosen": -1.9770643711090088, |
|
"logits/rejected": -1.8704265356063843, |
|
"logps/chosen": -242.3487091064453, |
|
"logps/rejected": -295.7236633300781, |
|
"loss": 226.417, |
|
"losses_ref": -8.987265586853027, |
|
"ref_logps/chosen": -281.4112548828125, |
|
"ref_logps/rejected": -157.79214477539062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.39062565565109253, |
|
"rewards/margins": 1.7699406147003174, |
|
"rewards/rejected": -1.37931489944458, |
|
"step": 60, |
|
"u": -0.9851242303848267, |
|
"weight": 0.08782722800970078 |
|
}, |
|
{ |
|
"diff_generated": -155.2223358154297, |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 1255.5204766616016, |
|
"learning_rate": 1.3541666666666667e-06, |
|
"logits/chosen": -1.9109680652618408, |
|
"logits/rejected": -1.800903081893921, |
|
"logps/chosen": -251.7116241455078, |
|
"logps/rejected": -313.6351318359375, |
|
"loss": 226.6359, |
|
"losses_ref": -6.898039817810059, |
|
"ref_logps/chosen": -291.105224609375, |
|
"ref_logps/rejected": -158.41278076171875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3939359784126282, |
|
"rewards/margins": 1.9461593627929688, |
|
"rewards/rejected": -1.5522234439849854, |
|
"step": 65, |
|
"u": -1.2434440851211548, |
|
"weight": 0.07695779949426651 |
|
}, |
|
{ |
|
"diff_generated": -131.47259521484375, |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 1343.9563405956512, |
|
"learning_rate": 1.4583333333333333e-06, |
|
"logits/chosen": -1.8604061603546143, |
|
"logits/rejected": -1.8694736957550049, |
|
"logps/chosen": -233.1003875732422, |
|
"logps/rejected": -294.2840881347656, |
|
"loss": 225.3943, |
|
"losses_ref": -10.19434642791748, |
|
"ref_logps/chosen": -274.62811279296875, |
|
"ref_logps/rejected": -162.8114776611328, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.41527730226516724, |
|
"rewards/margins": 1.7300033569335938, |
|
"rewards/rejected": -1.3147261142730713, |
|
"step": 70, |
|
"u": -1.253035545349121, |
|
"weight": 0.09121803939342499 |
|
}, |
|
{ |
|
"diff_generated": -137.2784423828125, |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 1322.5353000176865, |
|
"learning_rate": 1.5624999999999999e-06, |
|
"logits/chosen": -1.800450086593628, |
|
"logits/rejected": -1.649074912071228, |
|
"logps/chosen": -263.3216247558594, |
|
"logps/rejected": -309.0770568847656, |
|
"loss": 233.1299, |
|
"losses_ref": -10.170949935913086, |
|
"ref_logps/chosen": -306.6936950683594, |
|
"ref_logps/rejected": -171.79859924316406, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4337209165096283, |
|
"rewards/margins": 1.8065054416656494, |
|
"rewards/rejected": -1.3727843761444092, |
|
"step": 75, |
|
"u": -1.3111217021942139, |
|
"weight": 0.08406667411327362 |
|
}, |
|
{ |
|
"diff_generated": -128.09861755371094, |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 1343.7990825981688, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"logits/chosen": -1.6293904781341553, |
|
"logits/rejected": -1.653552770614624, |
|
"logps/chosen": -211.403564453125, |
|
"logps/rejected": -288.49102783203125, |
|
"loss": 223.2768, |
|
"losses_ref": -5.209358215332031, |
|
"ref_logps/chosen": -253.810302734375, |
|
"ref_logps/rejected": -160.39239501953125, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.42406734824180603, |
|
"rewards/margins": 1.7050535678863525, |
|
"rewards/rejected": -1.2809861898422241, |
|
"step": 80, |
|
"u": -1.129665732383728, |
|
"weight": 0.05663755536079407 |
|
}, |
|
{ |
|
"diff_generated": -135.20687866210938, |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 1192.5534502285198, |
|
"learning_rate": 1.7708333333333332e-06, |
|
"logits/chosen": -1.573900818824768, |
|
"logits/rejected": -1.4756534099578857, |
|
"logps/chosen": -239.03305053710938, |
|
"logps/rejected": -300.70184326171875, |
|
"loss": 223.0021, |
|
"losses_ref": -7.026658535003662, |
|
"ref_logps/chosen": -282.1534423828125, |
|
"ref_logps/rejected": -165.49496459960938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.43120384216308594, |
|
"rewards/margins": 1.7832725048065186, |
|
"rewards/rejected": -1.3520687818527222, |
|
"step": 85, |
|
"u": -0.8892000317573547, |
|
"weight": 0.074161596596241 |
|
}, |
|
{ |
|
"diff_generated": -148.8050994873047, |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 1417.0310516206605, |
|
"learning_rate": 1.8749999999999998e-06, |
|
"logits/chosen": -1.3538436889648438, |
|
"logits/rejected": -1.2718507051467896, |
|
"logps/chosen": -234.74856567382812, |
|
"logps/rejected": -304.4095458984375, |
|
"loss": 232.0073, |
|
"losses_ref": -11.248689651489258, |
|
"ref_logps/chosen": -279.6741638183594, |
|
"ref_logps/rejected": -155.60443115234375, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.4492563307285309, |
|
"rewards/margins": 1.937307596206665, |
|
"rewards/rejected": -1.488051176071167, |
|
"step": 90, |
|
"u": -1.1423507928848267, |
|
"weight": 0.08043224364519119 |
|
}, |
|
{ |
|
"diff_generated": -148.7802276611328, |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 1432.3504082681623, |
|
"learning_rate": 1.9791666666666666e-06, |
|
"logits/chosen": -1.1082611083984375, |
|
"logits/rejected": -1.0555765628814697, |
|
"logps/chosen": -235.3373565673828, |
|
"logps/rejected": -309.65771484375, |
|
"loss": 219.1082, |
|
"losses_ref": -13.706560134887695, |
|
"ref_logps/chosen": -277.9019470214844, |
|
"ref_logps/rejected": -160.87747192382812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4256461262702942, |
|
"rewards/margins": 1.9134483337402344, |
|
"rewards/rejected": -1.487802267074585, |
|
"step": 95, |
|
"u": -1.0568161010742188, |
|
"weight": 0.09002764523029327 |
|
}, |
|
{ |
|
"diff_generated": -158.8511962890625, |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 1374.8147610024293, |
|
"learning_rate": 1.9998927475076105e-06, |
|
"logits/chosen": -0.9869598150253296, |
|
"logits/rejected": -0.8535524606704712, |
|
"logps/chosen": -238.96426391601562, |
|
"logps/rejected": -322.4727783203125, |
|
"loss": 236.8658, |
|
"losses_ref": -5.802731513977051, |
|
"ref_logps/chosen": -282.0462951660156, |
|
"ref_logps/rejected": -163.62156677246094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.43082040548324585, |
|
"rewards/margins": 2.0193324089050293, |
|
"rewards/rejected": -1.5885119438171387, |
|
"step": 100, |
|
"u": -1.2527328729629517, |
|
"weight": 0.06292165815830231 |
|
}, |
|
{ |
|
"diff_generated": -147.18008422851562, |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 1625.9248559762682, |
|
"learning_rate": 1.9994570736865402e-06, |
|
"logits/chosen": -1.07206392288208, |
|
"logits/rejected": -0.9393303990364075, |
|
"logps/chosen": -232.5029296875, |
|
"logps/rejected": -308.7837829589844, |
|
"loss": 213.8591, |
|
"losses_ref": -10.191104888916016, |
|
"ref_logps/chosen": -275.3525390625, |
|
"ref_logps/rejected": -161.60366821289062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4284963011741638, |
|
"rewards/margins": 1.9002971649169922, |
|
"rewards/rejected": -1.4718010425567627, |
|
"step": 105, |
|
"u": -1.0033910274505615, |
|
"weight": 0.10204311460256577 |
|
}, |
|
{ |
|
"diff_generated": -128.29922485351562, |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 1231.2639002533556, |
|
"learning_rate": 1.9986864211644068e-06, |
|
"logits/chosen": -1.1658036708831787, |
|
"logits/rejected": -1.0709865093231201, |
|
"logps/chosen": -231.3977813720703, |
|
"logps/rejected": -283.1410217285156, |
|
"loss": 246.1861, |
|
"losses_ref": -6.052565574645996, |
|
"ref_logps/chosen": -272.9906921386719, |
|
"ref_logps/rejected": -154.841796875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.41592931747436523, |
|
"rewards/margins": 1.6989214420318604, |
|
"rewards/rejected": -1.2829921245574951, |
|
"step": 110, |
|
"u": -1.3139088153839111, |
|
"weight": 0.07522980868816376 |
|
}, |
|
{ |
|
"diff_generated": -133.98553466796875, |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 1343.0801296451152, |
|
"learning_rate": 1.997581048233623e-06, |
|
"logits/chosen": -1.1396609544754028, |
|
"logits/rejected": -1.1306806802749634, |
|
"logps/chosen": -226.9049835205078, |
|
"logps/rejected": -293.1982421875, |
|
"loss": 230.2171, |
|
"losses_ref": -5.637959957122803, |
|
"ref_logps/chosen": -269.8221130371094, |
|
"ref_logps/rejected": -159.2126922607422, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4291713833808899, |
|
"rewards/margins": 1.769026756286621, |
|
"rewards/rejected": -1.3398553133010864, |
|
"step": 115, |
|
"u": -1.168405294418335, |
|
"weight": 0.05913761258125305 |
|
}, |
|
{ |
|
"diff_generated": -123.33839416503906, |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 1434.3497350520097, |
|
"learning_rate": 1.9961413253717214e-06, |
|
"logits/chosen": -1.5746419429779053, |
|
"logits/rejected": -1.518913984298706, |
|
"logps/chosen": -228.5142822265625, |
|
"logps/rejected": -284.6359558105469, |
|
"loss": 234.8627, |
|
"losses_ref": -9.012969017028809, |
|
"ref_logps/chosen": -274.33917236328125, |
|
"ref_logps/rejected": -161.29759216308594, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.4582485258579254, |
|
"rewards/margins": 1.6916322708129883, |
|
"rewards/rejected": -1.2333838939666748, |
|
"step": 120, |
|
"u": -0.9588969349861145, |
|
"weight": 0.08884967118501663 |
|
}, |
|
{ |
|
"diff_generated": -151.09429931640625, |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 1274.213985322993, |
|
"learning_rate": 1.994367735117177e-06, |
|
"logits/chosen": -1.6689637899398804, |
|
"logits/rejected": -1.6743271350860596, |
|
"logps/chosen": -216.779541015625, |
|
"logps/rejected": -306.51861572265625, |
|
"loss": 226.4779, |
|
"losses_ref": -6.019095420837402, |
|
"ref_logps/chosen": -259.2029724121094, |
|
"ref_logps/rejected": -155.42433166503906, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.42423415184020996, |
|
"rewards/margins": 1.9351768493652344, |
|
"rewards/rejected": -1.5109429359436035, |
|
"step": 125, |
|
"u": -1.222081184387207, |
|
"weight": 0.08297105878591537 |
|
}, |
|
{ |
|
"diff_generated": -161.22811889648438, |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 1337.1173679216238, |
|
"learning_rate": 1.992260871907687e-06, |
|
"logits/chosen": -1.5299973487854004, |
|
"logits/rejected": -1.4785773754119873, |
|
"logps/chosen": -239.4655303955078, |
|
"logps/rejected": -327.7781982421875, |
|
"loss": 242.8888, |
|
"losses_ref": -7.182534694671631, |
|
"ref_logps/chosen": -280.188720703125, |
|
"ref_logps/rejected": -166.550048828125, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.407231867313385, |
|
"rewards/margins": 2.019512891769409, |
|
"rewards/rejected": -1.612281084060669, |
|
"step": 130, |
|
"u": -1.2559138536453247, |
|
"weight": 0.05781525373458862 |
|
}, |
|
{ |
|
"diff_generated": -169.7267303466797, |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 1374.1488593321894, |
|
"learning_rate": 1.9898214418809326e-06, |
|
"logits/chosen": -1.3805739879608154, |
|
"logits/rejected": -1.3600701093673706, |
|
"logps/chosen": -238.9783935546875, |
|
"logps/rejected": -343.4627380371094, |
|
"loss": 242.9051, |
|
"losses_ref": -2.127274990081787, |
|
"ref_logps/chosen": -281.3921203613281, |
|
"ref_logps/rejected": -173.73602294921875, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.4241371750831604, |
|
"rewards/margins": 2.1214041709899902, |
|
"rewards/rejected": -1.6972671747207642, |
|
"step": 135, |
|
"u": -1.7065389156341553, |
|
"weight": 0.033993639051914215 |
|
}, |
|
{ |
|
"diff_generated": -151.85092163085938, |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 1370.477984750469, |
|
"learning_rate": 1.9870502626379126e-06, |
|
"logits/chosen": -1.3134925365447998, |
|
"logits/rejected": -1.3758270740509033, |
|
"logps/chosen": -227.9882049560547, |
|
"logps/rejected": -322.3777770996094, |
|
"loss": 229.547, |
|
"losses_ref": -4.158343315124512, |
|
"ref_logps/chosen": -270.9952392578125, |
|
"ref_logps/rejected": -170.52687072753906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.43007057905197144, |
|
"rewards/margins": 1.9485795497894287, |
|
"rewards/rejected": -1.518509030342102, |
|
"step": 140, |
|
"u": -1.3956022262573242, |
|
"weight": 0.05143100023269653 |
|
}, |
|
{ |
|
"diff_generated": -146.50155639648438, |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 1794.900701079277, |
|
"learning_rate": 1.983948262968915e-06, |
|
"logits/chosen": -1.5504910945892334, |
|
"logits/rejected": -1.4326040744781494, |
|
"logps/chosen": -259.777587890625, |
|
"logps/rejected": -307.3033752441406, |
|
"loss": 242.1811, |
|
"losses_ref": -2.1557910442352295, |
|
"ref_logps/chosen": -302.7044982910156, |
|
"ref_logps/rejected": -160.8018035888672, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.42926883697509766, |
|
"rewards/margins": 1.8942844867706299, |
|
"rewards/rejected": -1.4650156497955322, |
|
"step": 145, |
|
"u": -1.3577892780303955, |
|
"weight": 0.044694624841213226 |
|
}, |
|
{ |
|
"diff_generated": -155.41860961914062, |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 1420.5558411185323, |
|
"learning_rate": 1.9805164825422237e-06, |
|
"logits/chosen": -2.0522618293762207, |
|
"logits/rejected": -1.9478759765625, |
|
"logps/chosen": -238.4119873046875, |
|
"logps/rejected": -314.91790771484375, |
|
"loss": 224.1883, |
|
"losses_ref": -3.6840145587921143, |
|
"ref_logps/chosen": -281.19158935546875, |
|
"ref_logps/rejected": -159.49932861328125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4277961254119873, |
|
"rewards/margins": 1.9819822311401367, |
|
"rewards/rejected": -1.5541859865188599, |
|
"step": 150, |
|
"u": -1.3957250118255615, |
|
"weight": 0.05671170353889465 |
|
}, |
|
{ |
|
"diff_generated": -151.29141235351562, |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 1339.4660772749999, |
|
"learning_rate": 1.9767560715556594e-06, |
|
"logits/chosen": -2.201369524002075, |
|
"logits/rejected": -2.1122801303863525, |
|
"logps/chosen": -232.8695831298828, |
|
"logps/rejected": -321.6642150878906, |
|
"loss": 230.8218, |
|
"losses_ref": -4.063229084014893, |
|
"ref_logps/chosen": -279.747314453125, |
|
"ref_logps/rejected": -170.372802734375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.46877723932266235, |
|
"rewards/margins": 1.9816913604736328, |
|
"rewards/rejected": -1.5129140615463257, |
|
"step": 155, |
|
"u": -1.4928115606307983, |
|
"weight": 0.05359172821044922 |
|
}, |
|
{ |
|
"diff_generated": -154.98220825195312, |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 1436.3409054374235, |
|
"learning_rate": 1.972668290351084e-06, |
|
"logits/chosen": -2.1720938682556152, |
|
"logits/rejected": -2.0600266456604004, |
|
"logps/chosen": -240.95022583007812, |
|
"logps/rejected": -311.90997314453125, |
|
"loss": 234.915, |
|
"losses_ref": -4.4140777587890625, |
|
"ref_logps/chosen": -289.99774169921875, |
|
"ref_logps/rejected": -156.92776489257812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4904751777648926, |
|
"rewards/margins": 2.040297031402588, |
|
"rewards/rejected": -1.5498219728469849, |
|
"step": 160, |
|
"u": -1.4394853115081787, |
|
"weight": 0.04004598781466484 |
|
}, |
|
{ |
|
"diff_generated": -144.861572265625, |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 1297.629892424431, |
|
"learning_rate": 1.968254508991978e-06, |
|
"logits/chosen": -2.255429267883301, |
|
"logits/rejected": -2.142435073852539, |
|
"logps/chosen": -243.08935546875, |
|
"logps/rejected": -304.804443359375, |
|
"loss": 237.5995, |
|
"losses_ref": -2.3130009174346924, |
|
"ref_logps/chosen": -284.68487548828125, |
|
"ref_logps/rejected": -159.94287109375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4159550666809082, |
|
"rewards/margins": 1.8645708560943604, |
|
"rewards/rejected": -1.4486157894134521, |
|
"step": 165, |
|
"u": -1.5542036294937134, |
|
"weight": 0.030019784346222878 |
|
}, |
|
{ |
|
"diff_generated": -151.61795043945312, |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 1297.3953865872961, |
|
"learning_rate": 1.9635162068042544e-06, |
|
"logits/chosen": -2.119171380996704, |
|
"logits/rejected": -2.017618417739868, |
|
"logps/chosen": -247.02041625976562, |
|
"logps/rejected": -313.6037292480469, |
|
"loss": 237.275, |
|
"losses_ref": -6.966467380523682, |
|
"ref_logps/chosen": -288.6535949707031, |
|
"ref_logps/rejected": -161.9857940673828, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": 0.41633161902427673, |
|
"rewards/margins": 1.9325110912322998, |
|
"rewards/rejected": -1.5161794424057007, |
|
"step": 170, |
|
"u": -1.2121031284332275, |
|
"weight": 0.07038909941911697 |
|
}, |
|
{ |
|
"diff_generated": -144.2270050048828, |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 1438.8100283748447, |
|
"learning_rate": 1.958454971880441e-06, |
|
"logits/chosen": -2.147486686706543, |
|
"logits/rejected": -2.0490543842315674, |
|
"logps/chosen": -268.3631591796875, |
|
"logps/rejected": -305.03021240234375, |
|
"loss": 251.9562, |
|
"losses_ref": -5.818743705749512, |
|
"ref_logps/chosen": -313.4308776855469, |
|
"ref_logps/rejected": -160.80323791503906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4506770670413971, |
|
"rewards/margins": 1.8929469585418701, |
|
"rewards/rejected": -1.442270040512085, |
|
"step": 175, |
|
"u": -1.2561050653457642, |
|
"weight": 0.0653764009475708 |
|
}, |
|
{ |
|
"diff_generated": -141.9085693359375, |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 1207.513295077982, |
|
"learning_rate": 1.9530725005474194e-06, |
|
"logits/chosen": -2.267883539199829, |
|
"logits/rejected": -2.218174457550049, |
|
"logps/chosen": -221.9941864013672, |
|
"logps/rejected": -298.5855407714844, |
|
"loss": 221.5628, |
|
"losses_ref": -3.0411601066589355, |
|
"ref_logps/chosen": -264.38067626953125, |
|
"ref_logps/rejected": -156.677001953125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4238646924495697, |
|
"rewards/margins": 1.8429502248764038, |
|
"rewards/rejected": -1.4190856218338013, |
|
"step": 180, |
|
"u": -1.1714732646942139, |
|
"weight": 0.05968625098466873 |
|
}, |
|
{ |
|
"diff_generated": -150.76657104492188, |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 1307.7780975566222, |
|
"learning_rate": 1.9473705967978807e-06, |
|
"logits/chosen": -2.420961856842041, |
|
"logits/rejected": -2.327650547027588, |
|
"logps/chosen": -227.6046600341797, |
|
"logps/rejected": -303.7978210449219, |
|
"loss": 229.0799, |
|
"losses_ref": -15.570757865905762, |
|
"ref_logps/chosen": -272.23333740234375, |
|
"ref_logps/rejected": -153.03126525878906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4462866187095642, |
|
"rewards/margins": 1.9539520740509033, |
|
"rewards/rejected": -1.5076655149459839, |
|
"step": 185, |
|
"u": -0.8297923803329468, |
|
"weight": 0.09269951283931732 |
|
}, |
|
{ |
|
"diff_generated": -147.6534423828125, |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 1169.0067686339887, |
|
"learning_rate": 1.941351171685697e-06, |
|
"logits/chosen": -2.2705044746398926, |
|
"logits/rejected": -2.2303287982940674, |
|
"logps/chosen": -229.6949920654297, |
|
"logps/rejected": -316.17437744140625, |
|
"loss": 234.7021, |
|
"losses_ref": -5.174070835113525, |
|
"ref_logps/chosen": -274.26959228515625, |
|
"ref_logps/rejected": -168.52093505859375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4457460343837738, |
|
"rewards/margins": 1.9222803115844727, |
|
"rewards/rejected": -1.4765344858169556, |
|
"step": 190, |
|
"u": -1.7719621658325195, |
|
"weight": 0.03358909860253334 |
|
}, |
|
{ |
|
"diff_generated": -159.57711791992188, |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 1222.7096009577886, |
|
"learning_rate": 1.9350162426854148e-06, |
|
"logits/chosen": -2.1345176696777344, |
|
"logits/rejected": -2.1815943717956543, |
|
"logps/chosen": -195.1034393310547, |
|
"logps/rejected": -316.82177734375, |
|
"loss": 220.9707, |
|
"losses_ref": -4.031326770782471, |
|
"ref_logps/chosen": -238.08377075195312, |
|
"ref_logps/rejected": -157.2446746826172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.42980343103408813, |
|
"rewards/margins": 2.025574207305908, |
|
"rewards/rejected": -1.595771074295044, |
|
"step": 195, |
|
"u": -1.546870231628418, |
|
"weight": 0.03703851252794266 |
|
}, |
|
{ |
|
"diff_generated": -167.23892211914062, |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 1368.307859885155, |
|
"learning_rate": 1.9283679330160725e-06, |
|
"logits/chosen": -2.1258459091186523, |
|
"logits/rejected": -2.004584789276123, |
|
"logps/chosen": -238.9210205078125, |
|
"logps/rejected": -331.30718994140625, |
|
"loss": 244.6853, |
|
"losses_ref": -4.569379806518555, |
|
"ref_logps/chosen": -285.3875732421875, |
|
"ref_logps/rejected": -164.0682830810547, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4646654725074768, |
|
"rewards/margins": 2.137054681777954, |
|
"rewards/rejected": -1.6723893880844116, |
|
"step": 200, |
|
"u": -1.6067603826522827, |
|
"weight": 0.04548769071698189 |
|
}, |
|
{ |
|
"diff_generated": -156.21780395507812, |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 1208.8530669692416, |
|
"learning_rate": 1.9214084709295847e-06, |
|
"logits/chosen": -2.0831170082092285, |
|
"logits/rejected": -1.964040756225586, |
|
"logps/chosen": -255.9301300048828, |
|
"logps/rejected": -318.99798583984375, |
|
"loss": 233.3463, |
|
"losses_ref": -5.610936641693115, |
|
"ref_logps/chosen": -300.7832946777344, |
|
"ref_logps/rejected": -162.78021240234375, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.44853147864341736, |
|
"rewards/margins": 2.010709524154663, |
|
"rewards/rejected": -1.5621780157089233, |
|
"step": 205, |
|
"u": -1.3661489486694336, |
|
"weight": 0.06516700237989426 |
|
}, |
|
{ |
|
"diff_generated": -171.98703002929688, |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 1215.8559114876498, |
|
"learning_rate": 1.9141401889639164e-06, |
|
"logits/chosen": -1.9906151294708252, |
|
"logits/rejected": -1.9088771343231201, |
|
"logps/chosen": -235.02249145507812, |
|
"logps/rejected": -345.1544494628906, |
|
"loss": 234.6928, |
|
"losses_ref": -2.863798141479492, |
|
"ref_logps/chosen": -280.8175048828125, |
|
"ref_logps/rejected": -173.16738891601562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.45795029401779175, |
|
"rewards/margins": 2.1778206825256348, |
|
"rewards/rejected": -1.7198702096939087, |
|
"step": 210, |
|
"u": -1.4222519397735596, |
|
"weight": 0.044259898364543915 |
|
}, |
|
{ |
|
"diff_generated": -168.92660522460938, |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 1266.497741976898, |
|
"learning_rate": 1.906565523161312e-06, |
|
"logits/chosen": -1.9987051486968994, |
|
"logits/rejected": -1.9987319707870483, |
|
"logps/chosen": -227.54159545898438, |
|
"logps/rejected": -331.20281982421875, |
|
"loss": 227.5447, |
|
"losses_ref": -2.0428645610809326, |
|
"ref_logps/chosen": -272.03076171875, |
|
"ref_logps/rejected": -162.27622985839844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4448915421962738, |
|
"rewards/margins": 2.134157657623291, |
|
"rewards/rejected": -1.6892658472061157, |
|
"step": 215, |
|
"u": -1.699721097946167, |
|
"weight": 0.028461579233407974 |
|
}, |
|
{ |
|
"diff_generated": -181.3323211669922, |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 1409.5627230630107, |
|
"learning_rate": 1.8986870122518259e-06, |
|
"logits/chosen": -1.996578574180603, |
|
"logits/rejected": -1.9339357614517212, |
|
"logps/chosen": -241.12069702148438, |
|
"logps/rejected": -345.39239501953125, |
|
"loss": 250.5986, |
|
"losses_ref": -13.413454055786133, |
|
"ref_logps/chosen": -284.3638610839844, |
|
"ref_logps/rejected": -164.06004333496094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.43243154883384705, |
|
"rewards/margins": 2.2457549571990967, |
|
"rewards/rejected": -1.8133233785629272, |
|
"step": 220, |
|
"u": -1.559470295906067, |
|
"weight": 0.03921313211321831 |
|
}, |
|
{ |
|
"diff_generated": -167.23196411132812, |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 1439.3131066005014, |
|
"learning_rate": 1.8905072968024423e-06, |
|
"logits/chosen": -2.0085692405700684, |
|
"logits/rejected": -1.9212806224822998, |
|
"logps/chosen": -240.53793334960938, |
|
"logps/rejected": -324.13519287109375, |
|
"loss": 229.6424, |
|
"losses_ref": -2.6123085021972656, |
|
"ref_logps/chosen": -288.477783203125, |
|
"ref_logps/rejected": -156.90321350097656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.47939810156822205, |
|
"rewards/margins": 2.1517176628112793, |
|
"rewards/rejected": -1.6723196506500244, |
|
"step": 225, |
|
"u": -1.7230523824691772, |
|
"weight": 0.04574074223637581 |
|
}, |
|
{ |
|
"diff_generated": -159.5584259033203, |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 1268.731805706848, |
|
"learning_rate": 1.88202911833206e-06, |
|
"logits/chosen": -2.006537914276123, |
|
"logits/rejected": -2.0306971073150635, |
|
"logps/chosen": -209.113037109375, |
|
"logps/rejected": -324.5091552734375, |
|
"loss": 221.1728, |
|
"losses_ref": -2.3901400566101074, |
|
"ref_logps/chosen": -255.0234832763672, |
|
"ref_logps/rejected": -164.95074462890625, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.459104061126709, |
|
"rewards/margins": 2.0546882152557373, |
|
"rewards/rejected": -1.5955842733383179, |
|
"step": 230, |
|
"u": -1.3925855159759521, |
|
"weight": 0.045756690204143524 |
|
}, |
|
{ |
|
"diff_generated": -170.58221435546875, |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 1315.789025978012, |
|
"learning_rate": 1.873255318392644e-06, |
|
"logits/chosen": -1.9995191097259521, |
|
"logits/rejected": -1.8898826837539673, |
|
"logps/chosen": -234.0719757080078, |
|
"logps/rejected": -327.0367736816406, |
|
"loss": 242.3326, |
|
"losses_ref": -4.473931312561035, |
|
"ref_logps/chosen": -280.68048095703125, |
|
"ref_logps/rejected": -156.4545440673828, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.46608513593673706, |
|
"rewards/margins": 2.1719069480895996, |
|
"rewards/rejected": -1.7058223485946655, |
|
"step": 235, |
|
"u": -1.6257721185684204, |
|
"weight": 0.034325193613767624 |
|
}, |
|
{ |
|
"diff_generated": -163.37722778320312, |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 1285.4823648929914, |
|
"learning_rate": 1.8641888376168483e-06, |
|
"logits/chosen": -1.9665982723236084, |
|
"logits/rejected": -1.9548044204711914, |
|
"logps/chosen": -215.7754669189453, |
|
"logps/rejected": -326.5556335449219, |
|
"loss": 231.7613, |
|
"losses_ref": -5.584181308746338, |
|
"ref_logps/chosen": -260.7419128417969, |
|
"ref_logps/rejected": -163.17840576171875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.44966477155685425, |
|
"rewards/margins": 2.083436965942383, |
|
"rewards/rejected": -1.6337722539901733, |
|
"step": 240, |
|
"u": -1.2691129446029663, |
|
"weight": 0.0609821155667305 |
|
}, |
|
{ |
|
"diff_generated": -147.24386596679688, |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 1347.3156065591786, |
|
"learning_rate": 1.8548327147324312e-06, |
|
"logits/chosen": -1.9906165599822998, |
|
"logits/rejected": -1.872373342514038, |
|
"logps/chosen": -243.5879364013672, |
|
"logps/rejected": -304.78204345703125, |
|
"loss": 236.4194, |
|
"losses_ref": -7.212074279785156, |
|
"ref_logps/chosen": -291.9618835449219, |
|
"ref_logps/rejected": -157.53817749023438, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.48373931646347046, |
|
"rewards/margins": 1.9561779499053955, |
|
"rewards/rejected": -1.4724384546279907, |
|
"step": 245, |
|
"u": -0.7504249811172485, |
|
"weight": 0.08246179670095444 |
|
}, |
|
{ |
|
"diff_generated": -136.68235778808594, |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 1280.557570592857, |
|
"learning_rate": 1.8451900855437948e-06, |
|
"logits/chosen": -2.0444495677948, |
|
"logits/rejected": -1.9412866830825806, |
|
"logps/chosen": -237.24496459960938, |
|
"logps/rejected": -305.5830078125, |
|
"loss": 231.6959, |
|
"losses_ref": -4.014006614685059, |
|
"ref_logps/chosen": -285.0312805175781, |
|
"ref_logps/rejected": -168.90065002441406, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.47786277532577515, |
|
"rewards/margins": 1.8446861505508423, |
|
"rewards/rejected": -1.3668235540390015, |
|
"step": 250, |
|
"u": -1.4464961290359497, |
|
"weight": 0.045917607843875885 |
|
}, |
|
{ |
|
"diff_generated": -147.02664184570312, |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 1271.960313695608, |
|
"learning_rate": 1.8352641818809846e-06, |
|
"logits/chosen": -2.012394428253174, |
|
"logits/rejected": -1.9293123483657837, |
|
"logps/chosen": -255.23617553710938, |
|
"logps/rejected": -305.11065673828125, |
|
"loss": 237.2504, |
|
"losses_ref": -3.9721827507019043, |
|
"ref_logps/chosen": -298.58929443359375, |
|
"ref_logps/rejected": -158.0840606689453, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.43353086709976196, |
|
"rewards/margins": 1.9037971496582031, |
|
"rewards/rejected": -1.470266342163086, |
|
"step": 255, |
|
"u": -1.2067726850509644, |
|
"weight": 0.04464394599199295 |
|
}, |
|
{ |
|
"diff_generated": -150.324462890625, |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 1323.4761845101339, |
|
"learning_rate": 1.8250583305165094e-06, |
|
"logits/chosen": -1.7699302434921265, |
|
"logits/rejected": -1.7340294122695923, |
|
"logps/chosen": -232.5556640625, |
|
"logps/rejected": -303.0191650390625, |
|
"loss": 236.4857, |
|
"losses_ref": -3.8249027729034424, |
|
"ref_logps/chosen": -277.13360595703125, |
|
"ref_logps/rejected": -152.69473266601562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4457794725894928, |
|
"rewards/margins": 1.949023962020874, |
|
"rewards/rejected": -1.5032446384429932, |
|
"step": 260, |
|
"u": -1.4394437074661255, |
|
"weight": 0.06012386828660965 |
|
}, |
|
{ |
|
"diff_generated": -146.3737335205078, |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 1232.2132266823505, |
|
"learning_rate": 1.8145759520503357e-06, |
|
"logits/chosen": -1.836775541305542, |
|
"logits/rejected": -1.7096904516220093, |
|
"logps/chosen": -242.7677764892578, |
|
"logps/rejected": -308.00592041015625, |
|
"loss": 219.0433, |
|
"losses_ref": -2.2338509559631348, |
|
"ref_logps/chosen": -290.8897705078125, |
|
"ref_logps/rejected": -161.63217163085938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.48121970891952515, |
|
"rewards/margins": 1.9449567794799805, |
|
"rewards/rejected": -1.4637373685836792, |
|
"step": 265, |
|
"u": -1.7323522567749023, |
|
"weight": 0.03296298533678055 |
|
}, |
|
{ |
|
"diff_generated": -160.1627960205078, |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 1351.7338122517372, |
|
"learning_rate": 1.803820559763439e-06, |
|
"logits/chosen": -1.7946879863739014, |
|
"logits/rejected": -1.7407840490341187, |
|
"logps/chosen": -215.82290649414062, |
|
"logps/rejected": -316.18743896484375, |
|
"loss": 232.6284, |
|
"losses_ref": -3.786867618560791, |
|
"ref_logps/chosen": -261.61407470703125, |
|
"ref_logps/rejected": -156.02464294433594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.45791149139404297, |
|
"rewards/margins": 2.059539318084717, |
|
"rewards/rejected": -1.6016279458999634, |
|
"step": 270, |
|
"u": -1.651993751525879, |
|
"weight": 0.04032987728714943 |
|
}, |
|
{ |
|
"diff_generated": -142.9796905517578, |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 1181.4870635863472, |
|
"learning_rate": 1.7927957584402895e-06, |
|
"logits/chosen": -1.875299096107483, |
|
"logits/rejected": -1.8068253993988037, |
|
"logps/chosen": -228.66781616210938, |
|
"logps/rejected": -303.5104064941406, |
|
"loss": 224.2237, |
|
"losses_ref": -4.741028308868408, |
|
"ref_logps/chosen": -272.44915771484375, |
|
"ref_logps/rejected": -160.53070068359375, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.43781352043151855, |
|
"rewards/margins": 1.8676105737686157, |
|
"rewards/rejected": -1.4297969341278076, |
|
"step": 275, |
|
"u": -1.267141580581665, |
|
"weight": 0.07081650197505951 |
|
}, |
|
{ |
|
"diff_generated": -147.29513549804688, |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 1311.3976945524007, |
|
"learning_rate": 1.78150524316067e-06, |
|
"logits/chosen": -1.9360460042953491, |
|
"logits/rejected": -1.8399826288223267, |
|
"logps/chosen": -244.2842559814453, |
|
"logps/rejected": -319.99603271484375, |
|
"loss": 221.9428, |
|
"losses_ref": -5.114128112792969, |
|
"ref_logps/chosen": -288.6471252441406, |
|
"ref_logps/rejected": -172.7008819580078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4436289668083191, |
|
"rewards/margins": 1.9165802001953125, |
|
"rewards/rejected": -1.4729512929916382, |
|
"step": 280, |
|
"u": -1.33490788936615, |
|
"weight": 0.05145906284451485 |
|
}, |
|
{ |
|
"diff_generated": -165.53073120117188, |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 1217.7737895640616, |
|
"learning_rate": 1.7699527980612304e-06, |
|
"logits/chosen": -2.008852243423462, |
|
"logits/rejected": -1.865282416343689, |
|
"logps/chosen": -235.48495483398438, |
|
"logps/rejected": -324.86236572265625, |
|
"loss": 237.0448, |
|
"losses_ref": -3.6097474098205566, |
|
"ref_logps/chosen": -281.65557861328125, |
|
"ref_logps/rejected": -159.3316650390625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4617062509059906, |
|
"rewards/margins": 2.117013454437256, |
|
"rewards/rejected": -1.655307412147522, |
|
"step": 285, |
|
"u": -1.5152809619903564, |
|
"weight": 0.03953182324767113 |
|
}, |
|
{ |
|
"diff_generated": -152.0204315185547, |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 1365.967274184474, |
|
"learning_rate": 1.758142295067194e-06, |
|
"logits/chosen": -1.9733747243881226, |
|
"logits/rejected": -1.8123550415039062, |
|
"logps/chosen": -253.77774047851562, |
|
"logps/rejected": -316.69073486328125, |
|
"loss": 236.0956, |
|
"losses_ref": -9.393682479858398, |
|
"ref_logps/chosen": -299.4283142089844, |
|
"ref_logps/rejected": -164.6702880859375, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.45650559663772583, |
|
"rewards/margins": 1.9767096042633057, |
|
"rewards/rejected": -1.5202041864395142, |
|
"step": 290, |
|
"u": -1.0198547840118408, |
|
"weight": 0.07342410832643509 |
|
}, |
|
{ |
|
"diff_generated": -156.43539428710938, |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 1274.7347074994798, |
|
"learning_rate": 1.7460776925946416e-06, |
|
"logits/chosen": -2.04952335357666, |
|
"logits/rejected": -1.9772619009017944, |
|
"logps/chosen": -231.12759399414062, |
|
"logps/rejected": -324.58734130859375, |
|
"loss": 216.7738, |
|
"losses_ref": -3.1922709941864014, |
|
"ref_logps/chosen": -275.5738525390625, |
|
"ref_logps/rejected": -168.1519317626953, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4444626271724701, |
|
"rewards/margins": 2.0088164806365967, |
|
"rewards/rejected": -1.5643537044525146, |
|
"step": 295, |
|
"u": -1.7094459533691406, |
|
"weight": 0.027651017531752586 |
|
}, |
|
{ |
|
"diff_generated": -166.10739135742188, |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 1279.289070746857, |
|
"learning_rate": 1.7337630342238039e-06, |
|
"logits/chosen": -2.0860671997070312, |
|
"logits/rejected": -1.9944241046905518, |
|
"logps/chosen": -226.953125, |
|
"logps/rejected": -329.9337158203125, |
|
"loss": 245.769, |
|
"losses_ref": -2.491637706756592, |
|
"ref_logps/chosen": -276.3335266113281, |
|
"ref_logps/rejected": -163.8263397216797, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.49380379915237427, |
|
"rewards/margins": 2.1548776626586914, |
|
"rewards/rejected": -1.6610740423202515, |
|
"step": 300, |
|
"u": -1.8025985956192017, |
|
"weight": 0.020761026069521904 |
|
}, |
|
{ |
|
"diff_generated": -160.49281311035156, |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 1143.8594113545453, |
|
"learning_rate": 1.7212024473438145e-06, |
|
"logits/chosen": -2.1227848529815674, |
|
"logits/rejected": -2.037874698638916, |
|
"logps/chosen": -227.2042694091797, |
|
"logps/rejected": -324.0436096191406, |
|
"loss": 218.3608, |
|
"losses_ref": -5.721261978149414, |
|
"ref_logps/chosen": -275.447265625, |
|
"ref_logps/rejected": -163.55076599121094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4824300706386566, |
|
"rewards/margins": 2.0873584747314453, |
|
"rewards/rejected": -1.6049282550811768, |
|
"step": 305, |
|
"u": -1.5689971446990967, |
|
"weight": 0.03765694424510002 |
|
}, |
|
{ |
|
"diff_generated": -165.9134979248047, |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 1216.910104164249, |
|
"learning_rate": 1.70840014176937e-06, |
|
"logits/chosen": -2.148029327392578, |
|
"logits/rejected": -1.9548304080963135, |
|
"logps/chosen": -259.4276123046875, |
|
"logps/rejected": -335.60723876953125, |
|
"loss": 237.5431, |
|
"losses_ref": -6.571761131286621, |
|
"ref_logps/chosen": -307.9371643066406, |
|
"ref_logps/rejected": -169.69369506835938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.48509567975997925, |
|
"rewards/margins": 2.144230604171753, |
|
"rewards/rejected": -1.659135103225708, |
|
"step": 310, |
|
"u": -1.2953577041625977, |
|
"weight": 0.06081492453813553 |
|
}, |
|
{ |
|
"diff_generated": -150.7538299560547, |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 1285.8252216937017, |
|
"learning_rate": 1.6953604083297663e-06, |
|
"logits/chosen": -2.0963034629821777, |
|
"logits/rejected": -2.005828619003296, |
|
"logps/chosen": -238.0185089111328, |
|
"logps/rejected": -313.0700988769531, |
|
"loss": 232.0059, |
|
"losses_ref": -5.998663425445557, |
|
"ref_logps/chosen": -286.41973876953125, |
|
"ref_logps/rejected": -162.3162841796875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4840126633644104, |
|
"rewards/margins": 1.9915508031845093, |
|
"rewards/rejected": -1.5075383186340332, |
|
"step": 315, |
|
"u": -1.022328495979309, |
|
"weight": 0.07499580085277557 |
|
}, |
|
{ |
|
"diff_generated": -167.22291564941406, |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 1393.3558242713107, |
|
"learning_rate": 1.6820876174307821e-06, |
|
"logits/chosen": -2.0343525409698486, |
|
"logits/rejected": -1.9958488941192627, |
|
"logps/chosen": -220.11959838867188, |
|
"logps/rejected": -324.1341857910156, |
|
"loss": 235.1374, |
|
"losses_ref": -3.5960795879364014, |
|
"ref_logps/chosen": -265.8931579589844, |
|
"ref_logps/rejected": -156.91128540039062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.45773547887802124, |
|
"rewards/margins": 2.129964590072632, |
|
"rewards/rejected": -1.6722290515899658, |
|
"step": 320, |
|
"u": -1.116194486618042, |
|
"weight": 0.04208649322390556 |
|
}, |
|
{ |
|
"diff_generated": -152.4580078125, |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 1260.574816635609, |
|
"learning_rate": 1.668586217589889e-06, |
|
"logits/chosen": -2.028233051300049, |
|
"logits/rejected": -1.943868637084961, |
|
"logps/chosen": -252.96224975585938, |
|
"logps/rejected": -314.017578125, |
|
"loss": 228.4758, |
|
"losses_ref": -1.9372276067733765, |
|
"ref_logps/chosen": -299.65130615234375, |
|
"ref_logps/rejected": -161.55958557128906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.46689024567604065, |
|
"rewards/margins": 1.9914703369140625, |
|
"rewards/rejected": -1.5245802402496338, |
|
"step": 325, |
|
"u": -1.6558067798614502, |
|
"weight": 0.03156626224517822 |
|
}, |
|
{ |
|
"diff_generated": -140.8079376220703, |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 1201.438854630279, |
|
"learning_rate": 1.6548607339452852e-06, |
|
"logits/chosen": -2.0895023345947266, |
|
"logits/rejected": -2.036318778991699, |
|
"logps/chosen": -216.3995361328125, |
|
"logps/rejected": -303.2993469238281, |
|
"loss": 233.4191, |
|
"losses_ref": -2.161651134490967, |
|
"ref_logps/chosen": -261.6273498535156, |
|
"ref_logps/rejected": -162.49142456054688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4522779583930969, |
|
"rewards/margins": 1.8603572845458984, |
|
"rewards/rejected": -1.4080793857574463, |
|
"step": 330, |
|
"u": -1.8709716796875, |
|
"weight": 0.017609911039471626 |
|
}, |
|
{ |
|
"diff_generated": -143.80978393554688, |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 1237.6054714094937, |
|
"learning_rate": 1.6409157667392455e-06, |
|
"logits/chosen": -2.059278964996338, |
|
"logits/rejected": -1.9892032146453857, |
|
"logps/chosen": -235.5959930419922, |
|
"logps/rejected": -307.551513671875, |
|
"loss": 229.4944, |
|
"losses_ref": -6.860163688659668, |
|
"ref_logps/chosen": -283.805908203125, |
|
"ref_logps/rejected": -163.74172973632812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.48209866881370544, |
|
"rewards/margins": 1.920196771621704, |
|
"rewards/rejected": -1.4380979537963867, |
|
"step": 335, |
|
"u": -0.9188238382339478, |
|
"weight": 0.07267802953720093 |
|
}, |
|
{ |
|
"diff_generated": -160.1986846923828, |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 1143.246546308752, |
|
"learning_rate": 1.6267559897763027e-06, |
|
"logits/chosen": -1.8168014287948608, |
|
"logits/rejected": -1.863437294960022, |
|
"logps/chosen": -188.27635192871094, |
|
"logps/rejected": -314.9437561035156, |
|
"loss": 216.8938, |
|
"losses_ref": -1.3188815116882324, |
|
"ref_logps/chosen": -237.00216674804688, |
|
"ref_logps/rejected": -154.74508666992188, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.4872584939002991, |
|
"rewards/margins": 2.089244842529297, |
|
"rewards/rejected": -1.6019866466522217, |
|
"step": 340, |
|
"u": -1.303836703300476, |
|
"weight": 0.029538637027144432 |
|
}, |
|
{ |
|
"diff_generated": -151.67230224609375, |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 1267.0562713440388, |
|
"learning_rate": 1.6123861488567708e-06, |
|
"logits/chosen": -1.9331505298614502, |
|
"logits/rejected": -1.7450395822525024, |
|
"logps/chosen": -256.15277099609375, |
|
"logps/rejected": -316.7372131347656, |
|
"loss": 244.0877, |
|
"losses_ref": -2.1836702823638916, |
|
"ref_logps/chosen": -306.53680419921875, |
|
"ref_logps/rejected": -165.06492614746094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5038406848907471, |
|
"rewards/margins": 2.0205636024475098, |
|
"rewards/rejected": -1.5167229175567627, |
|
"step": 345, |
|
"u": -1.485855221748352, |
|
"weight": 0.0375472754240036 |
|
}, |
|
{ |
|
"diff_generated": -147.20364379882812, |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 1350.564919328469, |
|
"learning_rate": 1.5978110601861409e-06, |
|
"logits/chosen": -1.9117012023925781, |
|
"logits/rejected": -1.8668915033340454, |
|
"logps/chosen": -253.0355224609375, |
|
"logps/rejected": -311.43927001953125, |
|
"loss": 240.3254, |
|
"losses_ref": -2.832030773162842, |
|
"ref_logps/chosen": -299.90985107421875, |
|
"ref_logps/rejected": -164.23562622070312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.46874284744262695, |
|
"rewards/margins": 1.9407793283462524, |
|
"rewards/rejected": -1.472036361694336, |
|
"step": 350, |
|
"u": -1.429086685180664, |
|
"weight": 0.04381849616765976 |
|
}, |
|
{ |
|
"diff_generated": -152.60690307617188, |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 1367.6005309235504, |
|
"learning_rate": 1.5830356087608763e-06, |
|
"logits/chosen": -1.887460708618164, |
|
"logits/rejected": -1.8180389404296875, |
|
"logps/chosen": -214.82699584960938, |
|
"logps/rejected": -321.7936096191406, |
|
"loss": 228.0585, |
|
"losses_ref": -1.8199619054794312, |
|
"ref_logps/chosen": -263.9666748046875, |
|
"ref_logps/rejected": -169.18673706054688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.49139684438705444, |
|
"rewards/margins": 2.017465829849243, |
|
"rewards/rejected": -1.526068925857544, |
|
"step": 355, |
|
"u": -1.6218674182891846, |
|
"weight": 0.02579430676996708 |
|
}, |
|
{ |
|
"diff_generated": -148.06683349609375, |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 1346.2814229526575, |
|
"learning_rate": 1.5680647467311555e-06, |
|
"logits/chosen": -1.8571285009384155, |
|
"logits/rejected": -1.7857725620269775, |
|
"logps/chosen": -244.458251953125, |
|
"logps/rejected": -319.65484619140625, |
|
"loss": 223.1362, |
|
"losses_ref": -2.564044237136841, |
|
"ref_logps/chosen": -293.27410888671875, |
|
"ref_logps/rejected": -171.58799743652344, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4881584644317627, |
|
"rewards/margins": 1.9688268899917603, |
|
"rewards/rejected": -1.480668306350708, |
|
"step": 360, |
|
"u": -1.7225738763809204, |
|
"weight": 0.03375329077243805 |
|
}, |
|
{ |
|
"diff_generated": -158.3987579345703, |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 1338.295994157702, |
|
"learning_rate": 1.552903491741107e-06, |
|
"logits/chosen": -1.837961196899414, |
|
"logits/rejected": -1.839646577835083, |
|
"logps/chosen": -230.9562530517578, |
|
"logps/rejected": -320.73455810546875, |
|
"loss": 230.7235, |
|
"losses_ref": -2.363715648651123, |
|
"ref_logps/chosen": -276.13995361328125, |
|
"ref_logps/rejected": -162.33580017089844, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.45183688402175903, |
|
"rewards/margins": 2.0358242988586426, |
|
"rewards/rejected": -1.5839874744415283, |
|
"step": 365, |
|
"u": -1.599321722984314, |
|
"weight": 0.03038620948791504 |
|
}, |
|
{ |
|
"diff_generated": -143.7940216064453, |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 1122.4826063930961, |
|
"learning_rate": 1.5375569252470895e-06, |
|
"logits/chosen": -1.994361162185669, |
|
"logits/rejected": -1.8850581645965576, |
|
"logps/chosen": -266.71722412109375, |
|
"logps/rejected": -306.846923828125, |
|
"loss": 232.9005, |
|
"losses_ref": -7.7454657554626465, |
|
"ref_logps/chosen": -315.1695251464844, |
|
"ref_logps/rejected": -163.05288696289062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.48452290892601013, |
|
"rewards/margins": 1.922463059425354, |
|
"rewards/rejected": -1.437940239906311, |
|
"step": 370, |
|
"u": -1.2242950201034546, |
|
"weight": 0.05640628933906555 |
|
}, |
|
{ |
|
"diff_generated": -147.28756713867188, |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 1267.5426171485876, |
|
"learning_rate": 1.5220301908145903e-06, |
|
"logits/chosen": -1.984815001487732, |
|
"logits/rejected": -1.8735277652740479, |
|
"logps/chosen": -236.86972045898438, |
|
"logps/rejected": -316.1020812988281, |
|
"loss": 254.4526, |
|
"losses_ref": -1.4826844930648804, |
|
"ref_logps/chosen": -283.3154296875, |
|
"ref_logps/rejected": -168.81448364257812, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.4644569754600525, |
|
"rewards/margins": 1.9373327493667603, |
|
"rewards/rejected": -1.472875714302063, |
|
"step": 375, |
|
"u": -1.3915516138076782, |
|
"weight": 0.03981015831232071 |
|
}, |
|
{ |
|
"diff_generated": -141.51336669921875, |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 1213.313777968865, |
|
"learning_rate": 1.5063284923943028e-06, |
|
"logits/chosen": -1.9686000347137451, |
|
"logits/rejected": -1.856993317604065, |
|
"logps/chosen": -250.8971710205078, |
|
"logps/rejected": -304.9432067871094, |
|
"loss": 236.0771, |
|
"losses_ref": -2.1682116985321045, |
|
"ref_logps/chosen": -298.9543762207031, |
|
"ref_logps/rejected": -163.42984008789062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4805716872215271, |
|
"rewards/margins": 1.895705223083496, |
|
"rewards/rejected": -1.4151335954666138, |
|
"step": 380, |
|
"u": -1.7837505340576172, |
|
"weight": 0.02852563187479973 |
|
}, |
|
{ |
|
"diff_generated": -156.00753784179688, |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 1211.688665180567, |
|
"learning_rate": 1.490457092577968e-06, |
|
"logits/chosen": -1.9195213317871094, |
|
"logits/rejected": -1.8409401178359985, |
|
"logps/chosen": -229.5646209716797, |
|
"logps/rejected": -317.98406982421875, |
|
"loss": 227.1155, |
|
"losses_ref": -1.2010728120803833, |
|
"ref_logps/chosen": -279.9380798339844, |
|
"ref_logps/rejected": -161.97653198242188, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.5037345290184021, |
|
"rewards/margins": 2.063809871673584, |
|
"rewards/rejected": -1.5600755214691162, |
|
"step": 385, |
|
"u": -1.6141672134399414, |
|
"weight": 0.025375287979841232 |
|
}, |
|
{ |
|
"diff_generated": -152.0254669189453, |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 1239.5744414457495, |
|
"learning_rate": 1.4744213108345602e-06, |
|
"logits/chosen": -2.0957484245300293, |
|
"logits/rejected": -1.9671990871429443, |
|
"logps/chosen": -254.6474151611328, |
|
"logps/rejected": -313.9129333496094, |
|
"loss": 233.3016, |
|
"losses_ref": -4.944865703582764, |
|
"ref_logps/chosen": -304.72125244140625, |
|
"ref_logps/rejected": -161.88751220703125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5007385015487671, |
|
"rewards/margins": 2.0209929943084717, |
|
"rewards/rejected": -1.5202546119689941, |
|
"step": 390, |
|
"u": -1.7007286548614502, |
|
"weight": 0.05083342641592026 |
|
}, |
|
{ |
|
"diff_generated": -139.01864624023438, |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 1205.2400489160098, |
|
"learning_rate": 1.4582265217274103e-06, |
|
"logits/chosen": -1.9418761730194092, |
|
"logits/rejected": -1.8380733728408813, |
|
"logps/chosen": -247.5355682373047, |
|
"logps/rejected": -302.6370849609375, |
|
"loss": 239.286, |
|
"losses_ref": -1.7620617151260376, |
|
"ref_logps/chosen": -293.9803161621094, |
|
"ref_logps/rejected": -163.61843872070312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.464447557926178, |
|
"rewards/margins": 1.8546336889266968, |
|
"rewards/rejected": -1.3901864290237427, |
|
"step": 395, |
|
"u": -1.7652490139007568, |
|
"weight": 0.021758217364549637 |
|
}, |
|
{ |
|
"diff_generated": -157.04232788085938, |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 1205.8199372142892, |
|
"learning_rate": 1.4418781531128635e-06, |
|
"logits/chosen": -2.0544238090515137, |
|
"logits/rejected": -2.0346767902374268, |
|
"logps/chosen": -234.49368286132812, |
|
"logps/rejected": -326.8393249511719, |
|
"loss": 233.9242, |
|
"losses_ref": -1.8244788646697998, |
|
"ref_logps/chosen": -282.6474609375, |
|
"ref_logps/rejected": -169.79696655273438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.48153790831565857, |
|
"rewards/margins": 2.0519611835479736, |
|
"rewards/rejected": -1.5704233646392822, |
|
"step": 400, |
|
"u": -1.6305999755859375, |
|
"weight": 0.024077033624053 |
|
}, |
|
{ |
|
"diff_generated": -152.27468872070312, |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 1197.3605051527013, |
|
"learning_rate": 1.4253816843210748e-06, |
|
"logits/chosen": -1.9861503839492798, |
|
"logits/rejected": -1.8832927942276, |
|
"logps/chosen": -244.0829315185547, |
|
"logps/rejected": -317.6984558105469, |
|
"loss": 237.8302, |
|
"losses_ref": -3.3451290130615234, |
|
"ref_logps/chosen": -295.3381652832031, |
|
"ref_logps/rejected": -165.42379760742188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.51255202293396, |
|
"rewards/margins": 2.035299062728882, |
|
"rewards/rejected": -1.5227469205856323, |
|
"step": 405, |
|
"u": -1.4654412269592285, |
|
"weight": 0.036893170326948166 |
|
}, |
|
{ |
|
"diff_generated": -154.22146606445312, |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 1201.6414819745964, |
|
"learning_rate": 1.4087426443195547e-06, |
|
"logits/chosen": -1.9021320343017578, |
|
"logits/rejected": -1.8548545837402344, |
|
"logps/chosen": -212.048583984375, |
|
"logps/rejected": -310.93731689453125, |
|
"loss": 223.4945, |
|
"losses_ref": -1.363377571105957, |
|
"ref_logps/chosen": -261.7601013183594, |
|
"ref_logps/rejected": -156.71588134765625, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.4971153736114502, |
|
"rewards/margins": 2.039330005645752, |
|
"rewards/rejected": -1.5422146320343018, |
|
"step": 410, |
|
"u": -1.499205470085144, |
|
"weight": 0.03244508430361748 |
|
}, |
|
{ |
|
"diff_generated": -152.22821044921875, |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 1267.1758549974509, |
|
"learning_rate": 1.391966609860075e-06, |
|
"logits/chosen": -1.9990746974945068, |
|
"logits/rejected": -1.9241716861724854, |
|
"logps/chosen": -235.38150024414062, |
|
"logps/rejected": -307.2711181640625, |
|
"loss": 229.5926, |
|
"losses_ref": -3.3139452934265137, |
|
"ref_logps/chosen": -284.34393310546875, |
|
"ref_logps/rejected": -155.04290771484375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.48962411284446716, |
|
"rewards/margins": 2.011906147003174, |
|
"rewards/rejected": -1.5222820043563843, |
|
"step": 415, |
|
"u": -1.4567835330963135, |
|
"weight": 0.04380001127719879 |
|
}, |
|
{ |
|
"diff_generated": -142.56979370117188, |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 1188.388834303343, |
|
"learning_rate": 1.3750592036095619e-06, |
|
"logits/chosen": -2.0134921073913574, |
|
"logits/rejected": -1.8790652751922607, |
|
"logps/chosen": -250.85546875, |
|
"logps/rejected": -295.076416015625, |
|
"loss": 235.3638, |
|
"losses_ref": -3.0703201293945312, |
|
"ref_logps/chosen": -298.8680725097656, |
|
"ref_logps/rejected": -152.5066375732422, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4801257252693176, |
|
"rewards/margins": 1.9058234691619873, |
|
"rewards/rejected": -1.4256978034973145, |
|
"step": 420, |
|
"u": -1.4335013628005981, |
|
"weight": 0.04213564842939377 |
|
}, |
|
{ |
|
"diff_generated": -147.99075317382812, |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 1470.580405072441, |
|
"learning_rate": 1.3580260922655984e-06, |
|
"logits/chosen": -1.9547443389892578, |
|
"logits/rejected": -1.8864132165908813, |
|
"logps/chosen": -229.1260223388672, |
|
"logps/rejected": -308.96728515625, |
|
"loss": 230.7763, |
|
"losses_ref": -7.961075782775879, |
|
"ref_logps/chosen": -278.4296875, |
|
"ref_logps/rejected": -160.97654724121094, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 0.4930366575717926, |
|
"rewards/margins": 1.9729440212249756, |
|
"rewards/rejected": -1.4799073934555054, |
|
"step": 425, |
|
"u": -0.9531173706054688, |
|
"weight": 0.06897237151861191 |
|
}, |
|
{ |
|
"diff_generated": -150.89932250976562, |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 1247.106031502474, |
|
"learning_rate": 1.3408729846571713e-06, |
|
"logits/chosen": -1.9829527139663696, |
|
"logits/rejected": -1.7790740728378296, |
|
"logps/chosen": -250.89053344726562, |
|
"logps/rejected": -306.49493408203125, |
|
"loss": 227.0822, |
|
"losses_ref": -3.353726625442505, |
|
"ref_logps/chosen": -299.95831298828125, |
|
"ref_logps/rejected": -155.59561157226562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.49067792296409607, |
|
"rewards/margins": 1.9996709823608398, |
|
"rewards/rejected": -1.5089929103851318, |
|
"step": 430, |
|
"u": -1.7451012134552002, |
|
"weight": 0.029034754261374474 |
|
}, |
|
{ |
|
"diff_generated": -161.92088317871094, |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 1215.392487626507, |
|
"learning_rate": 1.3236056298312956e-06, |
|
"logits/chosen": -1.8760721683502197, |
|
"logits/rejected": -1.7741060256958008, |
|
"logps/chosen": -230.2984161376953, |
|
"logps/rejected": -322.80450439453125, |
|
"loss": 219.6414, |
|
"losses_ref": -2.6977756023406982, |
|
"ref_logps/chosen": -276.49066162109375, |
|
"ref_logps/rejected": -160.88360595703125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4619222581386566, |
|
"rewards/margins": 2.0811312198638916, |
|
"rewards/rejected": -1.6192089319229126, |
|
"step": 435, |
|
"u": -1.3300695419311523, |
|
"weight": 0.049107056111097336 |
|
}, |
|
{ |
|
"diff_generated": -169.64144897460938, |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 1199.549953331359, |
|
"learning_rate": 1.3062298151261591e-06, |
|
"logits/chosen": -1.8538296222686768, |
|
"logits/rejected": -1.7674894332885742, |
|
"logps/chosen": -247.5723114013672, |
|
"logps/rejected": -334.99432373046875, |
|
"loss": 228.2011, |
|
"losses_ref": -3.9634671211242676, |
|
"ref_logps/chosen": -293.4337463378906, |
|
"ref_logps/rejected": -165.35289001464844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4586148262023926, |
|
"rewards/margins": 2.155029296875, |
|
"rewards/rejected": -1.696414589881897, |
|
"step": 440, |
|
"u": -0.911568284034729, |
|
"weight": 0.05750606581568718 |
|
}, |
|
{ |
|
"diff_generated": -176.77850341796875, |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 1134.6331161044943, |
|
"learning_rate": 1.2887513642314372e-06, |
|
"logits/chosen": -1.7472941875457764, |
|
"logits/rejected": -1.6525627374649048, |
|
"logps/chosen": -229.4337921142578, |
|
"logps/rejected": -337.2396545410156, |
|
"loss": 225.3431, |
|
"losses_ref": -0.7648504376411438, |
|
"ref_logps/chosen": -279.60003662109375, |
|
"ref_logps/rejected": -160.4611358642578, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5016621947288513, |
|
"rewards/margins": 2.2694473266601562, |
|
"rewards/rejected": -1.7677850723266602, |
|
"step": 445, |
|
"u": -1.5227153301239014, |
|
"weight": 0.024677513167262077 |
|
}, |
|
{ |
|
"diff_generated": -180.25607299804688, |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 1254.0549089198466, |
|
"learning_rate": 1.271176135236417e-06, |
|
"logits/chosen": -1.8400166034698486, |
|
"logits/rejected": -1.6989673376083374, |
|
"logps/chosen": -255.73233032226562, |
|
"logps/rejected": -341.2875671386719, |
|
"loss": 233.639, |
|
"losses_ref": -4.4961042404174805, |
|
"ref_logps/chosen": -307.17620849609375, |
|
"ref_logps/rejected": -161.03147888183594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5144392251968384, |
|
"rewards/margins": 2.317000150680542, |
|
"rewards/rejected": -1.8025610446929932, |
|
"step": 450, |
|
"u": -1.3673655986785889, |
|
"weight": 0.044325508177280426 |
|
}, |
|
{ |
|
"diff_generated": -192.91705322265625, |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 1206.737012082796, |
|
"learning_rate": 1.2535100186666e-06, |
|
"logits/chosen": -1.808547019958496, |
|
"logits/rejected": -1.6920995712280273, |
|
"logps/chosen": -254.8017578125, |
|
"logps/rejected": -351.9469909667969, |
|
"loss": 245.5463, |
|
"losses_ref": -0.9527796506881714, |
|
"ref_logps/chosen": -304.09619140625, |
|
"ref_logps/rejected": -159.02993774414062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4929441809654236, |
|
"rewards/margins": 2.422114610671997, |
|
"rewards/rejected": -1.9291703701019287, |
|
"step": 455, |
|
"u": -1.6784477233886719, |
|
"weight": 0.02164948359131813 |
|
}, |
|
{ |
|
"diff_generated": -185.23989868164062, |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 1270.8466354695972, |
|
"learning_rate": 1.2357589355094273e-06, |
|
"logits/chosen": -1.8315858840942383, |
|
"logits/rejected": -1.7088918685913086, |
|
"logps/chosen": -269.20538330078125, |
|
"logps/rejected": -338.2021179199219, |
|
"loss": 246.9693, |
|
"losses_ref": -3.263090133666992, |
|
"ref_logps/chosen": -319.02618408203125, |
|
"ref_logps/rejected": -152.9622039794922, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4982084631919861, |
|
"rewards/margins": 2.350607395172119, |
|
"rewards/rejected": -1.8523988723754883, |
|
"step": 460, |
|
"u": -1.5151453018188477, |
|
"weight": 0.04330545663833618 |
|
}, |
|
{ |
|
"diff_generated": -187.97279357910156, |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 1262.5411111889684, |
|
"learning_rate": 1.2179288352297982e-06, |
|
"logits/chosen": -1.7451597452163696, |
|
"logits/rejected": -1.6632684469223022, |
|
"logps/chosen": -227.63937377929688, |
|
"logps/rejected": -355.5631103515625, |
|
"loss": 232.7903, |
|
"losses_ref": -1.6858165264129639, |
|
"ref_logps/chosen": -279.9383544921875, |
|
"ref_logps/rejected": -167.59031677246094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5229896903038025, |
|
"rewards/margins": 2.4027175903320312, |
|
"rewards/rejected": -1.8797279596328735, |
|
"step": 465, |
|
"u": -1.672014594078064, |
|
"weight": 0.022105634212493896 |
|
}, |
|
{ |
|
"diff_generated": -206.70443725585938, |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 1278.739837777923, |
|
"learning_rate": 1.2000256937760445e-06, |
|
"logits/chosen": -1.570615291595459, |
|
"logits/rejected": -1.4970500469207764, |
|
"logps/chosen": -237.1439208984375, |
|
"logps/rejected": -359.52886962890625, |
|
"loss": 239.3887, |
|
"losses_ref": -2.77233624458313, |
|
"ref_logps/chosen": -285.7524719238281, |
|
"ref_logps/rejected": -152.82440185546875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4860858917236328, |
|
"rewards/margins": 2.5531301498413086, |
|
"rewards/rejected": -2.067044496536255, |
|
"step": 470, |
|
"u": -1.4401319026947021, |
|
"weight": 0.04976705089211464 |
|
}, |
|
{ |
|
"diff_generated": -199.87838745117188, |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 1152.4542486150297, |
|
"learning_rate": 1.1820555115770255e-06, |
|
"logits/chosen": -1.4883148670196533, |
|
"logits/rejected": -1.505014419555664, |
|
"logps/chosen": -225.6768798828125, |
|
"logps/rejected": -358.5788269042969, |
|
"loss": 226.472, |
|
"losses_ref": -4.080103874206543, |
|
"ref_logps/chosen": -273.79522705078125, |
|
"ref_logps/rejected": -158.70046997070312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.48118335008621216, |
|
"rewards/margins": 2.4799671173095703, |
|
"rewards/rejected": -1.998783826828003, |
|
"step": 475, |
|
"u": -1.5296900272369385, |
|
"weight": 0.05185595899820328 |
|
}, |
|
{ |
|
"diff_generated": -208.0040283203125, |
|
"epoch": 1.0049725202826485, |
|
"grad_norm": 1301.7034712659586, |
|
"learning_rate": 1.1640243115310217e-06, |
|
"logits/chosen": -1.5732040405273438, |
|
"logits/rejected": -1.5068919658660889, |
|
"logps/chosen": -223.4159393310547, |
|
"logps/rejected": -374.62591552734375, |
|
"loss": 226.8136, |
|
"losses_ref": -4.88800573348999, |
|
"ref_logps/chosen": -293.29400634765625, |
|
"ref_logps/rejected": -166.62188720703125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6987806558609009, |
|
"rewards/margins": 2.778820514678955, |
|
"rewards/rejected": -2.080040454864502, |
|
"step": 480, |
|
"u": -1.9416106939315796, |
|
"weight": 0.03623828664422035 |
|
}, |
|
{ |
|
"diff_generated": -215.4559783935547, |
|
"epoch": 1.0154409840355927, |
|
"grad_norm": 1355.7730407413364, |
|
"learning_rate": 1.1459381369870972e-06, |
|
"logits/chosen": -1.5292342901229858, |
|
"logits/rejected": -1.4070460796356201, |
|
"logps/chosen": -192.32717895507812, |
|
"logps/rejected": -380.6830139160156, |
|
"loss": 181.3888, |
|
"losses_ref": -3.105132818222046, |
|
"ref_logps/chosen": -294.8918762207031, |
|
"ref_logps/rejected": -165.22702026367188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0256469249725342, |
|
"rewards/margins": 3.180206775665283, |
|
"rewards/rejected": -2.15455961227417, |
|
"step": 485, |
|
"u": -2.883460521697998, |
|
"weight": 0.04116251319646835 |
|
}, |
|
{ |
|
"diff_generated": -209.49990844726562, |
|
"epoch": 1.025909447788537, |
|
"grad_norm": 1391.787637976481, |
|
"learning_rate": 1.1278030497196046e-06, |
|
"logits/chosen": -1.2669024467468262, |
|
"logits/rejected": -1.2282651662826538, |
|
"logps/chosen": -166.51095581054688, |
|
"logps/rejected": -365.71807861328125, |
|
"loss": 180.3994, |
|
"losses_ref": -2.922461986541748, |
|
"ref_logps/chosen": -264.67388916015625, |
|
"ref_logps/rejected": -156.21817016601562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9816292524337769, |
|
"rewards/margins": 3.0766279697418213, |
|
"rewards/rejected": -2.094998836517334, |
|
"step": 490, |
|
"u": -3.193206310272217, |
|
"weight": 0.029411697760224342 |
|
}, |
|
{ |
|
"diff_generated": -208.9027099609375, |
|
"epoch": 1.0363779115414813, |
|
"grad_norm": 1404.013865827238, |
|
"learning_rate": 1.1096251278965172e-06, |
|
"logits/chosen": -1.229707956314087, |
|
"logits/rejected": -1.2453272342681885, |
|
"logps/chosen": -167.49026489257812, |
|
"logps/rejected": -368.3177185058594, |
|
"loss": 166.3708, |
|
"losses_ref": -5.491534233093262, |
|
"ref_logps/chosen": -266.75250244140625, |
|
"ref_logps/rejected": -159.41500854492188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9926217794418335, |
|
"rewards/margins": 3.081648588180542, |
|
"rewards/rejected": -2.089026927947998, |
|
"step": 495, |
|
"u": -1.6660839319229126, |
|
"weight": 0.06474236398935318 |
|
}, |
|
{ |
|
"diff_generated": -213.4435577392578, |
|
"epoch": 1.0468463752944255, |
|
"grad_norm": 1314.8398697189618, |
|
"learning_rate": 1.0914104640422679e-06, |
|
"logits/chosen": -1.391204595565796, |
|
"logits/rejected": -1.3654673099517822, |
|
"logps/chosen": -161.88082885742188, |
|
"logps/rejected": -374.336669921875, |
|
"loss": 175.893, |
|
"losses_ref": -1.2716583013534546, |
|
"ref_logps/chosen": -257.99908447265625, |
|
"ref_logps/rejected": -160.89312744140625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9611825942993164, |
|
"rewards/margins": 3.0956180095672607, |
|
"rewards/rejected": -2.1344354152679443, |
|
"step": 500, |
|
"u": -3.1927852630615234, |
|
"weight": 0.015900352969765663 |
|
}, |
|
{ |
|
"diff_generated": -206.3148956298828, |
|
"epoch": 1.05731483904737, |
|
"grad_norm": 1445.075187818513, |
|
"learning_rate": 1.0731651629957721e-06, |
|
"logits/chosen": -1.3434970378875732, |
|
"logits/rejected": -1.305525541305542, |
|
"logps/chosen": -192.31558227539062, |
|
"logps/rejected": -378.03851318359375, |
|
"loss": 185.1733, |
|
"losses_ref": -2.9738333225250244, |
|
"ref_logps/chosen": -297.85302734375, |
|
"ref_logps/rejected": -171.7236328125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.055374264717102, |
|
"rewards/margins": 3.118523359298706, |
|
"rewards/rejected": -2.0631489753723145, |
|
"step": 505, |
|
"u": -3.4035236835479736, |
|
"weight": 0.024688560515642166 |
|
}, |
|
{ |
|
"diff_generated": -186.8843231201172, |
|
"epoch": 1.067783302800314, |
|
"grad_norm": 1181.3904875833675, |
|
"learning_rate": 1.0548953398643274e-06, |
|
"logits/chosen": -1.566375970840454, |
|
"logits/rejected": -1.4381110668182373, |
|
"logps/chosen": -193.49539184570312, |
|
"logps/rejected": -350.2602233886719, |
|
"loss": 179.7564, |
|
"losses_ref": -2.258450984954834, |
|
"ref_logps/chosen": -297.76202392578125, |
|
"ref_logps/rejected": -163.3759002685547, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0426661968231201, |
|
"rewards/margins": 2.9115095138549805, |
|
"rewards/rejected": -1.8688430786132812, |
|
"step": 510, |
|
"u": -2.4679007530212402, |
|
"weight": 0.044156283140182495 |
|
}, |
|
{ |
|
"diff_generated": -200.12066650390625, |
|
"epoch": 1.0782517665532583, |
|
"grad_norm": 1297.9609792649137, |
|
"learning_rate": 1.0366071179740706e-06, |
|
"logits/chosen": -1.6367733478546143, |
|
"logits/rejected": -1.4493190050125122, |
|
"logps/chosen": -209.0851593017578, |
|
"logps/rejected": -365.74053955078125, |
|
"loss": 186.0993, |
|
"losses_ref": -3.8747305870056152, |
|
"ref_logps/chosen": -317.296630859375, |
|
"ref_logps/rejected": -165.619873046875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0821150541305542, |
|
"rewards/margins": 3.0833218097686768, |
|
"rewards/rejected": -2.001206636428833, |
|
"step": 515, |
|
"u": -2.938070297241211, |
|
"weight": 0.03331952169537544 |
|
}, |
|
{ |
|
"diff_generated": -207.33700561523438, |
|
"epoch": 1.0887202303062025, |
|
"grad_norm": 1362.4544964162274, |
|
"learning_rate": 1.0183066268176775e-06, |
|
"logits/chosen": -1.541912317276001, |
|
"logits/rejected": -1.406719446182251, |
|
"logps/chosen": -204.0404052734375, |
|
"logps/rejected": -376.406494140625, |
|
"loss": 202.1916, |
|
"losses_ref": -0.5564223527908325, |
|
"ref_logps/chosen": -307.4422912597656, |
|
"ref_logps/rejected": -169.06948852539062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.034018874168396, |
|
"rewards/margins": 3.107388973236084, |
|
"rewards/rejected": -2.0733699798583984, |
|
"step": 520, |
|
"u": -3.34126353263855, |
|
"weight": 0.007363998796790838 |
|
}, |
|
{ |
|
"diff_generated": -209.34707641601562, |
|
"epoch": 1.0991886940591469, |
|
"grad_norm": 1329.928673259645, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4774454832077026, |
|
"logits/rejected": -1.3976843357086182, |
|
"logps/chosen": -190.63027954101562, |
|
"logps/rejected": -365.6118469238281, |
|
"loss": 191.7027, |
|
"losses_ref": -4.4078168869018555, |
|
"ref_logps/chosen": -289.65625, |
|
"ref_logps/rejected": -156.2647705078125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.990260124206543, |
|
"rewards/margins": 3.083730936050415, |
|
"rewards/rejected": -2.093470811843872, |
|
"step": 525, |
|
"u": -2.4628920555114746, |
|
"weight": 0.034967873245477676 |
|
}, |
|
{ |
|
"diff_generated": -219.25039672851562, |
|
"epoch": 1.109657157812091, |
|
"grad_norm": 1263.6571441007575, |
|
"learning_rate": 9.816933731823228e-07, |
|
"logits/chosen": -1.48972749710083, |
|
"logits/rejected": -1.3531391620635986, |
|
"logps/chosen": -184.37472534179688, |
|
"logps/rejected": -382.6318359375, |
|
"loss": 179.9115, |
|
"losses_ref": -4.217190742492676, |
|
"ref_logps/chosen": -283.9466857910156, |
|
"ref_logps/rejected": -163.38145446777344, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.995719313621521, |
|
"rewards/margins": 3.188223361968994, |
|
"rewards/rejected": -2.1925039291381836, |
|
"step": 530, |
|
"u": -2.7124040126800537, |
|
"weight": 0.03560812398791313 |
|
}, |
|
{ |
|
"diff_generated": -222.4695587158203, |
|
"epoch": 1.1201256215650353, |
|
"grad_norm": 1387.0782441687347, |
|
"learning_rate": 9.633928820259293e-07, |
|
"logits/chosen": -1.2347859144210815, |
|
"logits/rejected": -1.2332684993743896, |
|
"logps/chosen": -162.6536102294922, |
|
"logps/rejected": -388.9007263183594, |
|
"loss": 162.1828, |
|
"losses_ref": -2.344147205352783, |
|
"ref_logps/chosen": -256.69085693359375, |
|
"ref_logps/rejected": -166.43115234375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9403725862503052, |
|
"rewards/margins": 3.1650681495666504, |
|
"rewards/rejected": -2.2246956825256348, |
|
"step": 535, |
|
"u": -2.9149539470672607, |
|
"weight": 0.036711305379867554 |
|
}, |
|
{ |
|
"diff_generated": -220.98583984375, |
|
"epoch": 1.1305940853179797, |
|
"grad_norm": 1297.6784365239848, |
|
"learning_rate": 9.451046601356725e-07, |
|
"logits/chosen": -1.3270328044891357, |
|
"logits/rejected": -1.2543261051177979, |
|
"logps/chosen": -174.17941284179688, |
|
"logps/rejected": -378.3968200683594, |
|
"loss": 168.7943, |
|
"losses_ref": -5.623769760131836, |
|
"ref_logps/chosen": -267.5427551269531, |
|
"ref_logps/rejected": -157.4110107421875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9336336255073547, |
|
"rewards/margins": 3.1434922218322754, |
|
"rewards/rejected": -2.2098584175109863, |
|
"step": 540, |
|
"u": -2.444173574447632, |
|
"weight": 0.07986775040626526 |
|
}, |
|
{ |
|
"diff_generated": -227.32980346679688, |
|
"epoch": 1.1410625490709239, |
|
"grad_norm": 1205.4234546771595, |
|
"learning_rate": 9.268348370042281e-07, |
|
"logits/chosen": -1.3813427686691284, |
|
"logits/rejected": -1.318725347518921, |
|
"logps/chosen": -174.5741424560547, |
|
"logps/rejected": -399.95172119140625, |
|
"loss": 168.9783, |
|
"losses_ref": -3.7193565368652344, |
|
"ref_logps/chosen": -273.3332824707031, |
|
"ref_logps/rejected": -172.62191772460938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9875916242599487, |
|
"rewards/margins": 3.260889768600464, |
|
"rewards/rejected": -2.2732982635498047, |
|
"step": 545, |
|
"u": -3.2109789848327637, |
|
"weight": 0.042879991233348846 |
|
}, |
|
{ |
|
"diff_generated": -249.11709594726562, |
|
"epoch": 1.151531012823868, |
|
"grad_norm": 1266.8954047572045, |
|
"learning_rate": 9.085895359577323e-07, |
|
"logits/chosen": -1.33551824092865, |
|
"logits/rejected": -1.3183876276016235, |
|
"logps/chosen": -167.4661865234375, |
|
"logps/rejected": -403.6305236816406, |
|
"loss": 174.6021, |
|
"losses_ref": -1.4713778495788574, |
|
"ref_logps/chosen": -267.08013916015625, |
|
"ref_logps/rejected": -154.513427734375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9961398243904114, |
|
"rewards/margins": 3.4873108863830566, |
|
"rewards/rejected": -2.491170883178711, |
|
"step": 550, |
|
"u": -2.983215093612671, |
|
"weight": 0.02549784444272518 |
|
}, |
|
{ |
|
"diff_generated": -220.6905059814453, |
|
"epoch": 1.1619994765768125, |
|
"grad_norm": 1242.623006879505, |
|
"learning_rate": 8.903748721034826e-07, |
|
"logits/chosen": -1.410308599472046, |
|
"logits/rejected": -1.3436871767044067, |
|
"logps/chosen": -175.43826293945312, |
|
"logps/rejected": -392.7843322753906, |
|
"loss": 178.3087, |
|
"losses_ref": -2.583522081375122, |
|
"ref_logps/chosen": -277.257080078125, |
|
"ref_logps/rejected": -172.09388732910156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.018188238143921, |
|
"rewards/margins": 3.225093126296997, |
|
"rewards/rejected": -2.206904649734497, |
|
"step": 555, |
|
"u": -2.3211851119995117, |
|
"weight": 0.039024386554956436 |
|
}, |
|
{ |
|
"diff_generated": -246.51620483398438, |
|
"epoch": 1.1724679403297567, |
|
"grad_norm": 1315.7734920897904, |
|
"learning_rate": 8.721969502803953e-07, |
|
"logits/chosen": -1.4283636808395386, |
|
"logits/rejected": -1.4595166444778442, |
|
"logps/chosen": -190.37667846679688, |
|
"logps/rejected": -401.78594970703125, |
|
"loss": 169.0395, |
|
"losses_ref": -0.9799969792366028, |
|
"ref_logps/chosen": -288.49481201171875, |
|
"ref_logps/rejected": -155.26974487304688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9811817407608032, |
|
"rewards/margins": 3.4463438987731934, |
|
"rewards/rejected": -2.4651618003845215, |
|
"step": 560, |
|
"u": -3.4851043224334717, |
|
"weight": 0.011031994596123695 |
|
}, |
|
{ |
|
"diff_generated": -237.05813598632812, |
|
"epoch": 1.1829364040827008, |
|
"grad_norm": 1324.6420978322203, |
|
"learning_rate": 8.540618630129027e-07, |
|
"logits/chosen": -1.5112595558166504, |
|
"logits/rejected": -1.4447729587554932, |
|
"logps/chosen": -197.54592895507812, |
|
"logps/rejected": -408.50360107421875, |
|
"loss": 180.7105, |
|
"losses_ref": -8.419300079345703, |
|
"ref_logps/chosen": -298.6998596191406, |
|
"ref_logps/rejected": -171.4455108642578, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.011539340019226, |
|
"rewards/margins": 3.3821206092834473, |
|
"rewards/rejected": -2.3705811500549316, |
|
"step": 565, |
|
"u": -3.0135345458984375, |
|
"weight": 0.03675166517496109 |
|
}, |
|
{ |
|
"diff_generated": -226.0189666748047, |
|
"epoch": 1.193404867835645, |
|
"grad_norm": 1290.2940777725041, |
|
"learning_rate": 8.359756884689783e-07, |
|
"logits/chosen": -1.5810168981552124, |
|
"logits/rejected": -1.4695533514022827, |
|
"logps/chosen": -179.12496948242188, |
|
"logps/rejected": -392.3472595214844, |
|
"loss": 183.5485, |
|
"losses_ref": -1.6247104406356812, |
|
"ref_logps/chosen": -278.8708801269531, |
|
"ref_logps/rejected": -166.32827758789062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9974590539932251, |
|
"rewards/margins": 3.257648468017578, |
|
"rewards/rejected": -2.2601895332336426, |
|
"step": 570, |
|
"u": -3.080786943435669, |
|
"weight": 0.019241400063037872 |
|
}, |
|
{ |
|
"diff_generated": -213.7429962158203, |
|
"epoch": 1.2038733315885894, |
|
"grad_norm": 1344.148507837478, |
|
"learning_rate": 8.179444884229744e-07, |
|
"logits/chosen": -1.4880825281143188, |
|
"logits/rejected": -1.502333641052246, |
|
"logps/chosen": -189.47103881835938, |
|
"logps/rejected": -378.13275146484375, |
|
"loss": 171.5777, |
|
"losses_ref": -0.9622389674186707, |
|
"ref_logps/chosen": -284.98492431640625, |
|
"ref_logps/rejected": -164.38975524902344, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9551390409469604, |
|
"rewards/margins": 3.092568874359131, |
|
"rewards/rejected": -2.137429714202881, |
|
"step": 575, |
|
"u": -3.1298766136169434, |
|
"weight": 0.013516530394554138 |
|
}, |
|
{ |
|
"diff_generated": -231.77328491210938, |
|
"epoch": 1.2143417953415336, |
|
"grad_norm": 1304.8866597655287, |
|
"learning_rate": 7.999743062239557e-07, |
|
"logits/chosen": -1.4784562587738037, |
|
"logits/rejected": -1.5664056539535522, |
|
"logps/chosen": -176.44296264648438, |
|
"logps/rejected": -421.82135009765625, |
|
"loss": 181.4369, |
|
"losses_ref": -1.1828618049621582, |
|
"ref_logps/chosen": -274.30767822265625, |
|
"ref_logps/rejected": -190.04803466796875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9786470532417297, |
|
"rewards/margins": 3.296379804611206, |
|
"rewards/rejected": -2.3177332878112793, |
|
"step": 580, |
|
"u": -3.0596017837524414, |
|
"weight": 0.012155565433204174 |
|
}, |
|
{ |
|
"diff_generated": -220.5200653076172, |
|
"epoch": 1.2248102590944778, |
|
"grad_norm": 1320.435055959004, |
|
"learning_rate": 7.820711647702017e-07, |
|
"logits/chosen": -1.4778623580932617, |
|
"logits/rejected": -1.5001682043075562, |
|
"logps/chosen": -168.55393981933594, |
|
"logps/rejected": -381.0849304199219, |
|
"loss": 177.0697, |
|
"losses_ref": -2.307084560394287, |
|
"ref_logps/chosen": -260.8992004394531, |
|
"ref_logps/rejected": -160.5648651123047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9234523773193359, |
|
"rewards/margins": 3.128653049468994, |
|
"rewards/rejected": -2.205200672149658, |
|
"step": 585, |
|
"u": -2.9381721019744873, |
|
"weight": 0.03427546098828316 |
|
}, |
|
{ |
|
"diff_generated": -207.7950439453125, |
|
"epoch": 1.235278722847422, |
|
"grad_norm": 1261.3527727961057, |
|
"learning_rate": 7.642410644905726e-07, |
|
"logits/chosen": -1.4036446809768677, |
|
"logits/rejected": -1.4330257177352905, |
|
"logps/chosen": -171.85134887695312, |
|
"logps/rejected": -370.4696960449219, |
|
"loss": 176.7884, |
|
"losses_ref": -2.213914394378662, |
|
"ref_logps/chosen": -269.0211486816406, |
|
"ref_logps/rejected": -162.6746826171875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9716979265213013, |
|
"rewards/margins": 3.0496482849121094, |
|
"rewards/rejected": -2.0779504776000977, |
|
"step": 590, |
|
"u": -2.40622878074646, |
|
"weight": 0.04459633305668831 |
|
}, |
|
{ |
|
"diff_generated": -230.44302368164062, |
|
"epoch": 1.2457471866003664, |
|
"grad_norm": 1305.239745538134, |
|
"learning_rate": 7.464899813334e-07, |
|
"logits/chosen": -1.261853575706482, |
|
"logits/rejected": -1.2570579051971436, |
|
"logps/chosen": -181.5194091796875, |
|
"logps/rejected": -393.291259765625, |
|
"loss": 177.5223, |
|
"losses_ref": -4.516595840454102, |
|
"ref_logps/chosen": -278.3271789550781, |
|
"ref_logps/rejected": -162.84823608398438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9680774807929993, |
|
"rewards/margins": 3.272507905960083, |
|
"rewards/rejected": -2.3044302463531494, |
|
"step": 595, |
|
"u": -2.7008533477783203, |
|
"weight": 0.05883873626589775 |
|
}, |
|
{ |
|
"diff_generated": -232.184326171875, |
|
"epoch": 1.2562156503533106, |
|
"grad_norm": 1268.765281131021, |
|
"learning_rate": 7.288238647635829e-07, |
|
"logits/chosen": -1.4351574182510376, |
|
"logits/rejected": -1.2977135181427002, |
|
"logps/chosen": -184.0857696533203, |
|
"logps/rejected": -400.95361328125, |
|
"loss": 177.9198, |
|
"losses_ref": -3.803828001022339, |
|
"ref_logps/chosen": -284.0093078613281, |
|
"ref_logps/rejected": -168.76925659179688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9992351531982422, |
|
"rewards/margins": 3.3210787773132324, |
|
"rewards/rejected": -2.3218436241149902, |
|
"step": 600, |
|
"u": -2.89520263671875, |
|
"weight": 0.026231110095977783 |
|
}, |
|
{ |
|
"diff_generated": -198.3398895263672, |
|
"epoch": 1.2666841141062548, |
|
"grad_norm": 1202.5442238394803, |
|
"learning_rate": 7.112486357685631e-07, |
|
"logits/chosen": -1.499137043952942, |
|
"logits/rejected": -1.4640613794326782, |
|
"logps/chosen": -186.61227416992188, |
|
"logps/rejected": -356.29132080078125, |
|
"loss": 187.6658, |
|
"losses_ref": -4.061453819274902, |
|
"ref_logps/chosen": -287.03717041015625, |
|
"ref_logps/rejected": -157.95144653320312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.004248857498169, |
|
"rewards/margins": 2.987647771835327, |
|
"rewards/rejected": -1.9833987951278687, |
|
"step": 605, |
|
"u": -3.231105089187622, |
|
"weight": 0.03989076986908913 |
|
}, |
|
{ |
|
"diff_generated": -219.8319091796875, |
|
"epoch": 1.2771525778591992, |
|
"grad_norm": 1294.217440674336, |
|
"learning_rate": 6.937701848738407e-07, |
|
"logits/chosen": -1.41506028175354, |
|
"logits/rejected": -1.4094430208206177, |
|
"logps/chosen": -169.46595764160156, |
|
"logps/rejected": -384.82025146484375, |
|
"loss": 167.9688, |
|
"losses_ref": -1.3657054901123047, |
|
"ref_logps/chosen": -266.1152648925781, |
|
"ref_logps/rejected": -164.98837280273438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9664928317070007, |
|
"rewards/margins": 3.1648120880126953, |
|
"rewards/rejected": -2.19831919670105, |
|
"step": 610, |
|
"u": -3.3554062843322754, |
|
"weight": 0.01739688031375408 |
|
}, |
|
{ |
|
"diff_generated": -217.53970336914062, |
|
"epoch": 1.2876210416121434, |
|
"grad_norm": 1337.3093609895052, |
|
"learning_rate": 6.763943701687045e-07, |
|
"logits/chosen": -1.633599877357483, |
|
"logits/rejected": -1.5192573070526123, |
|
"logps/chosen": -191.32760620117188, |
|
"logps/rejected": -387.1475524902344, |
|
"loss": 183.0882, |
|
"losses_ref": -0.359982430934906, |
|
"ref_logps/chosen": -299.5060119628906, |
|
"ref_logps/rejected": -169.60787963867188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0817840099334717, |
|
"rewards/margins": 3.257181167602539, |
|
"rewards/rejected": -2.1753971576690674, |
|
"step": 615, |
|
"u": -3.340681552886963, |
|
"weight": 0.008451832458376884 |
|
}, |
|
{ |
|
"diff_generated": -208.6597137451172, |
|
"epoch": 1.2980895053650876, |
|
"grad_norm": 1324.2598041902163, |
|
"learning_rate": 6.591270153428288e-07, |
|
"logits/chosen": -1.6454055309295654, |
|
"logits/rejected": -1.489946961402893, |
|
"logps/chosen": -191.6290283203125, |
|
"logps/rejected": -364.0921325683594, |
|
"loss": 178.0635, |
|
"losses_ref": -2.520381450653076, |
|
"ref_logps/chosen": -295.8542785644531, |
|
"ref_logps/rejected": -155.4324188232422, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.042252540588379, |
|
"rewards/margins": 3.128849506378174, |
|
"rewards/rejected": -2.086596965789795, |
|
"step": 620, |
|
"u": -2.7204320430755615, |
|
"weight": 0.02861974760890007 |
|
}, |
|
{ |
|
"diff_generated": -196.55752563476562, |
|
"epoch": 1.308557969118032, |
|
"grad_norm": 1344.8788218911382, |
|
"learning_rate": 6.419739077344016e-07, |
|
"logits/chosen": -1.5530303716659546, |
|
"logits/rejected": -1.423179030418396, |
|
"logps/chosen": -200.18063354492188, |
|
"logps/rejected": -360.1055603027344, |
|
"loss": 179.8101, |
|
"losses_ref": -3.9870200157165527, |
|
"ref_logps/chosen": -300.4015197753906, |
|
"ref_logps/rejected": -163.5480194091797, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0022084712982178, |
|
"rewards/margins": 2.9677836894989014, |
|
"rewards/rejected": -1.9655752182006836, |
|
"step": 625, |
|
"u": -2.844027042388916, |
|
"weight": 0.038288719952106476 |
|
}, |
|
{ |
|
"diff_generated": -198.09622192382812, |
|
"epoch": 1.3190264328709762, |
|
"grad_norm": 1205.0358284390313, |
|
"learning_rate": 6.24940796390438e-07, |
|
"logits/chosen": -1.5373382568359375, |
|
"logits/rejected": -1.444549322128296, |
|
"logps/chosen": -174.25350952148438, |
|
"logps/rejected": -362.83953857421875, |
|
"loss": 166.5968, |
|
"losses_ref": -2.2841248512268066, |
|
"ref_logps/chosen": -274.06365966796875, |
|
"ref_logps/rejected": -164.74331665039062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9981018900871277, |
|
"rewards/margins": 2.9790642261505127, |
|
"rewards/rejected": -1.9809621572494507, |
|
"step": 630, |
|
"u": -2.492745876312256, |
|
"weight": 0.01923806592822075 |
|
}, |
|
{ |
|
"diff_generated": -214.6193389892578, |
|
"epoch": 1.3294948966239204, |
|
"grad_norm": 1314.9134741026285, |
|
"learning_rate": 6.08033390139925e-07, |
|
"logits/chosen": -1.4583691358566284, |
|
"logits/rejected": -1.290028691291809, |
|
"logps/chosen": -190.0717315673828, |
|
"logps/rejected": -369.1700439453125, |
|
"loss": 192.5529, |
|
"losses_ref": -0.966667652130127, |
|
"ref_logps/chosen": -293.4891662597656, |
|
"ref_logps/rejected": -154.55068969726562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0341745615005493, |
|
"rewards/margins": 3.180367946624756, |
|
"rewards/rejected": -2.146193265914917, |
|
"step": 635, |
|
"u": -2.8957810401916504, |
|
"weight": 0.020022699609398842 |
|
}, |
|
{ |
|
"diff_generated": -209.5125732421875, |
|
"epoch": 1.3399633603768648, |
|
"grad_norm": 1311.833771803947, |
|
"learning_rate": 5.912573556804452e-07, |
|
"logits/chosen": -1.4464821815490723, |
|
"logits/rejected": -1.3825037479400635, |
|
"logps/chosen": -181.79258728027344, |
|
"logps/rejected": -380.0604553222656, |
|
"loss": 186.6832, |
|
"losses_ref": -2.0217666625976562, |
|
"ref_logps/chosen": -283.5255126953125, |
|
"ref_logps/rejected": -170.54788208007812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.017329216003418, |
|
"rewards/margins": 3.112454891204834, |
|
"rewards/rejected": -2.095125675201416, |
|
"step": 640, |
|
"u": -2.146329164505005, |
|
"weight": 0.051374662667512894 |
|
}, |
|
{ |
|
"diff_generated": -233.44900512695312, |
|
"epoch": 1.350431824129809, |
|
"grad_norm": 1320.9978857185588, |
|
"learning_rate": 5.746183156789252e-07, |
|
"logits/chosen": -1.4467910528182983, |
|
"logits/rejected": -1.2174046039581299, |
|
"logps/chosen": -190.71127319335938, |
|
"logps/rejected": -401.9010314941406, |
|
"loss": 181.6372, |
|
"losses_ref": -1.3231620788574219, |
|
"ref_logps/chosen": -301.30584716796875, |
|
"ref_logps/rejected": -168.45204162597656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1059458255767822, |
|
"rewards/margins": 3.4404358863830566, |
|
"rewards/rejected": -2.3344900608062744, |
|
"step": 645, |
|
"u": -2.5961060523986816, |
|
"weight": 0.031209224835038185 |
|
}, |
|
{ |
|
"diff_generated": -218.05880737304688, |
|
"epoch": 1.3609002878827532, |
|
"grad_norm": 1268.0769992434364, |
|
"learning_rate": 5.581218468871365e-07, |
|
"logits/chosen": -1.2198398113250732, |
|
"logits/rejected": -1.3189094066619873, |
|
"logps/chosen": -157.86666870117188, |
|
"logps/rejected": -376.75433349609375, |
|
"loss": 168.9012, |
|
"losses_ref": -2.4989333152770996, |
|
"ref_logps/chosen": -252.76400756835938, |
|
"ref_logps/rejected": -158.69552612304688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9489734768867493, |
|
"rewards/margins": 3.129561424255371, |
|
"rewards/rejected": -2.1805882453918457, |
|
"step": 650, |
|
"u": -2.92409086227417, |
|
"weight": 0.0428018681704998 |
|
}, |
|
{ |
|
"diff_generated": -235.935546875, |
|
"epoch": 1.3713687516356974, |
|
"grad_norm": 1347.742524924812, |
|
"learning_rate": 5.417734782725896e-07, |
|
"logits/chosen": -1.2961053848266602, |
|
"logits/rejected": -1.261878252029419, |
|
"logps/chosen": -177.77523803710938, |
|
"logps/rejected": -389.1588134765625, |
|
"loss": 179.405, |
|
"losses_ref": -1.0838311910629272, |
|
"ref_logps/chosen": -277.2697448730469, |
|
"ref_logps/rejected": -153.2233123779297, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9949450492858887, |
|
"rewards/margins": 3.3543007373809814, |
|
"rewards/rejected": -2.3593554496765137, |
|
"step": 655, |
|
"u": -3.1044487953186035, |
|
"weight": 0.017367416992783546 |
|
}, |
|
{ |
|
"diff_generated": -211.7913055419922, |
|
"epoch": 1.3818372153886418, |
|
"grad_norm": 1311.034191538654, |
|
"learning_rate": 5.255786891654399e-07, |
|
"logits/chosen": -1.2746165990829468, |
|
"logits/rejected": -1.2540855407714844, |
|
"logps/chosen": -170.9514923095703, |
|
"logps/rejected": -376.79229736328125, |
|
"loss": 174.0495, |
|
"losses_ref": -2.949699878692627, |
|
"ref_logps/chosen": -268.7665100097656, |
|
"ref_logps/rejected": -165.0010223388672, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9781501889228821, |
|
"rewards/margins": 3.0960631370544434, |
|
"rewards/rejected": -2.117912769317627, |
|
"step": 660, |
|
"u": -1.9965251684188843, |
|
"weight": 0.04027215391397476 |
|
}, |
|
{ |
|
"diff_generated": -224.0879364013672, |
|
"epoch": 1.392305679141586, |
|
"grad_norm": 1328.7070235599076, |
|
"learning_rate": 5.095429074220319e-07, |
|
"logits/chosen": -1.2053465843200684, |
|
"logits/rejected": -1.1557897329330444, |
|
"logps/chosen": -175.30589294433594, |
|
"logps/rejected": -393.43218994140625, |
|
"loss": 184.6881, |
|
"losses_ref": -3.8794121742248535, |
|
"ref_logps/chosen": -274.28826904296875, |
|
"ref_logps/rejected": -169.34422302246094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9898236989974976, |
|
"rewards/margins": 3.230703353881836, |
|
"rewards/rejected": -2.240879535675049, |
|
"step": 665, |
|
"u": -3.0530405044555664, |
|
"weight": 0.03465485945343971 |
|
}, |
|
{ |
|
"diff_generated": -240.96484375, |
|
"epoch": 1.4027741428945302, |
|
"grad_norm": 1353.971116750616, |
|
"learning_rate": 4.936715076056974e-07, |
|
"logits/chosen": -1.242436408996582, |
|
"logits/rejected": -1.24913489818573, |
|
"logps/chosen": -183.4954833984375, |
|
"logps/rejected": -405.70050048828125, |
|
"loss": 171.9422, |
|
"losses_ref": -0.8431612253189087, |
|
"ref_logps/chosen": -284.3236999511719, |
|
"ref_logps/rejected": -164.7356414794922, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0082820653915405, |
|
"rewards/margins": 3.4179306030273438, |
|
"rewards/rejected": -2.4096481800079346, |
|
"step": 670, |
|
"u": -3.2680907249450684, |
|
"weight": 0.007226690649986267 |
|
}, |
|
{ |
|
"diff_generated": -230.71115112304688, |
|
"epoch": 1.4132426066474744, |
|
"grad_norm": 1301.2684317901687, |
|
"learning_rate": 4.779698091854098e-07, |
|
"logits/chosen": -1.4362276792526245, |
|
"logits/rejected": -1.2898997068405151, |
|
"logps/chosen": -196.05447387695312, |
|
"logps/rejected": -400.25994873046875, |
|
"loss": 193.0132, |
|
"losses_ref": -0.4112131595611572, |
|
"ref_logps/chosen": -306.9317321777344, |
|
"ref_logps/rejected": -169.54879760742188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1087725162506104, |
|
"rewards/margins": 3.415884494781494, |
|
"rewards/rejected": -2.307111978530884, |
|
"step": 675, |
|
"u": -3.2834739685058594, |
|
"weight": 0.006595195736736059 |
|
}, |
|
{ |
|
"diff_generated": -205.662841796875, |
|
"epoch": 1.4237110704004188, |
|
"grad_norm": 1344.3331479958706, |
|
"learning_rate": 4.624430747529102e-07, |
|
"logits/chosen": -1.3598095178604126, |
|
"logits/rejected": -1.158661961555481, |
|
"logps/chosen": -205.39236450195312, |
|
"logps/rejected": -369.7188720703125, |
|
"loss": 181.4401, |
|
"losses_ref": -1.5265072584152222, |
|
"ref_logps/chosen": -313.2685546875, |
|
"ref_logps/rejected": -164.05599975585938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0787618160247803, |
|
"rewards/margins": 3.135390520095825, |
|
"rewards/rejected": -2.056628465652466, |
|
"step": 680, |
|
"u": -3.1280694007873535, |
|
"weight": 0.024629075080156326 |
|
}, |
|
{ |
|
"diff_generated": -223.96005249023438, |
|
"epoch": 1.434179534153363, |
|
"grad_norm": 1420.0899808408303, |
|
"learning_rate": 4.4709650825889277e-07, |
|
"logits/chosen": -1.202007532119751, |
|
"logits/rejected": -1.1467583179473877, |
|
"logps/chosen": -161.4755859375, |
|
"logps/rejected": -394.6024475097656, |
|
"loss": 181.6898, |
|
"losses_ref": -0.6423639059066772, |
|
"ref_logps/chosen": -258.74224853515625, |
|
"ref_logps/rejected": -170.64236450195312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9726665616035461, |
|
"rewards/margins": 3.2122673988342285, |
|
"rewards/rejected": -2.239600658416748, |
|
"step": 685, |
|
"u": -2.6377460956573486, |
|
"weight": 0.008045530878007412 |
|
}, |
|
{ |
|
"diff_generated": -199.37355041503906, |
|
"epoch": 1.4446479979063072, |
|
"grad_norm": 1308.6573761420323, |
|
"learning_rate": 4.3193525326884426e-07, |
|
"logits/chosen": -1.3359885215759277, |
|
"logits/rejected": -1.2320592403411865, |
|
"logps/chosen": -199.9832000732422, |
|
"logps/rejected": -364.55865478515625, |
|
"loss": 197.232, |
|
"losses_ref": -2.2240054607391357, |
|
"ref_logps/chosen": -303.2825927734375, |
|
"ref_logps/rejected": -165.18508911132812, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 1.0329937934875488, |
|
"rewards/margins": 3.026729106903076, |
|
"rewards/rejected": -1.9937355518341064, |
|
"step": 690, |
|
"u": -3.028186559677124, |
|
"weight": 0.02633347176015377 |
|
}, |
|
{ |
|
"diff_generated": -224.0160369873047, |
|
"epoch": 1.4551164616592516, |
|
"grad_norm": 1299.079432778448, |
|
"learning_rate": 4.1696439123912406e-07, |
|
"logits/chosen": -1.2223880290985107, |
|
"logits/rejected": -1.209564447402954, |
|
"logps/chosen": -174.464111328125, |
|
"logps/rejected": -393.27691650390625, |
|
"loss": 178.2965, |
|
"losses_ref": -4.651436805725098, |
|
"ref_logps/chosen": -266.0323486328125, |
|
"ref_logps/rejected": -169.26083374023438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.915682315826416, |
|
"rewards/margins": 3.1558427810668945, |
|
"rewards/rejected": -2.2401604652404785, |
|
"step": 695, |
|
"u": -2.1582460403442383, |
|
"weight": 0.050126731395721436 |
|
}, |
|
{ |
|
"diff_generated": -229.42153930664062, |
|
"epoch": 1.4655849254121958, |
|
"grad_norm": 1181.3986183050397, |
|
"learning_rate": 4.0218893981385927e-07, |
|
"logits/chosen": -1.2920024394989014, |
|
"logits/rejected": -1.2460237741470337, |
|
"logps/chosen": -169.0710906982422, |
|
"logps/rejected": -389.62451171875, |
|
"loss": 185.8502, |
|
"losses_ref": -2.0431206226348877, |
|
"ref_logps/chosen": -263.98992919921875, |
|
"ref_logps/rejected": -160.20298767089844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9491885900497437, |
|
"rewards/margins": 3.243403196334839, |
|
"rewards/rejected": -2.294214963912964, |
|
"step": 700, |
|
"u": -2.900634765625, |
|
"weight": 0.029426846653223038 |
|
}, |
|
{ |
|
"diff_generated": -238.50405883789062, |
|
"epoch": 1.47605338916514, |
|
"grad_norm": 1395.6758572737517, |
|
"learning_rate": 3.87613851143229e-07, |
|
"logits/chosen": -1.321358323097229, |
|
"logits/rejected": -1.2150487899780273, |
|
"logps/chosen": -193.1901397705078, |
|
"logps/rejected": -408.7565002441406, |
|
"loss": 180.6914, |
|
"losses_ref": -7.425305366516113, |
|
"ref_logps/chosen": -295.8336486816406, |
|
"ref_logps/rejected": -170.25244140625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0264348983764648, |
|
"rewards/margins": 3.411475419998169, |
|
"rewards/rejected": -2.385040760040283, |
|
"step": 705, |
|
"u": -2.5025954246520996, |
|
"weight": 0.05010579898953438 |
|
}, |
|
{ |
|
"diff_generated": -232.7953643798828, |
|
"epoch": 1.4865218529180844, |
|
"grad_norm": 1298.8055689658759, |
|
"learning_rate": 3.7324401022369744e-07, |
|
"logits/chosen": -1.322563886642456, |
|
"logits/rejected": -1.1327731609344482, |
|
"logps/chosen": -194.57736206054688, |
|
"logps/rejected": -386.1799011230469, |
|
"loss": 178.1232, |
|
"losses_ref": -1.3739917278289795, |
|
"ref_logps/chosen": -296.6303405761719, |
|
"ref_logps/rejected": -153.38453674316406, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0205297470092773, |
|
"rewards/margins": 3.3484835624694824, |
|
"rewards/rejected": -2.327953815460205, |
|
"step": 710, |
|
"u": -3.248492479324341, |
|
"weight": 0.022353414446115494 |
|
}, |
|
{ |
|
"diff_generated": -204.0673370361328, |
|
"epoch": 1.4969903166710286, |
|
"grad_norm": 1434.009703095031, |
|
"learning_rate": 3.5908423326075455e-07, |
|
"logits/chosen": -1.2674996852874756, |
|
"logits/rejected": -1.242331862449646, |
|
"logps/chosen": -167.33718872070312, |
|
"logps/rejected": -369.3961486816406, |
|
"loss": 183.2372, |
|
"losses_ref": -1.1576902866363525, |
|
"ref_logps/chosen": -261.9808044433594, |
|
"ref_logps/rejected": -165.32882690429688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9464362263679504, |
|
"rewards/margins": 2.987109661102295, |
|
"rewards/rejected": -2.0406734943389893, |
|
"step": 715, |
|
"u": -2.994257688522339, |
|
"weight": 0.022263679653406143 |
|
}, |
|
{ |
|
"diff_generated": -233.936767578125, |
|
"epoch": 1.5074587804239727, |
|
"grad_norm": 1304.2767992641454, |
|
"learning_rate": 3.45139266054715e-07, |
|
"logits/chosen": -1.318178415298462, |
|
"logits/rejected": -1.1334383487701416, |
|
"logps/chosen": -197.61227416992188, |
|
"logps/rejected": -397.12127685546875, |
|
"loss": 183.3899, |
|
"losses_ref": -1.6034200191497803, |
|
"ref_logps/chosen": -309.3571472167969, |
|
"ref_logps/rejected": -163.1844940185547, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1174486875534058, |
|
"rewards/margins": 3.4568161964416504, |
|
"rewards/rejected": -2.3393678665161133, |
|
"step": 720, |
|
"u": -3.3942806720733643, |
|
"weight": 0.019716758280992508 |
|
}, |
|
{ |
|
"diff_generated": -244.8833770751953, |
|
"epoch": 1.5179272441769172, |
|
"grad_norm": 1236.5966034907726, |
|
"learning_rate": 3.314137824101111e-07, |
|
"logits/chosen": -1.306779384613037, |
|
"logits/rejected": -1.1290355920791626, |
|
"logps/chosen": -218.06015014648438, |
|
"logps/rejected": -403.7084045410156, |
|
"loss": 191.625, |
|
"losses_ref": -2.257856845855713, |
|
"ref_logps/chosen": -318.39056396484375, |
|
"ref_logps/rejected": -158.82498168945312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0033042430877686, |
|
"rewards/margins": 3.4521377086639404, |
|
"rewards/rejected": -2.448833703994751, |
|
"step": 725, |
|
"u": -3.3267006874084473, |
|
"weight": 0.032559871673583984 |
|
}, |
|
{ |
|
"diff_generated": -222.01101684570312, |
|
"epoch": 1.5283957079298613, |
|
"grad_norm": 1211.6699328046157, |
|
"learning_rate": 3.179123825692178e-07, |
|
"logits/chosen": -1.248240351676941, |
|
"logits/rejected": -1.091903805732727, |
|
"logps/chosen": -175.27281188964844, |
|
"logps/rejected": -383.36309814453125, |
|
"loss": 173.3922, |
|
"losses_ref": -5.464686393737793, |
|
"ref_logps/chosen": -273.9178771972656, |
|
"ref_logps/rejected": -161.35206604003906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9864505529403687, |
|
"rewards/margins": 3.2065606117248535, |
|
"rewards/rejected": -2.2201101779937744, |
|
"step": 730, |
|
"u": -2.946007013320923, |
|
"weight": 0.04170671105384827 |
|
}, |
|
{ |
|
"diff_generated": -220.7677764892578, |
|
"epoch": 1.5388641716828055, |
|
"grad_norm": 1300.7738080642184, |
|
"learning_rate": 3.0463959167023335e-07, |
|
"logits/chosen": -1.2869834899902344, |
|
"logits/rejected": -1.1894266605377197, |
|
"logps/chosen": -182.8350372314453, |
|
"logps/rejected": -379.5199890136719, |
|
"loss": 171.4061, |
|
"losses_ref": -4.5947465896606445, |
|
"ref_logps/chosen": -284.21063232421875, |
|
"ref_logps/rejected": -158.75221252441406, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0137560367584229, |
|
"rewards/margins": 3.2214341163635254, |
|
"rewards/rejected": -2.2076778411865234, |
|
"step": 735, |
|
"u": -2.485495090484619, |
|
"weight": 0.053016532212495804 |
|
}, |
|
{ |
|
"diff_generated": -238.62289428710938, |
|
"epoch": 1.54933263543575, |
|
"grad_norm": 1330.9443663432232, |
|
"learning_rate": 2.915998582306299e-07, |
|
"logits/chosen": -1.3296325206756592, |
|
"logits/rejected": -1.1434093713760376, |
|
"logps/chosen": -192.86752319335938, |
|
"logps/rejected": -412.7796325683594, |
|
"loss": 171.9964, |
|
"losses_ref": -0.9953049421310425, |
|
"ref_logps/chosen": -298.61065673828125, |
|
"ref_logps/rejected": -174.15672302246094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0574313402175903, |
|
"rewards/margins": 3.443660259246826, |
|
"rewards/rejected": -2.3862290382385254, |
|
"step": 740, |
|
"u": -3.0645031929016113, |
|
"weight": 0.014706036075949669 |
|
}, |
|
{ |
|
"diff_generated": -232.26016235351562, |
|
"epoch": 1.559801099188694, |
|
"grad_norm": 1284.1954470666844, |
|
"learning_rate": 2.7879755265618557e-07, |
|
"logits/chosen": -1.1518179178237915, |
|
"logits/rejected": -1.1568098068237305, |
|
"logps/chosen": -160.57080078125, |
|
"logps/rejected": -390.94854736328125, |
|
"loss": 177.5848, |
|
"losses_ref": -0.8798303604125977, |
|
"ref_logps/chosen": -254.80648803710938, |
|
"ref_logps/rejected": -158.68838500976562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9423569440841675, |
|
"rewards/margins": 3.264958620071411, |
|
"rewards/rejected": -2.322601556777954, |
|
"step": 745, |
|
"u": -2.921161413192749, |
|
"weight": 0.0170670785009861 |
|
}, |
|
{ |
|
"diff_generated": -233.20187377929688, |
|
"epoch": 1.5702695629416383, |
|
"grad_norm": 1289.4633991370238, |
|
"learning_rate": 2.6623696577619625e-07, |
|
"logits/chosen": -1.2346287965774536, |
|
"logits/rejected": -1.2745471000671387, |
|
"logps/chosen": -192.0581512451172, |
|
"logps/rejected": -391.81146240234375, |
|
"loss": 182.0347, |
|
"losses_ref": -1.6879841089248657, |
|
"ref_logps/chosen": -290.81500244140625, |
|
"ref_logps/rejected": -158.6095428466797, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9875686764717102, |
|
"rewards/margins": 3.3195877075195312, |
|
"rewards/rejected": -2.3320186138153076, |
|
"step": 750, |
|
"u": -3.0655760765075684, |
|
"weight": 0.02230766788125038 |
|
}, |
|
{ |
|
"diff_generated": -217.38137817382812, |
|
"epoch": 1.5807380266945825, |
|
"grad_norm": 1443.1241349727247, |
|
"learning_rate": 2.5392230740535846e-07, |
|
"logits/chosen": -1.4136921167373657, |
|
"logits/rejected": -1.14936363697052, |
|
"logps/chosen": -205.6985321044922, |
|
"logps/rejected": -384.5464782714844, |
|
"loss": 193.6084, |
|
"losses_ref": -2.2517495155334473, |
|
"ref_logps/chosen": -317.1262512207031, |
|
"ref_logps/rejected": -167.1651153564453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1142771244049072, |
|
"rewards/margins": 3.288090467453003, |
|
"rewards/rejected": -2.173813581466675, |
|
"step": 755, |
|
"u": -2.5095248222351074, |
|
"weight": 0.04148329049348831 |
|
}, |
|
{ |
|
"diff_generated": -223.0001220703125, |
|
"epoch": 1.5912064904475267, |
|
"grad_norm": 1315.9291386894508, |
|
"learning_rate": 2.418577049328058e-07, |
|
"logits/chosen": -1.6086959838867188, |
|
"logits/rejected": -1.2083603143692017, |
|
"logps/chosen": -214.2064666748047, |
|
"logps/rejected": -383.09918212890625, |
|
"loss": 193.4667, |
|
"losses_ref": -0.697050929069519, |
|
"ref_logps/chosen": -327.52081298828125, |
|
"ref_logps/rejected": -160.09909057617188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1331430673599243, |
|
"rewards/margins": 3.3631443977355957, |
|
"rewards/rejected": -2.2300009727478027, |
|
"step": 760, |
|
"u": -3.4212822914123535, |
|
"weight": 0.02071799524128437 |
|
}, |
|
{ |
|
"diff_generated": -240.10708618164062, |
|
"epoch": 1.6016749542004711, |
|
"grad_norm": 1354.8278973512276, |
|
"learning_rate": 2.300472019387697e-07, |
|
"logits/chosen": -1.3740001916885376, |
|
"logits/rejected": -1.2972242832183838, |
|
"logps/chosen": -184.8181915283203, |
|
"logps/rejected": -400.69439697265625, |
|
"loss": 183.1763, |
|
"losses_ref": -5.5122270584106445, |
|
"ref_logps/chosen": -284.5431213378906, |
|
"ref_logps/rejected": -160.58731079101562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9972493052482605, |
|
"rewards/margins": 3.398320436477661, |
|
"rewards/rejected": -2.4010708332061768, |
|
"step": 765, |
|
"u": -2.9249844551086426, |
|
"weight": 0.04190880060195923 |
|
}, |
|
{ |
|
"diff_generated": -224.24789428710938, |
|
"epoch": 1.6121434179534153, |
|
"grad_norm": 1294.896383811541, |
|
"learning_rate": 2.1849475683932994e-07, |
|
"logits/chosen": -1.3714028596878052, |
|
"logits/rejected": -1.3127011060714722, |
|
"logps/chosen": -184.06544494628906, |
|
"logps/rejected": -384.2123107910156, |
|
"loss": 179.5198, |
|
"losses_ref": -3.6349315643310547, |
|
"ref_logps/chosen": -284.44268798828125, |
|
"ref_logps/rejected": -159.9644012451172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.003772497177124, |
|
"rewards/margins": 3.246250867843628, |
|
"rewards/rejected": -2.242478847503662, |
|
"step": 770, |
|
"u": -2.7088732719421387, |
|
"weight": 0.04078099876642227 |
|
}, |
|
{ |
|
"diff_generated": -228.37911987304688, |
|
"epoch": 1.6226118817063595, |
|
"grad_norm": 1315.1478573927377, |
|
"learning_rate": 2.0720424155971038e-07, |
|
"logits/chosen": -1.4367603063583374, |
|
"logits/rejected": -1.2870023250579834, |
|
"logps/chosen": -201.5555877685547, |
|
"logps/rejected": -386.1324157714844, |
|
"loss": 176.5013, |
|
"losses_ref": -2.8903164863586426, |
|
"ref_logps/chosen": -306.54461669921875, |
|
"ref_logps/rejected": -157.75328063964844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0498902797698975, |
|
"rewards/margins": 3.333681583404541, |
|
"rewards/rejected": -2.2837913036346436, |
|
"step": 775, |
|
"u": -2.703965902328491, |
|
"weight": 0.04053039103746414 |
|
}, |
|
{ |
|
"diff_generated": -220.4508819580078, |
|
"epoch": 1.633080345459304, |
|
"grad_norm": 1400.50539955428, |
|
"learning_rate": 1.961794402365611e-07, |
|
"logits/chosen": -1.4036462306976318, |
|
"logits/rejected": -1.2919548749923706, |
|
"logps/chosen": -200.26541137695312, |
|
"logps/rejected": -386.81597900390625, |
|
"loss": 183.6931, |
|
"losses_ref": -1.8775193691253662, |
|
"ref_logps/chosen": -310.53729248046875, |
|
"ref_logps/rejected": -166.36508178710938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1027185916900635, |
|
"rewards/margins": 3.307227373123169, |
|
"rewards/rejected": -2.2045087814331055, |
|
"step": 780, |
|
"u": -2.835704803466797, |
|
"weight": 0.031143631786108017 |
|
}, |
|
{ |
|
"diff_generated": -217.3331298828125, |
|
"epoch": 1.643548809212248, |
|
"grad_norm": 1301.0844819616188, |
|
"learning_rate": 1.8542404794966427e-07, |
|
"logits/chosen": -1.4641870260238647, |
|
"logits/rejected": -1.3147245645523071, |
|
"logps/chosen": -196.31103515625, |
|
"logps/rejected": -391.39166259765625, |
|
"loss": 178.2437, |
|
"losses_ref": -1.2605804204940796, |
|
"ref_logps/chosen": -303.4082336425781, |
|
"ref_logps/rejected": -174.0585479736328, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.070972204208374, |
|
"rewards/margins": 3.2443034648895264, |
|
"rewards/rejected": -2.1733312606811523, |
|
"step": 785, |
|
"u": -2.590919017791748, |
|
"weight": 0.01949651725590229 |
|
}, |
|
{ |
|
"diff_generated": -220.4461669921875, |
|
"epoch": 1.6540172729651923, |
|
"grad_norm": 1297.8083100097251, |
|
"learning_rate": 1.7494166948349053e-07, |
|
"logits/chosen": -1.3500601053237915, |
|
"logits/rejected": -1.411941409111023, |
|
"logps/chosen": -159.91616821289062, |
|
"logps/rejected": -383.6579284667969, |
|
"loss": 166.2805, |
|
"losses_ref": -1.1791235208511353, |
|
"ref_logps/chosen": -257.8923034667969, |
|
"ref_logps/rejected": -163.21176147460938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9797613024711609, |
|
"rewards/margins": 3.184222936630249, |
|
"rewards/rejected": -2.2044615745544434, |
|
"step": 790, |
|
"u": -3.492673873901367, |
|
"weight": 0.020172851160168648 |
|
}, |
|
{ |
|
"diff_generated": -218.501708984375, |
|
"epoch": 1.6644857367181367, |
|
"grad_norm": 1305.6902286203212, |
|
"learning_rate": 1.6473581811901528e-07, |
|
"logits/chosen": -1.3759443759918213, |
|
"logits/rejected": -1.3116881847381592, |
|
"logps/chosen": -175.59524536132812, |
|
"logps/rejected": -386.4131774902344, |
|
"loss": 166.0248, |
|
"losses_ref": -0.9349870681762695, |
|
"ref_logps/chosen": -275.24603271484375, |
|
"ref_logps/rejected": -167.91146850585938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9965084791183472, |
|
"rewards/margins": 3.181525468826294, |
|
"rewards/rejected": -2.1850171089172363, |
|
"step": 795, |
|
"u": -3.1951217651367188, |
|
"weight": 0.007377298083156347 |
|
}, |
|
{ |
|
"diff_generated": -227.8848114013672, |
|
"epoch": 1.674954200471081, |
|
"grad_norm": 1377.3447203192195, |
|
"learning_rate": 1.5480991445620538e-07, |
|
"logits/chosen": -1.3294823169708252, |
|
"logits/rejected": -1.3292287588119507, |
|
"logps/chosen": -171.1267852783203, |
|
"logps/rejected": -383.339111328125, |
|
"loss": 179.9502, |
|
"losses_ref": -1.4068111181259155, |
|
"ref_logps/chosen": -269.3274841308594, |
|
"ref_logps/rejected": -155.45433044433594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9820070266723633, |
|
"rewards/margins": 3.260855197906494, |
|
"rewards/rejected": -2.278848171234131, |
|
"step": 800, |
|
"u": -3.075801134109497, |
|
"weight": 0.02200758084654808 |
|
}, |
|
{ |
|
"diff_generated": -223.35498046875, |
|
"epoch": 1.685422664224025, |
|
"grad_norm": 1269.6278028028526, |
|
"learning_rate": 1.4516728526756873e-07, |
|
"logits/chosen": -1.4065078496932983, |
|
"logits/rejected": -1.2835044860839844, |
|
"logps/chosen": -182.1883544921875, |
|
"logps/rejected": -374.7066650390625, |
|
"loss": 186.9203, |
|
"losses_ref": -2.037257671356201, |
|
"ref_logps/chosen": -276.4019470214844, |
|
"ref_logps/rejected": -151.35165405273438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9421361684799194, |
|
"rewards/margins": 3.1756858825683594, |
|
"rewards/rejected": -2.2335495948791504, |
|
"step": 805, |
|
"u": -2.378087282180786, |
|
"weight": 0.04411940649151802 |
|
}, |
|
{ |
|
"diff_generated": -216.16616821289062, |
|
"epoch": 1.6958911279769695, |
|
"grad_norm": 1463.8714206417467, |
|
"learning_rate": 1.3581116238315194e-07, |
|
"logits/chosen": -1.4423078298568726, |
|
"logits/rejected": -1.3139569759368896, |
|
"logps/chosen": -205.9932098388672, |
|
"logps/rejected": -375.70849609375, |
|
"loss": 190.2176, |
|
"losses_ref": -1.2827723026275635, |
|
"ref_logps/chosen": -311.7004699707031, |
|
"ref_logps/rejected": -159.54234313964844, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": 1.057072639465332, |
|
"rewards/margins": 3.2187340259552, |
|
"rewards/rejected": -2.161661386489868, |
|
"step": 810, |
|
"u": -2.773268938064575, |
|
"weight": 0.022295668721199036 |
|
}, |
|
{ |
|
"diff_generated": -213.4055633544922, |
|
"epoch": 1.7063595917299135, |
|
"grad_norm": 1352.148808645479, |
|
"learning_rate": 1.2674468160735586e-07, |
|
"logits/chosen": -1.4077790975570679, |
|
"logits/rejected": -1.3166415691375732, |
|
"logps/chosen": -177.383544921875, |
|
"logps/rejected": -373.1116027832031, |
|
"loss": 179.0974, |
|
"losses_ref": -3.5087268352508545, |
|
"ref_logps/chosen": -279.3453674316406, |
|
"ref_logps/rejected": -159.70603942871094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.019618034362793, |
|
"rewards/margins": 3.1536736488342285, |
|
"rewards/rejected": -2.1340556144714355, |
|
"step": 815, |
|
"u": -2.615370512008667, |
|
"weight": 0.0508296899497509 |
|
}, |
|
{ |
|
"diff_generated": -237.0354766845703, |
|
"epoch": 1.7168280554828579, |
|
"grad_norm": 1326.9582054025304, |
|
"learning_rate": 1.1797088166794e-07, |
|
"logits/chosen": -1.328039288520813, |
|
"logits/rejected": -1.2903969287872314, |
|
"logps/chosen": -176.13819885253906, |
|
"logps/rejected": -401.98748779296875, |
|
"loss": 179.7547, |
|
"losses_ref": -0.005425100214779377, |
|
"ref_logps/chosen": -275.8017883300781, |
|
"ref_logps/rejected": -164.95204162597656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9966354370117188, |
|
"rewards/margins": 3.366990327835083, |
|
"rewards/rejected": -2.370354652404785, |
|
"step": 820, |
|
"u": -2.696533679962158, |
|
"weight": 3.794050280703232e-05 |
|
}, |
|
{ |
|
"diff_generated": -219.973876953125, |
|
"epoch": 1.7272965192358023, |
|
"grad_norm": 1183.8442331623387, |
|
"learning_rate": 1.0949270319755766e-07, |
|
"logits/chosen": -1.3806655406951904, |
|
"logits/rejected": -1.337877631187439, |
|
"logps/chosen": -167.13290405273438, |
|
"logps/rejected": -381.6925048828125, |
|
"loss": 173.9734, |
|
"losses_ref": -2.8696396350860596, |
|
"ref_logps/chosen": -262.41363525390625, |
|
"ref_logps/rejected": -161.7186279296875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9528074264526367, |
|
"rewards/margins": 3.1525461673736572, |
|
"rewards/rejected": -2.1997389793395996, |
|
"step": 825, |
|
"u": -2.7405786514282227, |
|
"weight": 0.02941594459116459 |
|
}, |
|
{ |
|
"diff_generated": -211.63803100585938, |
|
"epoch": 1.7377649829887463, |
|
"grad_norm": 1227.5586469104078, |
|
"learning_rate": 1.013129877481741e-07, |
|
"logits/chosen": -1.3626017570495605, |
|
"logits/rejected": -1.199372410774231, |
|
"logps/chosen": -211.2673797607422, |
|
"logps/rejected": -382.85003662109375, |
|
"loss": 185.7144, |
|
"losses_ref": -5.601190090179443, |
|
"ref_logps/chosen": -318.6112060546875, |
|
"ref_logps/rejected": -171.2120361328125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0734381675720215, |
|
"rewards/margins": 3.1898186206817627, |
|
"rewards/rejected": -2.116380214691162, |
|
"step": 830, |
|
"u": -2.8598952293395996, |
|
"weight": 0.029378216713666916 |
|
}, |
|
{ |
|
"diff_generated": -230.986328125, |
|
"epoch": 1.7482334467416907, |
|
"grad_norm": 1290.8717428712873, |
|
"learning_rate": 9.343447683868799e-08, |
|
"logits/chosen": -1.2116000652313232, |
|
"logits/rejected": -1.2751588821411133, |
|
"logps/chosen": -169.79380798339844, |
|
"logps/rejected": -394.716064453125, |
|
"loss": 178.4699, |
|
"losses_ref": -0.9493634104728699, |
|
"ref_logps/chosen": -262.22894287109375, |
|
"ref_logps/rejected": -163.72976684570312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9243512153625488, |
|
"rewards/margins": 3.2342143058776855, |
|
"rewards/rejected": -2.309863328933716, |
|
"step": 835, |
|
"u": -2.9001994132995605, |
|
"weight": 0.008882230147719383 |
|
}, |
|
{ |
|
"diff_generated": -216.50314331054688, |
|
"epoch": 1.7587019104946349, |
|
"grad_norm": 1335.6008361033998, |
|
"learning_rate": 8.585981103608342e-08, |
|
"logits/chosen": -1.3362239599227905, |
|
"logits/rejected": -1.1397970914840698, |
|
"logps/chosen": -206.77511596679688, |
|
"logps/rejected": -389.9546203613281, |
|
"loss": 191.1818, |
|
"losses_ref": -0.28884872794151306, |
|
"ref_logps/chosen": -316.14837646484375, |
|
"ref_logps/rejected": -173.45150756835938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0937325954437256, |
|
"rewards/margins": 3.2587637901306152, |
|
"rewards/rejected": -2.1650314331054688, |
|
"step": 840, |
|
"u": -3.1953749656677246, |
|
"weight": 0.004329306539148092 |
|
}, |
|
{ |
|
"diff_generated": -202.93106079101562, |
|
"epoch": 1.769170374247579, |
|
"grad_norm": 1266.3673749218208, |
|
"learning_rate": 7.859152907041544e-08, |
|
"logits/chosen": -1.354994773864746, |
|
"logits/rejected": -1.1393146514892578, |
|
"logps/chosen": -199.24710083007812, |
|
"logps/rejected": -360.2781677246094, |
|
"loss": 176.0576, |
|
"losses_ref": -1.7230793237686157, |
|
"ref_logps/chosen": -305.8094177246094, |
|
"ref_logps/rejected": -157.3471221923828, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.06562340259552, |
|
"rewards/margins": 3.0949339866638184, |
|
"rewards/rejected": -2.029310464859009, |
|
"step": 845, |
|
"u": -2.745694160461426, |
|
"weight": 0.0367230661213398 |
|
}, |
|
{ |
|
"diff_generated": -211.84765625, |
|
"epoch": 1.7796388380005235, |
|
"grad_norm": 1302.5644299154628, |
|
"learning_rate": 7.163206698392742e-08, |
|
"logits/chosen": -1.2949212789535522, |
|
"logits/rejected": -1.1885995864868164, |
|
"logps/chosen": -185.09088134765625, |
|
"logps/rejected": -367.0600891113281, |
|
"loss": 184.3042, |
|
"losses_ref": -3.0929312705993652, |
|
"ref_logps/chosen": -285.8619384765625, |
|
"ref_logps/rejected": -155.21240234375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0077106952667236, |
|
"rewards/margins": 3.1261868476867676, |
|
"rewards/rejected": -2.118476629257202, |
|
"step": 850, |
|
"u": -3.2007651329040527, |
|
"weight": 0.03339768201112747 |
|
}, |
|
{ |
|
"diff_generated": -209.816650390625, |
|
"epoch": 1.7901073017534677, |
|
"grad_norm": 1345.4005628593675, |
|
"learning_rate": 6.498375731458527e-08, |
|
"logits/chosen": -1.4427772760391235, |
|
"logits/rejected": -1.2521936893463135, |
|
"logps/chosen": -190.9636688232422, |
|
"logps/rejected": -376.20391845703125, |
|
"loss": 177.6342, |
|
"losses_ref": -2.1869561672210693, |
|
"ref_logps/chosen": -298.1745300292969, |
|
"ref_logps/rejected": -166.38723754882812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.072108507156372, |
|
"rewards/margins": 3.1702747344970703, |
|
"rewards/rejected": -2.098165988922119, |
|
"step": 855, |
|
"u": -3.108565330505371, |
|
"weight": 0.026614084839820862 |
|
}, |
|
{ |
|
"diff_generated": -229.3601837158203, |
|
"epoch": 1.8005757655064119, |
|
"grad_norm": 1261.2192787306672, |
|
"learning_rate": 5.8648828314302735e-08, |
|
"logits/chosen": -1.3119590282440186, |
|
"logits/rejected": -1.1316639184951782, |
|
"logps/chosen": -186.41650390625, |
|
"logps/rejected": -386.34906005859375, |
|
"loss": 176.3818, |
|
"losses_ref": -2.3852286338806152, |
|
"ref_logps/chosen": -289.1662292480469, |
|
"ref_logps/rejected": -156.98886108398438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0274972915649414, |
|
"rewards/margins": 3.3210995197296143, |
|
"rewards/rejected": -2.2936015129089355, |
|
"step": 860, |
|
"u": -2.8182337284088135, |
|
"weight": 0.03502316027879715 |
|
}, |
|
{ |
|
"diff_generated": -210.816650390625, |
|
"epoch": 1.8110442292593563, |
|
"grad_norm": 1269.4818113722586, |
|
"learning_rate": 5.2629403202119505e-08, |
|
"logits/chosen": -1.2412734031677246, |
|
"logits/rejected": -1.227634072303772, |
|
"logps/chosen": -173.3083953857422, |
|
"logps/rejected": -375.7776184082031, |
|
"loss": 171.0543, |
|
"losses_ref": -0.6853199005126953, |
|
"ref_logps/chosen": -271.1694030761719, |
|
"ref_logps/rejected": -164.96096801757812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9786099195480347, |
|
"rewards/margins": 3.0867760181427, |
|
"rewards/rejected": -2.108166217803955, |
|
"step": 865, |
|
"u": -3.394763231277466, |
|
"weight": 0.010494846850633621 |
|
}, |
|
{ |
|
"diff_generated": -226.4854736328125, |
|
"epoch": 1.8215126930123005, |
|
"grad_norm": 1268.655880675009, |
|
"learning_rate": 4.692749945258057e-08, |
|
"logits/chosen": -1.3430616855621338, |
|
"logits/rejected": -1.1744420528411865, |
|
"logps/chosen": -195.01589965820312, |
|
"logps/rejected": -389.9505920410156, |
|
"loss": 186.5683, |
|
"losses_ref": -3.434800624847412, |
|
"ref_logps/chosen": -299.091796875, |
|
"ref_logps/rejected": -163.46514892578125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0407590866088867, |
|
"rewards/margins": 3.3056137561798096, |
|
"rewards/rejected": -2.2648544311523438, |
|
"step": 870, |
|
"u": -2.5695509910583496, |
|
"weight": 0.046660859137773514 |
|
}, |
|
{ |
|
"diff_generated": -236.5417022705078, |
|
"epoch": 1.8319811567652446, |
|
"grad_norm": 1204.436962297953, |
|
"learning_rate": 4.1545028119559066e-08, |
|
"logits/chosen": -1.3133630752563477, |
|
"logits/rejected": -1.3207045793533325, |
|
"logps/chosen": -190.3129425048828, |
|
"logps/rejected": -398.3179931640625, |
|
"loss": 171.6172, |
|
"losses_ref": -1.358794927597046, |
|
"ref_logps/chosen": -287.73504638671875, |
|
"ref_logps/rejected": -161.77627563476562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.974220871925354, |
|
"rewards/margins": 3.339637279510498, |
|
"rewards/rejected": -2.365417003631592, |
|
"step": 875, |
|
"u": -2.7414660453796387, |
|
"weight": 0.022162286564707756 |
|
}, |
|
{ |
|
"diff_generated": -210.89306640625, |
|
"epoch": 1.842449620518189, |
|
"grad_norm": 1230.1188284044147, |
|
"learning_rate": 3.648379319574568e-08, |
|
"logits/chosen": -1.383299708366394, |
|
"logits/rejected": -1.3287036418914795, |
|
"logps/chosen": -190.19691467285156, |
|
"logps/rejected": -363.8573913574219, |
|
"loss": 168.7976, |
|
"losses_ref": -4.205277442932129, |
|
"ref_logps/chosen": -291.4452819824219, |
|
"ref_logps/rejected": -152.96432495117188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0124839544296265, |
|
"rewards/margins": 3.1214146614074707, |
|
"rewards/rejected": -2.1089303493499756, |
|
"step": 880, |
|
"u": -2.69191312789917, |
|
"weight": 0.03942141681909561 |
|
}, |
|
{ |
|
"diff_generated": -224.0536346435547, |
|
"epoch": 1.8529180842711332, |
|
"grad_norm": 1317.8866979194424, |
|
"learning_rate": 3.17454910080216e-08, |
|
"logits/chosen": -1.387369155883789, |
|
"logits/rejected": -1.256730318069458, |
|
"logps/chosen": -213.5888671875, |
|
"logps/rejected": -388.00115966796875, |
|
"loss": 200.2688, |
|
"losses_ref": -0.6602109670639038, |
|
"ref_logps/chosen": -319.39569091796875, |
|
"ref_logps/rejected": -163.94747924804688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.058068037033081, |
|
"rewards/margins": 3.2986044883728027, |
|
"rewards/rejected": -2.2405362129211426, |
|
"step": 885, |
|
"u": -2.7355685234069824, |
|
"weight": 0.029411468654870987 |
|
}, |
|
{ |
|
"diff_generated": -221.8759765625, |
|
"epoch": 1.8633865480240774, |
|
"grad_norm": 1306.8555947562052, |
|
"learning_rate": 2.733170964891607e-08, |
|
"logits/chosen": -1.3195066452026367, |
|
"logits/rejected": -1.2867323160171509, |
|
"logps/chosen": -170.53369140625, |
|
"logps/rejected": -378.52935791015625, |
|
"loss": 174.36, |
|
"losses_ref": -0.899361252784729, |
|
"ref_logps/chosen": -274.72943115234375, |
|
"ref_logps/rejected": -156.65335083007812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0419572591781616, |
|
"rewards/margins": 3.2607169151306152, |
|
"rewards/rejected": -2.218759775161743, |
|
"step": 890, |
|
"u": -3.3832144737243652, |
|
"weight": 0.008660494349896908 |
|
}, |
|
{ |
|
"diff_generated": -214.10165405273438, |
|
"epoch": 1.8738550117770219, |
|
"grad_norm": 1275.7234308585855, |
|
"learning_rate": 2.324392844434042e-08, |
|
"logits/chosen": -1.3565282821655273, |
|
"logits/rejected": -1.344678282737732, |
|
"logps/chosen": -192.53738403320312, |
|
"logps/rejected": -390.2491149902344, |
|
"loss": 191.1614, |
|
"losses_ref": -2.9138152599334717, |
|
"ref_logps/chosen": -295.70458984375, |
|
"ref_logps/rejected": -176.14747619628906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0316721200942993, |
|
"rewards/margins": 3.1726887226104736, |
|
"rewards/rejected": -2.141016721725464, |
|
"step": 895, |
|
"u": -3.1400444507598877, |
|
"weight": 0.02205641008913517 |
|
}, |
|
{ |
|
"diff_generated": -221.6460418701172, |
|
"epoch": 1.8843234755299658, |
|
"grad_norm": 1242.4363921596732, |
|
"learning_rate": 1.9483517457776434e-08, |
|
"logits/chosen": -1.1762725114822388, |
|
"logits/rejected": -1.3724615573883057, |
|
"logps/chosen": -159.86691284179688, |
|
"logps/rejected": -381.15081787109375, |
|
"loss": 172.6295, |
|
"losses_ref": -4.887435436248779, |
|
"ref_logps/chosen": -252.33743286132812, |
|
"ref_logps/rejected": -159.50479125976562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9247050285339355, |
|
"rewards/margins": 3.1411654949188232, |
|
"rewards/rejected": -2.216460704803467, |
|
"step": 900, |
|
"u": -2.2665815353393555, |
|
"weight": 0.07192285358905792 |
|
}, |
|
{ |
|
"diff_generated": -227.669189453125, |
|
"epoch": 1.8947919392829102, |
|
"grad_norm": 1323.6799372011517, |
|
"learning_rate": 1.6051737031084533e-08, |
|
"logits/chosen": -1.2494432926177979, |
|
"logits/rejected": -1.1595919132232666, |
|
"logps/chosen": -175.1837921142578, |
|
"logps/rejected": -384.48175048828125, |
|
"loss": 174.3896, |
|
"losses_ref": -1.007882833480835, |
|
"ref_logps/chosen": -276.83319091796875, |
|
"ref_logps/rejected": -156.81253051757812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0164941549301147, |
|
"rewards/margins": 3.2931861877441406, |
|
"rewards/rejected": -2.2766921520233154, |
|
"step": 905, |
|
"u": -3.0982091426849365, |
|
"weight": 0.01854753866791725 |
|
}, |
|
{ |
|
"diff_generated": -222.00784301757812, |
|
"epoch": 1.9052604030358546, |
|
"grad_norm": 1353.0501425434295, |
|
"learning_rate": 1.2949737362087154e-08, |
|
"logits/chosen": -1.222752332687378, |
|
"logits/rejected": -1.265421986579895, |
|
"logps/chosen": -173.27577209472656, |
|
"logps/rejected": -388.85797119140625, |
|
"loss": 174.8498, |
|
"losses_ref": -6.1348981857299805, |
|
"ref_logps/chosen": -269.9849853515625, |
|
"ref_logps/rejected": -166.85018920898438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9670922160148621, |
|
"rewards/margins": 3.1871705055236816, |
|
"rewards/rejected": -2.220078229904175, |
|
"step": 910, |
|
"u": -2.5107998847961426, |
|
"weight": 0.06687295436859131 |
|
}, |
|
{ |
|
"diff_generated": -211.4918975830078, |
|
"epoch": 1.9157288667887986, |
|
"grad_norm": 1286.3307044665144, |
|
"learning_rate": 1.0178558119067315e-08, |
|
"logits/chosen": -1.2266263961791992, |
|
"logits/rejected": -1.0511000156402588, |
|
"logps/chosen": -177.09149169921875, |
|
"logps/rejected": -372.6114807128906, |
|
"loss": 175.9135, |
|
"losses_ref": -0.7255733609199524, |
|
"ref_logps/chosen": -277.30194091796875, |
|
"ref_logps/rejected": -161.1195831298828, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0021045207977295, |
|
"rewards/margins": 3.11702299118042, |
|
"rewards/rejected": -2.1149187088012695, |
|
"step": 915, |
|
"u": -3.0817387104034424, |
|
"weight": 0.014788592234253883 |
|
}, |
|
{ |
|
"diff_generated": -220.1043701171875, |
|
"epoch": 1.926197330541743, |
|
"grad_norm": 1287.429219240266, |
|
"learning_rate": 7.739128092312918e-09, |
|
"logits/chosen": -1.3375459909439087, |
|
"logits/rejected": -1.274279236793518, |
|
"logps/chosen": -181.00665283203125, |
|
"logps/rejected": -377.59088134765625, |
|
"loss": 171.8915, |
|
"losses_ref": -1.6772384643554688, |
|
"ref_logps/chosen": -280.682861328125, |
|
"ref_logps/rejected": -157.4865264892578, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9967617988586426, |
|
"rewards/margins": 3.197805881500244, |
|
"rewards/rejected": -2.2010436058044434, |
|
"step": 920, |
|
"u": -2.880985736846924, |
|
"weight": 0.036544255912303925 |
|
}, |
|
{ |
|
"diff_generated": -222.374755859375, |
|
"epoch": 1.9366657942946872, |
|
"grad_norm": 1348.176434591513, |
|
"learning_rate": 5.632264882822757e-09, |
|
"logits/chosen": -1.3248652219772339, |
|
"logits/rejected": -1.2289717197418213, |
|
"logps/chosen": -187.19947814941406, |
|
"logps/rejected": -382.12860107421875, |
|
"loss": 186.23, |
|
"losses_ref": -2.8856143951416016, |
|
"ref_logps/chosen": -288.6744079589844, |
|
"ref_logps/rejected": -159.7538299560547, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0147496461868286, |
|
"rewards/margins": 3.238497257232666, |
|
"rewards/rejected": -2.223747730255127, |
|
"step": 925, |
|
"u": -2.8917412757873535, |
|
"weight": 0.024965789169073105 |
|
}, |
|
{ |
|
"diff_generated": -213.1198272705078, |
|
"epoch": 1.9471342580476314, |
|
"grad_norm": 1395.889446013467, |
|
"learning_rate": 3.858674628278824e-09, |
|
"logits/chosen": -1.366350531578064, |
|
"logits/rejected": -1.119940996170044, |
|
"logps/chosen": -188.4399871826172, |
|
"logps/rejected": -371.8886413574219, |
|
"loss": 182.8113, |
|
"losses_ref": -5.109557151794434, |
|
"ref_logps/chosen": -294.67010498046875, |
|
"ref_logps/rejected": -158.7688446044922, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0623013973236084, |
|
"rewards/margins": 3.1934995651245117, |
|
"rewards/rejected": -2.1311981678009033, |
|
"step": 930, |
|
"u": -2.426971435546875, |
|
"weight": 0.053149282932281494 |
|
}, |
|
{ |
|
"diff_generated": -237.7774200439453, |
|
"epoch": 1.9576027218005758, |
|
"grad_norm": 1267.4031070589224, |
|
"learning_rate": 2.418951766376742e-09, |
|
"logits/chosen": -1.2219622135162354, |
|
"logits/rejected": -1.2400046586990356, |
|
"logps/chosen": -167.6567840576172, |
|
"logps/rejected": -398.30865478515625, |
|
"loss": 180.7217, |
|
"losses_ref": -5.82874059677124, |
|
"ref_logps/chosen": -267.5108947753906, |
|
"ref_logps/rejected": -160.53125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9985405802726746, |
|
"rewards/margins": 3.376314878463745, |
|
"rewards/rejected": -2.377774238586426, |
|
"step": 935, |
|
"u": -2.8524553775787354, |
|
"weight": 0.05332515761256218 |
|
}, |
|
{ |
|
"diff_generated": -221.6811981201172, |
|
"epoch": 1.96807118555352, |
|
"grad_norm": 1234.1312151479083, |
|
"learning_rate": 1.313578835593465e-09, |
|
"logits/chosen": -1.3167364597320557, |
|
"logits/rejected": -1.0956764221191406, |
|
"logps/chosen": -202.79949951171875, |
|
"logps/rejected": -389.28814697265625, |
|
"loss": 183.0365, |
|
"losses_ref": -1.295898199081421, |
|
"ref_logps/chosen": -312.72149658203125, |
|
"ref_logps/rejected": -167.60696411132812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0992200374603271, |
|
"rewards/margins": 3.3160319328308105, |
|
"rewards/rejected": -2.2168118953704834, |
|
"step": 940, |
|
"u": -2.668116569519043, |
|
"weight": 0.01472543366253376 |
|
}, |
|
{ |
|
"diff_generated": -209.20425415039062, |
|
"epoch": 1.9785396493064642, |
|
"grad_norm": 1327.2259702611773, |
|
"learning_rate": 5.429263134594242e-10, |
|
"logits/chosen": -1.298588514328003, |
|
"logits/rejected": -1.3200442790985107, |
|
"logps/chosen": -177.170654296875, |
|
"logps/rejected": -369.2535705566406, |
|
"loss": 179.5952, |
|
"losses_ref": -4.355043888092041, |
|
"ref_logps/chosen": -273.0224304199219, |
|
"ref_logps/rejected": -160.04933166503906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9585177302360535, |
|
"rewards/margins": 3.050560474395752, |
|
"rewards/rejected": -2.0920424461364746, |
|
"step": 945, |
|
"u": -2.591240406036377, |
|
"weight": 0.0508696511387825 |
|
}, |
|
{ |
|
"diff_generated": -218.85842895507812, |
|
"epoch": 1.9890081130594086, |
|
"grad_norm": 1215.4951947566592, |
|
"learning_rate": 1.0725249238940915e-10, |
|
"logits/chosen": -1.3104689121246338, |
|
"logits/rejected": -1.166074514389038, |
|
"logps/chosen": -190.97283935546875, |
|
"logps/rejected": -377.3951110839844, |
|
"loss": 185.4394, |
|
"losses_ref": -1.4506399631500244, |
|
"ref_logps/chosen": -288.34576416015625, |
|
"ref_logps/rejected": -158.53671264648438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9737294316291809, |
|
"rewards/margins": 3.162313938140869, |
|
"rewards/rejected": -2.188584089279175, |
|
"step": 950, |
|
"u": -3.3431270122528076, |
|
"weight": 0.014722567982971668 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 954, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|