|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9973828840617638, |
|
"eval_steps": 500, |
|
"global_step": 954, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"diff_generated": -1.8149629831314087, |
|
"epoch": 0.002093692750588851, |
|
"grad_norm": 43.26649304714989, |
|
"learning_rate": 2.083333333333333e-08, |
|
"logits/chosen": -2.1441590785980225, |
|
"logits/rejected": -2.0543735027313232, |
|
"logps/chosen": -276.82366943359375, |
|
"logps/rejected": -131.32485961914062, |
|
"logps_avg/chosen": -1.2310187816619873, |
|
"logps_avg/rejected": -0.5444889068603516, |
|
"loss": 0.9706, |
|
"losses_ref": -0.2554703652858734, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"u": -1.129564642906189, |
|
"weight": 0.727432131767273 |
|
}, |
|
{ |
|
"diff_generated": -2.051100015640259, |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 36.895500460127934, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.2114098072052, |
|
"logits/rejected": -2.10967755317688, |
|
"logps/chosen": -280.6037902832031, |
|
"logps/rejected": -162.30044555664062, |
|
"logps_avg/chosen": -1.178394079208374, |
|
"logps_avg/rejected": -0.6153301000595093, |
|
"loss": 0.8456, |
|
"losses_ref": -0.2878931164741516, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 5, |
|
"u": -1.3192780017852783, |
|
"weight": 0.6589411497116089 |
|
}, |
|
{ |
|
"diff_generated": -2.0342957973480225, |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 42.24412669427099, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.3565850257873535, |
|
"logits/rejected": -2.1584813594818115, |
|
"logps/chosen": -300.6426086425781, |
|
"logps/rejected": -167.40040588378906, |
|
"logps_avg/chosen": -1.1184991598129272, |
|
"logps_avg/rejected": -0.6102887988090515, |
|
"loss": 0.8731, |
|
"losses_ref": -0.2850458025932312, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 10, |
|
"u": -1.2951091527938843, |
|
"weight": 0.6724194884300232 |
|
}, |
|
{ |
|
"diff_generated": -1.9851667881011963, |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 31.267399626309693, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.2946715354919434, |
|
"logits/rejected": -2.146397113800049, |
|
"logps/chosen": -293.4947509765625, |
|
"logps/rejected": -156.3843994140625, |
|
"logps_avg/chosen": -1.0986683368682861, |
|
"logps_avg/rejected": -0.5955500602722168, |
|
"loss": 0.7379, |
|
"losses_ref": -0.28325891494750977, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 15, |
|
"u": -1.2593215703964233, |
|
"weight": 0.6894552111625671 |
|
}, |
|
{ |
|
"diff_generated": -2.0035815238952637, |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 22.686346023577535, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.2586379051208496, |
|
"logits/rejected": -2.134080410003662, |
|
"logps/chosen": -261.52960205078125, |
|
"logps/rejected": -161.9304656982422, |
|
"logps_avg/chosen": -0.9046722650527954, |
|
"logps_avg/rejected": -0.6010745763778687, |
|
"loss": 0.5984, |
|
"losses_ref": -0.2947906255722046, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 20, |
|
"u": -1.323677659034729, |
|
"weight": 0.6650992631912231 |
|
}, |
|
{ |
|
"diff_generated": -3.258924961090088, |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 15.412617135483135, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -2.1527328491210938, |
|
"logits/rejected": -2.013265609741211, |
|
"logps/chosen": -257.1512756347656, |
|
"logps/rejected": -277.85711669921875, |
|
"logps_avg/chosen": -0.8043298721313477, |
|
"logps_avg/rejected": -0.9776775240898132, |
|
"loss": 0.5813, |
|
"losses_ref": -0.25987568497657776, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 25, |
|
"u": -1.7415921688079834, |
|
"weight": 0.4334268569946289 |
|
}, |
|
{ |
|
"diff_generated": -6.022626876831055, |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 15.25952740077981, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": -2.1849024295806885, |
|
"logits/rejected": -2.1174261569976807, |
|
"logps/chosen": -248.16909790039062, |
|
"logps/rejected": -534.7174682617188, |
|
"logps_avg/chosen": -0.8181886672973633, |
|
"logps_avg/rejected": -1.8067880868911743, |
|
"loss": 0.667, |
|
"losses_ref": -0.1500019133090973, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 30, |
|
"u": -2.0229365825653076, |
|
"weight": 0.225816011428833 |
|
}, |
|
{ |
|
"diff_generated": -9.153361320495605, |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 18.48300356782214, |
|
"learning_rate": 7.291666666666666e-07, |
|
"logits/chosen": -2.2708792686462402, |
|
"logits/rejected": -2.130821704864502, |
|
"logps/chosen": -255.21701049804688, |
|
"logps/rejected": -782.3409423828125, |
|
"logps_avg/chosen": -0.7904274463653564, |
|
"logps_avg/rejected": -2.7460083961486816, |
|
"loss": 0.6695, |
|
"losses_ref": -0.1412452608346939, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 35, |
|
"u": -2.0066444873809814, |
|
"weight": 0.2316206991672516 |
|
}, |
|
{ |
|
"diff_generated": -13.209306716918945, |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 11.436173876886219, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -2.2111456394195557, |
|
"logits/rejected": -2.13924241065979, |
|
"logps/chosen": -241.15072631835938, |
|
"logps/rejected": -1223.218017578125, |
|
"logps_avg/chosen": -0.7820993661880493, |
|
"logps_avg/rejected": -3.962791919708252, |
|
"loss": 0.6798, |
|
"losses_ref": -0.09846386313438416, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 40, |
|
"u": -2.131727457046509, |
|
"weight": 0.1441923826932907 |
|
}, |
|
{ |
|
"diff_generated": -14.63012409210205, |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 59.29532742939981, |
|
"learning_rate": 9.374999999999999e-07, |
|
"logits/chosen": -2.298741102218628, |
|
"logits/rejected": -2.0653302669525146, |
|
"logps/chosen": -264.97357177734375, |
|
"logps/rejected": -1320.9332275390625, |
|
"logps_avg/chosen": -0.779043436050415, |
|
"logps_avg/rejected": -4.389036655426025, |
|
"loss": 0.6914, |
|
"losses_ref": -0.08891113847494125, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 45, |
|
"u": -2.13495135307312, |
|
"weight": 0.13693246245384216 |
|
}, |
|
{ |
|
"diff_generated": -12.911537170410156, |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 8.930786410211843, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"logits/chosen": -2.302333116531372, |
|
"logits/rejected": -2.2043356895446777, |
|
"logps/chosen": -241.756103515625, |
|
"logps/rejected": -1145.5604248046875, |
|
"logps_avg/chosen": -0.7927433252334595, |
|
"logps_avg/rejected": -3.8734612464904785, |
|
"loss": 0.6993, |
|
"losses_ref": -0.10359562933444977, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 50, |
|
"u": -2.1199097633361816, |
|
"weight": 0.15450677275657654 |
|
}, |
|
{ |
|
"diff_generated": -11.095788955688477, |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 9.783120635378207, |
|
"learning_rate": 1.1458333333333333e-06, |
|
"logits/chosen": -2.4609317779541016, |
|
"logits/rejected": -2.3575634956359863, |
|
"logps/chosen": -245.7393798828125, |
|
"logps/rejected": -981.2423095703125, |
|
"logps_avg/chosen": -0.8303758502006531, |
|
"logps_avg/rejected": -3.3287365436553955, |
|
"loss": 0.6926, |
|
"losses_ref": -0.08979364484548569, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 55, |
|
"u": -2.17197322845459, |
|
"weight": 0.11447404325008392 |
|
}, |
|
{ |
|
"diff_generated": -13.795969009399414, |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 9.420248973366883, |
|
"learning_rate": 1.2499999999999999e-06, |
|
"logits/chosen": -2.5860393047332764, |
|
"logits/rejected": -2.482574939727783, |
|
"logps/chosen": -249.44070434570312, |
|
"logps/rejected": -1232.59228515625, |
|
"logps_avg/chosen": -0.7758530378341675, |
|
"logps_avg/rejected": -4.138791084289551, |
|
"loss": 0.6815, |
|
"losses_ref": -0.0876917839050293, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 60, |
|
"u": -2.1477303504943848, |
|
"weight": 0.12934879958629608 |
|
}, |
|
{ |
|
"diff_generated": -16.25264549255371, |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 12.24868452539092, |
|
"learning_rate": 1.3541666666666667e-06, |
|
"logits/chosen": -2.640986204147339, |
|
"logits/rejected": -2.510274648666382, |
|
"logps/chosen": -258.56109619140625, |
|
"logps/rejected": -1508.2763671875, |
|
"logps_avg/chosen": -0.7998191118240356, |
|
"logps_avg/rejected": -4.875794410705566, |
|
"loss": 0.7039, |
|
"losses_ref": -0.07322683185338974, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 65, |
|
"u": -2.180368423461914, |
|
"weight": 0.1039782166481018 |
|
}, |
|
{ |
|
"diff_generated": -16.121641159057617, |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 7.905374307014113, |
|
"learning_rate": 1.4583333333333333e-06, |
|
"logits/chosen": -2.581535816192627, |
|
"logits/rejected": -2.4923813343048096, |
|
"logps/chosen": -238.9574432373047, |
|
"logps/rejected": -1444.403564453125, |
|
"logps_avg/chosen": -0.8027188181877136, |
|
"logps_avg/rejected": -4.836493015289307, |
|
"loss": 0.6907, |
|
"losses_ref": -0.0750691220164299, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 70, |
|
"u": -2.189579486846924, |
|
"weight": 0.09880717098712921 |
|
}, |
|
{ |
|
"diff_generated": -16.705251693725586, |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 9.573720561122785, |
|
"learning_rate": 1.5624999999999999e-06, |
|
"logits/chosen": -2.598374128341675, |
|
"logits/rejected": -2.446035146713257, |
|
"logps/chosen": -270.2249450683594, |
|
"logps/rejected": -1517.441650390625, |
|
"logps_avg/chosen": -0.7964105606079102, |
|
"logps_avg/rejected": -5.011575698852539, |
|
"loss": 0.725, |
|
"losses_ref": -0.07196028530597687, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 75, |
|
"u": -2.21059513092041, |
|
"weight": 0.08612708002328873 |
|
}, |
|
{ |
|
"diff_generated": -18.304201126098633, |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 7.0924424799681, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"logits/chosen": -2.591045618057251, |
|
"logits/rejected": -2.489673376083374, |
|
"logps/chosen": -216.99685668945312, |
|
"logps/rejected": -1667.5283203125, |
|
"logps_avg/chosen": -0.7215350866317749, |
|
"logps_avg/rejected": -5.491259574890137, |
|
"loss": 0.6699, |
|
"losses_ref": -0.06580645591020584, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 80, |
|
"u": -2.2118382453918457, |
|
"weight": 0.08225957304239273 |
|
}, |
|
{ |
|
"diff_generated": -18.906076431274414, |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 7.632608732109636, |
|
"learning_rate": 1.7708333333333332e-06, |
|
"logits/chosen": -2.6046338081359863, |
|
"logits/rejected": -2.4658734798431396, |
|
"logps/chosen": -244.0012664794922, |
|
"logps/rejected": -1689.686767578125, |
|
"logps_avg/chosen": -0.7541030049324036, |
|
"logps_avg/rejected": -5.671823978424072, |
|
"loss": 0.7032, |
|
"losses_ref": -0.06257248669862747, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 85, |
|
"u": -2.2184884548187256, |
|
"weight": 0.07795710116624832 |
|
}, |
|
{ |
|
"diff_generated": -22.26788902282715, |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 10.332533231863795, |
|
"learning_rate": 1.8749999999999998e-06, |
|
"logits/chosen": -2.62504243850708, |
|
"logits/rejected": -2.4670681953430176, |
|
"logps/chosen": -241.73550415039062, |
|
"logps/rejected": -1991.0435791015625, |
|
"logps_avg/chosen": -0.7270082235336304, |
|
"logps_avg/rejected": -6.680366516113281, |
|
"loss": 0.689, |
|
"losses_ref": -0.06023075059056282, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 90, |
|
"u": -2.2096261978149414, |
|
"weight": 0.08131252229213715 |
|
}, |
|
{ |
|
"diff_generated": -20.242061614990234, |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 7.009998646854354, |
|
"learning_rate": 1.9791666666666666e-06, |
|
"logits/chosen": -2.5733673572540283, |
|
"logits/rejected": -2.4526114463806152, |
|
"logps/chosen": -241.0827178955078, |
|
"logps/rejected": -1833.453369140625, |
|
"logps_avg/chosen": -0.7628769278526306, |
|
"logps_avg/rejected": -6.07261848449707, |
|
"loss": 0.6963, |
|
"losses_ref": -0.06475149095058441, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 95, |
|
"u": -2.2028064727783203, |
|
"weight": 0.0875387191772461 |
|
}, |
|
{ |
|
"diff_generated": -20.439355850219727, |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 8.018231688525765, |
|
"learning_rate": 1.9998927475076105e-06, |
|
"logits/chosen": -2.621689558029175, |
|
"logits/rejected": -2.470346689224243, |
|
"logps/chosen": -245.5767059326172, |
|
"logps/rejected": -1799.0728759765625, |
|
"logps_avg/chosen": -0.7319446802139282, |
|
"logps_avg/rejected": -6.13180685043335, |
|
"loss": 0.713, |
|
"losses_ref": -0.06253904104232788, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 100, |
|
"u": -2.2177913188934326, |
|
"weight": 0.07825066894292831 |
|
}, |
|
{ |
|
"diff_generated": -20.04744529724121, |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 7.248502316485956, |
|
"learning_rate": 1.9994570736865402e-06, |
|
"logits/chosen": -2.5862081050872803, |
|
"logits/rejected": -2.4370968341827393, |
|
"logps/chosen": -236.89501953125, |
|
"logps/rejected": -1794.0465087890625, |
|
"logps_avg/chosen": -0.7266777753829956, |
|
"logps_avg/rejected": -6.01423454284668, |
|
"loss": 0.6834, |
|
"losses_ref": -0.06446884572505951, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 105, |
|
"u": -2.198464870452881, |
|
"weight": 0.0902954638004303 |
|
}, |
|
{ |
|
"diff_generated": -20.10696792602539, |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 6.989545794085033, |
|
"learning_rate": 1.9986864211644068e-06, |
|
"logits/chosen": -2.570603609085083, |
|
"logits/rejected": -2.431187391281128, |
|
"logps/chosen": -236.31884765625, |
|
"logps/rejected": -1773.07421875, |
|
"logps_avg/chosen": -0.7348344326019287, |
|
"logps_avg/rejected": -6.032090187072754, |
|
"loss": 0.6907, |
|
"losses_ref": -0.06961078941822052, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 110, |
|
"u": -2.2041425704956055, |
|
"weight": 0.08867262303829193 |
|
}, |
|
{ |
|
"diff_generated": -21.548114776611328, |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 8.060053280392543, |
|
"learning_rate": 1.997581048233623e-06, |
|
"logits/chosen": -2.581951141357422, |
|
"logits/rejected": -2.4441328048706055, |
|
"logps/chosen": -232.8576202392578, |
|
"logps/rejected": -1942.4847412109375, |
|
"logps_avg/chosen": -0.7739059329032898, |
|
"logps_avg/rejected": -6.4644341468811035, |
|
"loss": 0.6817, |
|
"losses_ref": -0.062096286565065384, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 115, |
|
"u": -2.216289520263672, |
|
"weight": 0.07720647752285004 |
|
}, |
|
{ |
|
"diff_generated": -20.77760887145996, |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 6.53936940072868, |
|
"learning_rate": 1.9961413253717214e-06, |
|
"logits/chosen": -2.610959768295288, |
|
"logits/rejected": -2.4528729915618896, |
|
"logps/chosen": -233.8311004638672, |
|
"logps/rejected": -1862.2890625, |
|
"logps_avg/chosen": -0.7324265837669373, |
|
"logps_avg/rejected": -6.233283519744873, |
|
"loss": 0.6932, |
|
"losses_ref": -0.0750860795378685, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 120, |
|
"u": -2.200193405151367, |
|
"weight": 0.09466435015201569 |
|
}, |
|
{ |
|
"diff_generated": -23.185279846191406, |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 7.018169897249557, |
|
"learning_rate": 1.994367735117177e-06, |
|
"logits/chosen": -2.5702836513519287, |
|
"logits/rejected": -2.391747236251831, |
|
"logps/chosen": -220.02792358398438, |
|
"logps/rejected": -2155.526123046875, |
|
"logps_avg/chosen": -0.7447048425674438, |
|
"logps_avg/rejected": -6.955584526062012, |
|
"loss": 0.7052, |
|
"losses_ref": -0.05986471846699715, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 125, |
|
"u": -2.1955928802490234, |
|
"weight": 0.08941423892974854 |
|
}, |
|
{ |
|
"diff_generated": -22.66459846496582, |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 31.410489955444024, |
|
"learning_rate": 1.992260871907687e-06, |
|
"logits/chosen": -2.567049503326416, |
|
"logits/rejected": -2.4223153591156006, |
|
"logps/chosen": -242.8145751953125, |
|
"logps/rejected": -2053.98388671875, |
|
"logps_avg/chosen": -0.7978746294975281, |
|
"logps_avg/rejected": -6.799378871917725, |
|
"loss": 0.7155, |
|
"losses_ref": -0.04843521863222122, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 130, |
|
"u": -2.239774465560913, |
|
"weight": 0.05849189683794975 |
|
}, |
|
{ |
|
"diff_generated": -23.263744354248047, |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 7.49886026826363, |
|
"learning_rate": 1.9898214418809326e-06, |
|
"logits/chosen": -2.532973289489746, |
|
"logits/rejected": -2.372011423110962, |
|
"logps/chosen": -241.5897674560547, |
|
"logps/rejected": -2110.734375, |
|
"logps_avg/chosen": -0.7454018592834473, |
|
"logps_avg/rejected": -6.979123592376709, |
|
"loss": 0.6961, |
|
"losses_ref": -0.04879006743431091, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 135, |
|
"u": -2.2302093505859375, |
|
"weight": 0.06326891481876373 |
|
}, |
|
{ |
|
"diff_generated": -22.754619598388672, |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 7.014311333863948, |
|
"learning_rate": 1.9870502626379126e-06, |
|
"logits/chosen": -2.488236904144287, |
|
"logits/rejected": -2.361851215362549, |
|
"logps/chosen": -234.2844696044922, |
|
"logps/rejected": -2074.984375, |
|
"logps_avg/chosen": -0.7961763143539429, |
|
"logps_avg/rejected": -6.826386451721191, |
|
"loss": 0.7285, |
|
"losses_ref": -0.055333297699689865, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 140, |
|
"u": -2.2265305519104004, |
|
"weight": 0.06895061582326889 |
|
}, |
|
{ |
|
"diff_generated": -20.225128173828125, |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 6.7478341009341865, |
|
"learning_rate": 1.983948262968915e-06, |
|
"logits/chosen": -2.5856704711914062, |
|
"logits/rejected": -2.4371695518493652, |
|
"logps/chosen": -263.78900146484375, |
|
"logps/rejected": -1824.1302490234375, |
|
"logps_avg/chosen": -0.7517282366752625, |
|
"logps_avg/rejected": -6.067538738250732, |
|
"loss": 0.6839, |
|
"losses_ref": -0.06395243108272552, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 145, |
|
"u": -2.2037534713745117, |
|
"weight": 0.08503635227680206 |
|
}, |
|
{ |
|
"diff_generated": -24.050996780395508, |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 7.353090756036984, |
|
"learning_rate": 1.9805164825422237e-06, |
|
"logits/chosen": -2.607673168182373, |
|
"logits/rejected": -2.408552646636963, |
|
"logps/chosen": -241.8136749267578, |
|
"logps/rejected": -2169.62353515625, |
|
"logps_avg/chosen": -0.7578203082084656, |
|
"logps_avg/rejected": -7.215299129486084, |
|
"loss": 0.6958, |
|
"losses_ref": -0.05395021289587021, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 150, |
|
"u": -2.234814167022705, |
|
"weight": 0.06379680335521698 |
|
}, |
|
{ |
|
"diff_generated": -23.94837188720703, |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 7.484499798723553, |
|
"learning_rate": 1.9767560715556594e-06, |
|
"logits/chosen": -2.5357837677001953, |
|
"logits/rejected": -2.3741650581359863, |
|
"logps/chosen": -237.78701782226562, |
|
"logps/rejected": -2074.5205078125, |
|
"logps_avg/chosen": -0.7676432132720947, |
|
"logps_avg/rejected": -7.184511661529541, |
|
"loss": 0.7199, |
|
"losses_ref": -0.044619906693696976, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 155, |
|
"u": -2.2300286293029785, |
|
"weight": 0.061775337904691696 |
|
}, |
|
{ |
|
"diff_generated": -24.210857391357422, |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 7.8117370330190115, |
|
"learning_rate": 1.972668290351084e-06, |
|
"logits/chosen": -2.532038688659668, |
|
"logits/rejected": -2.3655738830566406, |
|
"logps/chosen": -246.5824432373047, |
|
"logps/rejected": -2090.85693359375, |
|
"logps_avg/chosen": -0.7575558423995972, |
|
"logps_avg/rejected": -7.2632575035095215, |
|
"loss": 0.6939, |
|
"losses_ref": -0.04590834304690361, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 160, |
|
"u": -2.236487865447998, |
|
"weight": 0.05844121426343918 |
|
}, |
|
{ |
|
"diff_generated": -20.957683563232422, |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 7.4058662270815026, |
|
"learning_rate": 1.968254508991978e-06, |
|
"logits/chosen": -2.6238338947296143, |
|
"logits/rejected": -2.4566922187805176, |
|
"logps/chosen": -245.81436157226562, |
|
"logps/rejected": -1895.0390625, |
|
"logps_avg/chosen": -0.7605465054512024, |
|
"logps_avg/rejected": -6.2873053550720215, |
|
"loss": 0.701, |
|
"losses_ref": -0.05409424751996994, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 165, |
|
"u": -2.236602783203125, |
|
"weight": 0.0619116947054863 |
|
}, |
|
{ |
|
"diff_generated": -23.36783218383789, |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 7.74288657614709, |
|
"learning_rate": 1.9635162068042544e-06, |
|
"logits/chosen": -2.5531725883483887, |
|
"logits/rejected": -2.385223627090454, |
|
"logps/chosen": -250.6099090576172, |
|
"logps/rejected": -2106.687744140625, |
|
"logps_avg/chosen": -0.7441612482070923, |
|
"logps_avg/rejected": -7.010349273681641, |
|
"loss": 0.7035, |
|
"losses_ref": -0.060589499771595, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 170, |
|
"u": -2.218136787414551, |
|
"weight": 0.0771271213889122 |
|
}, |
|
{ |
|
"diff_generated": -23.426584243774414, |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 6.175218562127925, |
|
"learning_rate": 1.958454971880441e-06, |
|
"logits/chosen": -2.545517683029175, |
|
"logits/rejected": -2.3892464637756348, |
|
"logps/chosen": -271.62152099609375, |
|
"logps/rejected": -2128.689208984375, |
|
"logps_avg/chosen": -0.7712885141372681, |
|
"logps_avg/rejected": -7.027975559234619, |
|
"loss": 0.6768, |
|
"losses_ref": -0.059747565537691116, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 175, |
|
"u": -2.221135377883911, |
|
"weight": 0.07428421080112457 |
|
}, |
|
{ |
|
"diff_generated": -23.27652931213379, |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 7.602114045248552, |
|
"learning_rate": 1.9530725005474194e-06, |
|
"logits/chosen": -2.5965559482574463, |
|
"logits/rejected": -2.4581873416900635, |
|
"logps/chosen": -225.35818481445312, |
|
"logps/rejected": -2096.1943359375, |
|
"logps_avg/chosen": -0.7377344369888306, |
|
"logps_avg/rejected": -6.982959747314453, |
|
"loss": 0.6599, |
|
"losses_ref": -0.06142450496554375, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 180, |
|
"u": -2.224907875061035, |
|
"weight": 0.07244168221950531 |
|
}, |
|
{ |
|
"diff_generated": -24.591943740844727, |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 6.781608060052273, |
|
"learning_rate": 1.9473705967978807e-06, |
|
"logits/chosen": -2.6047005653381348, |
|
"logits/rejected": -2.4540090560913086, |
|
"logps/chosen": -231.2947235107422, |
|
"logps/rejected": -2179.2568359375, |
|
"logps_avg/chosen": -0.689501166343689, |
|
"logps_avg/rejected": -7.3775835037231445, |
|
"loss": 0.6665, |
|
"losses_ref": -0.05740996077656746, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 185, |
|
"u": -2.2239882946014404, |
|
"weight": 0.07182185351848602 |
|
}, |
|
{ |
|
"diff_generated": -25.36248016357422, |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 10.641404317565371, |
|
"learning_rate": 1.941351171685697e-06, |
|
"logits/chosen": -2.5710506439208984, |
|
"logits/rejected": -2.4436774253845215, |
|
"logps/chosen": -236.1158905029297, |
|
"logps/rejected": -2273.37158203125, |
|
"logps_avg/chosen": -0.7929750680923462, |
|
"logps_avg/rejected": -7.6087446212768555, |
|
"loss": 0.7108, |
|
"losses_ref": -0.05253469944000244, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 190, |
|
"u": -2.239004373550415, |
|
"weight": 0.06010523438453674 |
|
}, |
|
{ |
|
"diff_generated": -25.077518463134766, |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 9.470830241427814, |
|
"learning_rate": 1.9350162426854148e-06, |
|
"logits/chosen": -2.602252244949341, |
|
"logits/rejected": -2.4661412239074707, |
|
"logps/chosen": -197.88571166992188, |
|
"logps/rejected": -2272.28076171875, |
|
"logps_avg/chosen": -0.7630836367607117, |
|
"logps_avg/rejected": -7.523255348205566, |
|
"loss": 0.6999, |
|
"losses_ref": -0.04595743492245674, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 195, |
|
"u": -2.243717670440674, |
|
"weight": 0.054233819246292114 |
|
}, |
|
{ |
|
"diff_generated": -24.682910919189453, |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 6.269041714690376, |
|
"learning_rate": 1.9283679330160725e-06, |
|
"logits/chosen": -2.5849337577819824, |
|
"logits/rejected": -2.394373655319214, |
|
"logps/chosen": -242.97378540039062, |
|
"logps/rejected": -2224.541015625, |
|
"logps_avg/chosen": -0.7199097871780396, |
|
"logps_avg/rejected": -7.404873847961426, |
|
"loss": 0.69, |
|
"losses_ref": -0.0516563281416893, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 200, |
|
"u": -2.2307353019714355, |
|
"weight": 0.06507831811904907 |
|
}, |
|
{ |
|
"diff_generated": -22.525114059448242, |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 6.963251924926938, |
|
"learning_rate": 1.9214084709295847e-06, |
|
"logits/chosen": -2.6382362842559814, |
|
"logits/rejected": -2.4577651023864746, |
|
"logps/chosen": -259.39349365234375, |
|
"logps/rejected": -2065.585693359375, |
|
"logps_avg/chosen": -0.7225343585014343, |
|
"logps_avg/rejected": -6.757534027099609, |
|
"loss": 0.696, |
|
"losses_ref": -0.05577712133526802, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 205, |
|
"u": -2.2293906211853027, |
|
"weight": 0.0664394274353981 |
|
}, |
|
{ |
|
"diff_generated": -22.145648956298828, |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 7.299076527075288, |
|
"learning_rate": 1.9141401889639164e-06, |
|
"logits/chosen": -2.5583319664001465, |
|
"logits/rejected": -2.4039664268493652, |
|
"logps/chosen": -238.9542694091797, |
|
"logps/rejected": -2062.404541015625, |
|
"logps_avg/chosen": -0.7716320753097534, |
|
"logps_avg/rejected": -6.6436944007873535, |
|
"loss": 0.6993, |
|
"losses_ref": -0.058913152664899826, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 210, |
|
"u": -2.2152769565582275, |
|
"weight": 0.07614172250032425 |
|
}, |
|
{ |
|
"diff_generated": -23.579111099243164, |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 8.50842985439364, |
|
"learning_rate": 1.906565523161312e-06, |
|
"logits/chosen": -2.600001335144043, |
|
"logits/rejected": -2.4590041637420654, |
|
"logps/chosen": -231.87673950195312, |
|
"logps/rejected": -2083.391357421875, |
|
"logps_avg/chosen": -0.7907384634017944, |
|
"logps_avg/rejected": -7.073732852935791, |
|
"loss": 0.7066, |
|
"losses_ref": -0.05489416792988777, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 215, |
|
"u": -2.234017848968506, |
|
"weight": 0.06394322961568832 |
|
}, |
|
{ |
|
"diff_generated": -27.941696166992188, |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 6.305196829448204, |
|
"learning_rate": 1.8986870122518259e-06, |
|
"logits/chosen": -2.6018145084381104, |
|
"logits/rejected": -2.436535358428955, |
|
"logps/chosen": -245.06005859375, |
|
"logps/rejected": -2555.211181640625, |
|
"logps_avg/chosen": -0.7695084810256958, |
|
"logps_avg/rejected": -8.382509231567383, |
|
"loss": 0.7137, |
|
"losses_ref": -0.04443511739373207, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 220, |
|
"u": -2.2481765747070312, |
|
"weight": 0.051543742418289185 |
|
}, |
|
{ |
|
"diff_generated": -26.58075523376465, |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 6.961879634992629, |
|
"learning_rate": 1.8905072968024423e-06, |
|
"logits/chosen": -2.567117214202881, |
|
"logits/rejected": -2.3789048194885254, |
|
"logps/chosen": -244.94296264648438, |
|
"logps/rejected": -2428.1923828125, |
|
"logps_avg/chosen": -0.7622503042221069, |
|
"logps_avg/rejected": -7.974226474761963, |
|
"loss": 0.6936, |
|
"losses_ref": -0.04088358208537102, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 225, |
|
"u": -2.2515604496002197, |
|
"weight": 0.04799002408981323 |
|
}, |
|
{ |
|
"diff_generated": -24.84002113342285, |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 7.1763831101881275, |
|
"learning_rate": 1.88202911833206e-06, |
|
"logits/chosen": -2.520597219467163, |
|
"logits/rejected": -2.395034074783325, |
|
"logps/chosen": -213.36929321289062, |
|
"logps/rejected": -2192.75390625, |
|
"logps_avg/chosen": -0.7349015474319458, |
|
"logps_avg/rejected": -7.4520063400268555, |
|
"loss": 0.6978, |
|
"losses_ref": -0.051292240619659424, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 230, |
|
"u": -2.231480360031128, |
|
"weight": 0.06503967195749283 |
|
}, |
|
{ |
|
"diff_generated": -26.721317291259766, |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 5.9191842237687835, |
|
"learning_rate": 1.873255318392644e-06, |
|
"logits/chosen": -2.4896910190582275, |
|
"logits/rejected": -2.296112060546875, |
|
"logps/chosen": -239.5654296875, |
|
"logps/rejected": -2448.593505859375, |
|
"logps_avg/chosen": -0.7563043236732483, |
|
"logps_avg/rejected": -8.01639461517334, |
|
"loss": 0.7163, |
|
"losses_ref": -0.05161570757627487, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 235, |
|
"u": -2.230043649673462, |
|
"weight": 0.06615348160266876 |
|
}, |
|
{ |
|
"diff_generated": -22.361705780029297, |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 6.264520814093759, |
|
"learning_rate": 1.8641888376168483e-06, |
|
"logits/chosen": -2.4571125507354736, |
|
"logits/rejected": -2.3177151679992676, |
|
"logps/chosen": -219.5469207763672, |
|
"logps/rejected": -1993.8834228515625, |
|
"logps_avg/chosen": -0.7551349997520447, |
|
"logps_avg/rejected": -6.708512783050537, |
|
"loss": 0.7049, |
|
"losses_ref": -0.05244841426610947, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 240, |
|
"u": -2.2336738109588623, |
|
"weight": 0.06469963490962982 |
|
}, |
|
{ |
|
"diff_generated": -19.673988342285156, |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 6.373155717275301, |
|
"learning_rate": 1.8548327147324312e-06, |
|
"logits/chosen": -2.459257125854492, |
|
"logits/rejected": -2.273050546646118, |
|
"logps/chosen": -248.42935180664062, |
|
"logps/rejected": -1772.5706787109375, |
|
"logps_avg/chosen": -0.7812148928642273, |
|
"logps_avg/rejected": -5.902197360992432, |
|
"loss": 0.6961, |
|
"losses_ref": -0.0656919851899147, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 245, |
|
"u": -2.2100472450256348, |
|
"weight": 0.08213107287883759 |
|
}, |
|
{ |
|
"diff_generated": -18.21377182006836, |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 7.015946672120974, |
|
"learning_rate": 1.8451900855437948e-06, |
|
"logits/chosen": -2.4628689289093018, |
|
"logits/rejected": -2.322192430496216, |
|
"logps/chosen": -242.85488891601562, |
|
"logps/rejected": -1614.31201171875, |
|
"logps_avg/chosen": -0.7260557413101196, |
|
"logps_avg/rejected": -5.4641313552856445, |
|
"loss": 0.6754, |
|
"losses_ref": -0.05365673825144768, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 250, |
|
"u": -2.234142303466797, |
|
"weight": 0.06292648613452911 |
|
}, |
|
{ |
|
"diff_generated": -21.006351470947266, |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 6.444057235727556, |
|
"learning_rate": 1.8352641818809846e-06, |
|
"logits/chosen": -2.44881010055542, |
|
"logits/rejected": -2.264845371246338, |
|
"logps/chosen": -258.3345031738281, |
|
"logps/rejected": -1910.637451171875, |
|
"logps_avg/chosen": -0.7704434394836426, |
|
"logps_avg/rejected": -6.301905155181885, |
|
"loss": 0.6922, |
|
"losses_ref": -0.05841520428657532, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 255, |
|
"u": -2.230961322784424, |
|
"weight": 0.06754828989505768 |
|
}, |
|
{ |
|
"diff_generated": -21.22915267944336, |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 6.052398997240752, |
|
"learning_rate": 1.8250583305165094e-06, |
|
"logits/chosen": -2.3323371410369873, |
|
"logits/rejected": -2.212430477142334, |
|
"logps/chosen": -235.18038940429688, |
|
"logps/rejected": -1926.814453125, |
|
"logps_avg/chosen": -0.6792945861816406, |
|
"logps_avg/rejected": -6.368745803833008, |
|
"loss": 0.6742, |
|
"losses_ref": -0.047284115105867386, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 260, |
|
"u": -2.220738172531128, |
|
"weight": 0.06843873858451843 |
|
}, |
|
{ |
|
"diff_generated": -21.301851272583008, |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 6.4499158810515755, |
|
"learning_rate": 1.8145759520503357e-06, |
|
"logits/chosen": -2.4637808799743652, |
|
"logits/rejected": -2.2752346992492676, |
|
"logps/chosen": -246.92269897460938, |
|
"logps/rejected": -1889.571533203125, |
|
"logps_avg/chosen": -0.7389290928840637, |
|
"logps_avg/rejected": -6.390555381774902, |
|
"loss": 0.6763, |
|
"losses_ref": -0.05337480455636978, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 265, |
|
"u": -2.2342476844787598, |
|
"weight": 0.06287747621536255 |
|
}, |
|
{ |
|
"diff_generated": -24.129053115844727, |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 6.150486891273085, |
|
"learning_rate": 1.803820559763439e-06, |
|
"logits/chosen": -2.463932752609253, |
|
"logits/rejected": -2.262209415435791, |
|
"logps/chosen": -218.674072265625, |
|
"logps/rejected": -2158.11376953125, |
|
"logps_avg/chosen": -0.7358182072639465, |
|
"logps_avg/rejected": -7.238715171813965, |
|
"loss": 0.7092, |
|
"losses_ref": -0.05700932815670967, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 270, |
|
"u": -2.235081911087036, |
|
"weight": 0.0649222731590271 |
|
}, |
|
{ |
|
"diff_generated": -22.390344619750977, |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 7.077728369391663, |
|
"learning_rate": 1.7927957584402895e-06, |
|
"logits/chosen": -2.4641366004943848, |
|
"logits/rejected": -2.289757251739502, |
|
"logps/chosen": -230.87442016601562, |
|
"logps/rejected": -1978.302734375, |
|
"logps_avg/chosen": -0.6890340447425842, |
|
"logps_avg/rejected": -6.717103004455566, |
|
"loss": 0.6762, |
|
"losses_ref": -0.05622117966413498, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 275, |
|
"u": -2.217959403991699, |
|
"weight": 0.07241992652416229 |
|
}, |
|
{ |
|
"diff_generated": -21.651906967163086, |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 6.269922997412507, |
|
"learning_rate": 1.78150524316067e-06, |
|
"logits/chosen": -2.512561082839966, |
|
"logits/rejected": -2.3291046619415283, |
|
"logps/chosen": -247.04129028320312, |
|
"logps/rejected": -1997.1549072265625, |
|
"logps_avg/chosen": -0.7235974073410034, |
|
"logps_avg/rejected": -6.495572566986084, |
|
"loss": 0.6702, |
|
"losses_ref": -0.04933195561170578, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 280, |
|
"u": -2.2435684204101562, |
|
"weight": 0.05631055310368538 |
|
}, |
|
{ |
|
"diff_generated": -24.519784927368164, |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 6.4591538424452475, |
|
"learning_rate": 1.7699527980612304e-06, |
|
"logits/chosen": -2.533612012863159, |
|
"logits/rejected": -2.310060501098633, |
|
"logps/chosen": -241.06430053710938, |
|
"logps/rejected": -2117.74609375, |
|
"logps_avg/chosen": -0.7511512041091919, |
|
"logps_avg/rejected": -7.355935573577881, |
|
"loss": 0.7064, |
|
"losses_ref": -0.0406634621322155, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 285, |
|
"u": -2.252286434173584, |
|
"weight": 0.04695131629705429 |
|
}, |
|
{ |
|
"diff_generated": -22.960649490356445, |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 6.2154170319293724, |
|
"learning_rate": 1.758142295067194e-06, |
|
"logits/chosen": -2.508026123046875, |
|
"logits/rejected": -2.2768871784210205, |
|
"logps/chosen": -256.1479797363281, |
|
"logps/rejected": -2004.0556640625, |
|
"logps_avg/chosen": -0.7584555745124817, |
|
"logps_avg/rejected": -6.888195037841797, |
|
"loss": 0.6642, |
|
"losses_ref": -0.05948421359062195, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 290, |
|
"u": -2.2216153144836426, |
|
"weight": 0.07435683906078339 |
|
}, |
|
{ |
|
"diff_generated": -23.191375732421875, |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 6.58174772631908, |
|
"learning_rate": 1.7460776925946416e-06, |
|
"logits/chosen": -2.5151877403259277, |
|
"logits/rejected": -2.297478199005127, |
|
"logps/chosen": -233.7965087890625, |
|
"logps/rejected": -2135.15673828125, |
|
"logps_avg/chosen": -0.7887166738510132, |
|
"logps_avg/rejected": -6.957413673400879, |
|
"loss": 0.6755, |
|
"losses_ref": -0.055867087095975876, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 295, |
|
"u": -2.237914562225342, |
|
"weight": 0.062143467366695404 |
|
}, |
|
{ |
|
"diff_generated": -24.709823608398438, |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 7.437442244122165, |
|
"learning_rate": 1.7337630342238039e-06, |
|
"logits/chosen": -2.525470018386841, |
|
"logits/rejected": -2.3166513442993164, |
|
"logps/chosen": -229.94558715820312, |
|
"logps/rejected": -2169.215576171875, |
|
"logps_avg/chosen": -0.7630201578140259, |
|
"logps_avg/rejected": -7.412947177886963, |
|
"loss": 0.7146, |
|
"losses_ref": -0.0521920807659626, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 300, |
|
"u": -2.238734722137451, |
|
"weight": 0.06009601429104805 |
|
}, |
|
{ |
|
"diff_generated": -25.142507553100586, |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 6.81810983140467, |
|
"learning_rate": 1.7212024473438145e-06, |
|
"logits/chosen": -2.5295021533966064, |
|
"logits/rejected": -2.3437719345092773, |
|
"logps/chosen": -230.28018188476562, |
|
"logps/rejected": -2279.5810546875, |
|
"logps_avg/chosen": -0.6913032531738281, |
|
"logps_avg/rejected": -7.54275369644165, |
|
"loss": 0.6605, |
|
"losses_ref": -0.04879279434680939, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 305, |
|
"u": -2.2388291358947754, |
|
"weight": 0.058758098632097244 |
|
}, |
|
{ |
|
"diff_generated": -24.345029830932617, |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 6.09422333137857, |
|
"learning_rate": 1.70840014176937e-06, |
|
"logits/chosen": -2.496091604232788, |
|
"logits/rejected": -2.2605936527252197, |
|
"logps/chosen": -264.0978698730469, |
|
"logps/rejected": -2208.2470703125, |
|
"logps_avg/chosen": -0.7388861179351807, |
|
"logps_avg/rejected": -7.303508758544922, |
|
"loss": 0.6912, |
|
"losses_ref": -0.042303841561079025, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 310, |
|
"u": -2.246720552444458, |
|
"weight": 0.05154282599687576 |
|
}, |
|
{ |
|
"diff_generated": -23.305843353271484, |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 6.009874799920644, |
|
"learning_rate": 1.6953604083297663e-06, |
|
"logits/chosen": -2.5141513347625732, |
|
"logits/rejected": -2.3054990768432617, |
|
"logps/chosen": -241.82406616210938, |
|
"logps/rejected": -2167.42724609375, |
|
"logps_avg/chosen": -0.740818202495575, |
|
"logps_avg/rejected": -6.991753578186035, |
|
"loss": 0.6887, |
|
"losses_ref": -0.0596298985183239, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 315, |
|
"u": -2.2182247638702393, |
|
"weight": 0.07611407339572906 |
|
}, |
|
{ |
|
"diff_generated": -27.0042724609375, |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 5.920473182891855, |
|
"learning_rate": 1.6820876174307821e-06, |
|
"logits/chosen": -2.482053279876709, |
|
"logits/rejected": -2.2886459827423096, |
|
"logps/chosen": -223.24893188476562, |
|
"logps/rejected": -2428.3193359375, |
|
"logps_avg/chosen": -0.7374002933502197, |
|
"logps_avg/rejected": -8.101282119750977, |
|
"loss": 0.6816, |
|
"losses_ref": -0.049068134278059006, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 320, |
|
"u": -2.235114574432373, |
|
"weight": 0.06041133403778076 |
|
}, |
|
{ |
|
"diff_generated": -25.161632537841797, |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 6.759097342452152, |
|
"learning_rate": 1.668586217589889e-06, |
|
"logits/chosen": -2.4576220512390137, |
|
"logits/rejected": -2.2568023204803467, |
|
"logps/chosen": -255.9824676513672, |
|
"logps/rejected": -2272.87451171875, |
|
"logps_avg/chosen": -0.8112290501594543, |
|
"logps_avg/rejected": -7.548490047454834, |
|
"loss": 0.7034, |
|
"losses_ref": -0.04155198484659195, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 325, |
|
"u": -2.2597880363464355, |
|
"weight": 0.04243909567594528 |
|
}, |
|
{ |
|
"diff_generated": -26.866863250732422, |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 5.913181146879915, |
|
"learning_rate": 1.6548607339452852e-06, |
|
"logits/chosen": -2.5034430027008057, |
|
"logits/rejected": -2.2873404026031494, |
|
"logps/chosen": -219.890625, |
|
"logps/rejected": -2450.533203125, |
|
"logps_avg/chosen": -0.7192927598953247, |
|
"logps_avg/rejected": -8.060060501098633, |
|
"loss": 0.679, |
|
"losses_ref": -0.04148325324058533, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 330, |
|
"u": -2.258359432220459, |
|
"weight": 0.04333708435297012 |
|
}, |
|
{ |
|
"diff_generated": -26.58041000366211, |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 6.258862828154151, |
|
"learning_rate": 1.6409157667392455e-06, |
|
"logits/chosen": -2.5029423236846924, |
|
"logits/rejected": -2.2649450302124023, |
|
"logps/chosen": -239.6374969482422, |
|
"logps/rejected": -2410.685302734375, |
|
"logps_avg/chosen": -0.7706997990608215, |
|
"logps_avg/rejected": -7.974122524261475, |
|
"loss": 0.7035, |
|
"losses_ref": -0.05212752893567085, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 335, |
|
"u": -2.2294507026672363, |
|
"weight": 0.06685757637023926 |
|
}, |
|
{ |
|
"diff_generated": -30.061986923217773, |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 7.477206152513725, |
|
"learning_rate": 1.6267559897763027e-06, |
|
"logits/chosen": -2.3795595169067383, |
|
"logits/rejected": -2.18742036819458, |
|
"logps/chosen": -192.0414276123047, |
|
"logps/rejected": -2674.73486328125, |
|
"logps_avg/chosen": -0.7409474849700928, |
|
"logps_avg/rejected": -9.018596649169922, |
|
"loss": 0.6831, |
|
"losses_ref": -0.044330693781375885, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 340, |
|
"u": -2.235874652862549, |
|
"weight": 0.059127964079380035 |
|
}, |
|
{ |
|
"diff_generated": -28.720870971679688, |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 5.9002590426162325, |
|
"learning_rate": 1.6123861488567708e-06, |
|
"logits/chosen": -2.4881465435028076, |
|
"logits/rejected": -2.2146873474121094, |
|
"logps/chosen": -260.3475341796875, |
|
"logps/rejected": -2515.25, |
|
"logps_avg/chosen": -0.7461652755737305, |
|
"logps_avg/rejected": -8.61626148223877, |
|
"loss": 0.6968, |
|
"losses_ref": -0.044901080429553986, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 345, |
|
"u": -2.2523629665374756, |
|
"weight": 0.04855785518884659 |
|
}, |
|
{ |
|
"diff_generated": -28.04868507385254, |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 15.894199978415127, |
|
"learning_rate": 1.5978110601861409e-06, |
|
"logits/chosen": -2.471588611602783, |
|
"logits/rejected": -2.2580113410949707, |
|
"logps/chosen": -255.3411102294922, |
|
"logps/rejected": -2506.482666015625, |
|
"logps_avg/chosen": -0.7827759385108948, |
|
"logps_avg/rejected": -8.414606094360352, |
|
"loss": 0.7362, |
|
"losses_ref": -0.04014447331428528, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 350, |
|
"u": -2.2483315467834473, |
|
"weight": 0.04962104931473732 |
|
}, |
|
{ |
|
"diff_generated": -27.11871337890625, |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 5.4012187487436725, |
|
"learning_rate": 1.5830356087608763e-06, |
|
"logits/chosen": -2.4285144805908203, |
|
"logits/rejected": -2.1649179458618164, |
|
"logps/chosen": -218.6619415283203, |
|
"logps/rejected": -2413.4892578125, |
|
"logps_avg/chosen": -0.7086374163627625, |
|
"logps_avg/rejected": -8.135615348815918, |
|
"loss": 0.7021, |
|
"losses_ref": -0.03781733289361, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 355, |
|
"u": -2.2616829872131348, |
|
"weight": 0.0397658608853817 |
|
}, |
|
{ |
|
"diff_generated": -26.739330291748047, |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 6.5263260405852614, |
|
"learning_rate": 1.5680647467311555e-06, |
|
"logits/chosen": -2.4289963245391846, |
|
"logits/rejected": -2.133953332901001, |
|
"logps/chosen": -247.11563110351562, |
|
"logps/rejected": -2465.95849609375, |
|
"logps_avg/chosen": -0.7823926210403442, |
|
"logps_avg/rejected": -8.02180004119873, |
|
"loss": 0.6853, |
|
"losses_ref": -0.0527551993727684, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 360, |
|
"u": -2.2420668601989746, |
|
"weight": 0.0583949089050293 |
|
}, |
|
{ |
|
"diff_generated": -27.283761978149414, |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 6.979588218526593, |
|
"learning_rate": 1.552903491741107e-06, |
|
"logits/chosen": -2.449387550354004, |
|
"logits/rejected": -2.1368231773376465, |
|
"logps/chosen": -234.6686553955078, |
|
"logps/rejected": -2578.747802734375, |
|
"logps_avg/chosen": -0.740507185459137, |
|
"logps_avg/rejected": -8.185129165649414, |
|
"loss": 0.6824, |
|
"losses_ref": -0.03961649537086487, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 365, |
|
"u": -2.2473196983337402, |
|
"weight": 0.05010756850242615 |
|
}, |
|
{ |
|
"diff_generated": -26.48313331604004, |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 7.697158528726637, |
|
"learning_rate": 1.5375569252470895e-06, |
|
"logits/chosen": -2.5160136222839355, |
|
"logits/rejected": -2.2105443477630615, |
|
"logps/chosen": -270.76727294921875, |
|
"logps/rejected": -2356.61376953125, |
|
"logps_avg/chosen": -0.798203706741333, |
|
"logps_avg/rejected": -7.944940090179443, |
|
"loss": 0.6956, |
|
"losses_ref": -0.03987672179937363, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 370, |
|
"u": -2.259321689605713, |
|
"weight": 0.042373161762952805 |
|
}, |
|
{ |
|
"diff_generated": -25.16873550415039, |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 6.394620991151716, |
|
"learning_rate": 1.5220301908145903e-06, |
|
"logits/chosen": -2.464231491088867, |
|
"logits/rejected": -2.1346030235290527, |
|
"logps/chosen": -240.89230346679688, |
|
"logps/rejected": -2322.256591796875, |
|
"logps_avg/chosen": -0.6929277181625366, |
|
"logps_avg/rejected": -7.55062198638916, |
|
"loss": 0.6924, |
|
"losses_ref": -0.04263712465763092, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 375, |
|
"u": -2.2447123527526855, |
|
"weight": 0.05186506360769272 |
|
}, |
|
{ |
|
"diff_generated": -26.598400115966797, |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 6.833084085030009, |
|
"learning_rate": 1.5063284923943028e-06, |
|
"logits/chosen": -2.4700121879577637, |
|
"logits/rejected": -2.1623213291168213, |
|
"logps/chosen": -255.25228881835938, |
|
"logps/rejected": -2325.41162109375, |
|
"logps_avg/chosen": -0.7505702376365662, |
|
"logps_avg/rejected": -7.97952127456665, |
|
"loss": 0.6914, |
|
"losses_ref": -0.039328016340732574, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 380, |
|
"u": -2.2589406967163086, |
|
"weight": 0.04286640137434006 |
|
}, |
|
{ |
|
"diff_generated": -29.339923858642578, |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 6.446112080414134, |
|
"learning_rate": 1.490457092577968e-06, |
|
"logits/chosen": -2.4463934898376465, |
|
"logits/rejected": -2.0776758193969727, |
|
"logps/chosen": -232.91452026367188, |
|
"logps/rejected": -2714.375244140625, |
|
"logps_avg/chosen": -0.6785185933113098, |
|
"logps_avg/rejected": -8.801977157592773, |
|
"loss": 0.6865, |
|
"losses_ref": -0.04436464607715607, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 385, |
|
"u": -2.247189998626709, |
|
"weight": 0.05228755623102188 |
|
}, |
|
{ |
|
"diff_generated": -27.133153915405273, |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 5.888520537518448, |
|
"learning_rate": 1.4744213108345602e-06, |
|
"logits/chosen": -2.5249063968658447, |
|
"logits/rejected": -2.1448757648468018, |
|
"logps/chosen": -258.61212158203125, |
|
"logps/rejected": -2449.294677734375, |
|
"logps_avg/chosen": -0.7527631521224976, |
|
"logps_avg/rejected": -8.139945983886719, |
|
"loss": 0.685, |
|
"losses_ref": -0.0589534267783165, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 390, |
|
"u": -2.2169764041900635, |
|
"weight": 0.0769612193107605 |
|
}, |
|
{ |
|
"diff_generated": -27.033132553100586, |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 6.211789156823427, |
|
"learning_rate": 1.4582265217274103e-06, |
|
"logits/chosen": -2.4122936725616455, |
|
"logits/rejected": -2.095203161239624, |
|
"logps/chosen": -251.5576629638672, |
|
"logps/rejected": -2401.735595703125, |
|
"logps_avg/chosen": -0.7489043474197388, |
|
"logps_avg/rejected": -8.109941482543945, |
|
"loss": 0.6753, |
|
"losses_ref": -0.048131681978702545, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 395, |
|
"u": -2.247305393218994, |
|
"weight": 0.053915899246931076 |
|
}, |
|
{ |
|
"diff_generated": -30.035808563232422, |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 6.698107767192597, |
|
"learning_rate": 1.4418781531128635e-06, |
|
"logits/chosen": -2.486995220184326, |
|
"logits/rejected": -2.131185531616211, |
|
"logps/chosen": -239.08642578125, |
|
"logps/rejected": -2759.15625, |
|
"logps_avg/chosen": -0.7630764245986938, |
|
"logps_avg/rejected": -9.010741233825684, |
|
"loss": 0.6892, |
|
"losses_ref": -0.036631032824516296, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 400, |
|
"u": -2.2631499767303467, |
|
"weight": 0.038820598274469376 |
|
}, |
|
{ |
|
"diff_generated": -27.721935272216797, |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 7.031324917308057, |
|
"learning_rate": 1.4253816843210748e-06, |
|
"logits/chosen": -2.4483680725097656, |
|
"logits/rejected": -2.089618444442749, |
|
"logps/chosen": -249.0079803466797, |
|
"logps/rejected": -2574.352783203125, |
|
"logps_avg/chosen": -0.722091019153595, |
|
"logps_avg/rejected": -8.316580772399902, |
|
"loss": 0.7066, |
|
"losses_ref": -0.043711207807064056, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 405, |
|
"u": -2.2515838146209717, |
|
"weight": 0.048544611781835556 |
|
}, |
|
{ |
|
"diff_generated": -30.00594711303711, |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 5.878873328550679, |
|
"learning_rate": 1.4087426443195547e-06, |
|
"logits/chosen": -2.4377264976501465, |
|
"logits/rejected": -2.0860629081726074, |
|
"logps/chosen": -220.13644409179688, |
|
"logps/rejected": -2700.03369140625, |
|
"logps_avg/chosen": -0.7378045916557312, |
|
"logps_avg/rejected": -9.001784324645996, |
|
"loss": 0.6757, |
|
"losses_ref": -0.032459113746881485, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 410, |
|
"u": -2.2480547428131104, |
|
"weight": 0.04561341553926468 |
|
}, |
|
{ |
|
"diff_generated": -27.96181297302246, |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 6.085121754886306, |
|
"learning_rate": 1.391966609860075e-06, |
|
"logits/chosen": -2.4773359298706055, |
|
"logits/rejected": -2.129520893096924, |
|
"logps/chosen": -239.4454803466797, |
|
"logps/rejected": -2550.92919921875, |
|
"logps_avg/chosen": -0.7163268327713013, |
|
"logps_avg/rejected": -8.388544082641602, |
|
"loss": 0.6864, |
|
"losses_ref": -0.03842215612530708, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 415, |
|
"u": -2.2578535079956055, |
|
"weight": 0.04306939244270325 |
|
}, |
|
{ |
|
"diff_generated": -27.701797485351562, |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 6.898834621323108, |
|
"learning_rate": 1.3750592036095619e-06, |
|
"logits/chosen": -2.4759981632232666, |
|
"logits/rejected": -2.1207737922668457, |
|
"logps/chosen": -255.3009490966797, |
|
"logps/rejected": -2467.61328125, |
|
"logps_avg/chosen": -0.7468316555023193, |
|
"logps_avg/rejected": -8.310539245605469, |
|
"loss": 0.6929, |
|
"losses_ref": -0.050536155700683594, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 420, |
|
"u": -2.2376935482025146, |
|
"weight": 0.05989469215273857 |
|
}, |
|
{ |
|
"diff_generated": -27.900798797607422, |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 7.318402161699278, |
|
"learning_rate": 1.3580260922655984e-06, |
|
"logits/chosen": -2.459564685821533, |
|
"logits/rejected": -2.133777379989624, |
|
"logps/chosen": -232.8207550048828, |
|
"logps/rejected": -2438.7041015625, |
|
"logps_avg/chosen": -0.7522517442703247, |
|
"logps_avg/rejected": -8.370241165161133, |
|
"loss": 0.6907, |
|
"losses_ref": -0.040023092180490494, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 425, |
|
"u": -2.2584662437438965, |
|
"weight": 0.043312422931194305 |
|
}, |
|
{ |
|
"diff_generated": -29.812658309936523, |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 6.38418063766252, |
|
"learning_rate": 1.3408729846571713e-06, |
|
"logits/chosen": -2.4594979286193848, |
|
"logits/rejected": -2.071135997772217, |
|
"logps/chosen": -280.634521484375, |
|
"logps/rejected": -2652.205322265625, |
|
"logps_avg/chosen": -0.7122408747673035, |
|
"logps_avg/rejected": -8.943798065185547, |
|
"loss": 0.6859, |
|
"losses_ref": -0.03510651737451553, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 430, |
|
"u": -2.2575087547302246, |
|
"weight": 0.04237521067261696 |
|
}, |
|
{ |
|
"diff_generated": -25.203630447387695, |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 6.588604544150575, |
|
"learning_rate": 1.3236056298312956e-06, |
|
"logits/chosen": -2.481071949005127, |
|
"logits/rejected": -2.1608479022979736, |
|
"logps/chosen": -234.13027954101562, |
|
"logps/rejected": -2276.569580078125, |
|
"logps_avg/chosen": -0.7077358365058899, |
|
"logps_avg/rejected": -7.561089992523193, |
|
"loss": 0.6722, |
|
"losses_ref": -0.04718080908060074, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 435, |
|
"u": -2.249262809753418, |
|
"weight": 0.05173084884881973 |
|
}, |
|
{ |
|
"diff_generated": -23.8907470703125, |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 6.857956310477159, |
|
"learning_rate": 1.3062298151261591e-06, |
|
"logits/chosen": -2.5335617065429688, |
|
"logits/rejected": -2.219560146331787, |
|
"logps/chosen": -250.57705688476562, |
|
"logps/rejected": -2190.95947265625, |
|
"logps_avg/chosen": -0.6971117854118347, |
|
"logps_avg/rejected": -7.167223930358887, |
|
"loss": 0.6666, |
|
"losses_ref": -0.04138738289475441, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 440, |
|
"u": -2.250927686691284, |
|
"weight": 0.04870566353201866 |
|
}, |
|
{ |
|
"diff_generated": -24.81663703918457, |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 7.035268937333438, |
|
"learning_rate": 1.2887513642314372e-06, |
|
"logits/chosen": -2.466610908508301, |
|
"logits/rejected": -2.1418159008026123, |
|
"logps/chosen": -234.072021484375, |
|
"logps/rejected": -2254.32177734375, |
|
"logps_avg/chosen": -0.7226396203041077, |
|
"logps_avg/rejected": -7.4449920654296875, |
|
"loss": 0.6772, |
|
"losses_ref": -0.02925349771976471, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 445, |
|
"u": -2.261955738067627, |
|
"weight": 0.036579299718141556 |
|
}, |
|
{ |
|
"diff_generated": -23.858409881591797, |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 5.8496221029871895, |
|
"learning_rate": 1.271176135236417e-06, |
|
"logits/chosen": -2.5474791526794434, |
|
"logits/rejected": -2.2467288970947266, |
|
"logps/chosen": -259.63043212890625, |
|
"logps/rejected": -2068.02978515625, |
|
"logps_avg/chosen": -0.7590965032577515, |
|
"logps_avg/rejected": -7.157523155212402, |
|
"loss": 0.6926, |
|
"losses_ref": -0.04666949436068535, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 450, |
|
"u": -2.24082612991333, |
|
"weight": 0.057170577347278595 |
|
}, |
|
{ |
|
"diff_generated": -21.257368087768555, |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 9.579263194990599, |
|
"learning_rate": 1.2535100186666e-06, |
|
"logits/chosen": -2.5334389209747314, |
|
"logits/rejected": -2.2800872325897217, |
|
"logps/chosen": -258.4393615722656, |
|
"logps/rejected": -1949.274658203125, |
|
"logps_avg/chosen": -0.7667442560195923, |
|
"logps_avg/rejected": -6.377211093902588, |
|
"loss": 0.7272, |
|
"losses_ref": -0.04685154929757118, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 455, |
|
"u": -2.239955186843872, |
|
"weight": 0.05661741644144058 |
|
}, |
|
{ |
|
"diff_generated": -21.260351181030273, |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 7.19097418251884, |
|
"learning_rate": 1.2357589355094273e-06, |
|
"logits/chosen": -2.5235114097595215, |
|
"logits/rejected": -2.2688846588134766, |
|
"logps/chosen": -274.0472106933594, |
|
"logps/rejected": -1854.4193115234375, |
|
"logps_avg/chosen": -0.7401561141014099, |
|
"logps_avg/rejected": -6.378105163574219, |
|
"loss": 0.6996, |
|
"losses_ref": -0.04187412187457085, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 460, |
|
"u": -2.255484104156494, |
|
"weight": 0.04560910537838936 |
|
}, |
|
{ |
|
"diff_generated": -20.870580673217773, |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 5.873029901097039, |
|
"learning_rate": 1.2179288352297982e-06, |
|
"logits/chosen": -2.5459725856781006, |
|
"logits/rejected": -2.300191879272461, |
|
"logps/chosen": -233.07363891601562, |
|
"logps/rejected": -1780.218505859375, |
|
"logps_avg/chosen": -0.676838755607605, |
|
"logps_avg/rejected": -6.26117467880249, |
|
"loss": 0.701, |
|
"losses_ref": -0.035965751856565475, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 465, |
|
"u": -2.2623190879821777, |
|
"weight": 0.03852839767932892 |
|
}, |
|
{ |
|
"diff_generated": -20.314434051513672, |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 6.047640955364439, |
|
"learning_rate": 1.2000256937760445e-06, |
|
"logits/chosen": -2.478569746017456, |
|
"logits/rejected": -2.2165324687957764, |
|
"logps/chosen": -241.59115600585938, |
|
"logps/rejected": -1793.3131103515625, |
|
"logps_avg/chosen": -0.7300271987915039, |
|
"logps_avg/rejected": -6.094330787658691, |
|
"loss": 0.6661, |
|
"losses_ref": -0.03309565782546997, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 470, |
|
"u": -2.2623355388641357, |
|
"weight": 0.03777972236275673 |
|
}, |
|
{ |
|
"diff_generated": -20.79926109313965, |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 8.164412498048108, |
|
"learning_rate": 1.1820555115770255e-06, |
|
"logits/chosen": -2.5342564582824707, |
|
"logits/rejected": -2.2890594005584717, |
|
"logps/chosen": -230.3572235107422, |
|
"logps/rejected": -1833.0390625, |
|
"logps_avg/chosen": -0.751907467842102, |
|
"logps_avg/rejected": -6.239778995513916, |
|
"loss": 0.6895, |
|
"losses_ref": -0.03975466638803482, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 475, |
|
"u": -2.2550594806671143, |
|
"weight": 0.04479961842298508 |
|
}, |
|
{ |
|
"diff_generated": -20.89034080505371, |
|
"epoch": 1.0049725202826485, |
|
"grad_norm": 9.91162629957212, |
|
"learning_rate": 1.1640243115310217e-06, |
|
"logits/chosen": -2.515481948852539, |
|
"logits/rejected": -2.238800525665283, |
|
"logps/chosen": -238.7968292236328, |
|
"logps/rejected": -1904.6226806640625, |
|
"logps_avg/chosen": -0.730613112449646, |
|
"logps_avg/rejected": -6.2671027183532715, |
|
"loss": 0.6185, |
|
"losses_ref": -0.0886848121881485, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 480, |
|
"u": -3.230264186859131, |
|
"weight": 0.0876741111278534 |
|
}, |
|
{ |
|
"diff_generated": -22.67684555053711, |
|
"epoch": 1.0154409840355927, |
|
"grad_norm": 8.949481189927978, |
|
"learning_rate": 1.1459381369870972e-06, |
|
"logits/chosen": -2.4899425506591797, |
|
"logits/rejected": -2.1274173259735107, |
|
"logps/chosen": -239.3141632080078, |
|
"logps/rejected": -2098.4287109375, |
|
"logps_avg/chosen": -0.6295738816261292, |
|
"logps_avg/rejected": -6.8030548095703125, |
|
"loss": 0.5199, |
|
"losses_ref": -0.09897326678037643, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 485, |
|
"u": -4.378706932067871, |
|
"weight": 0.08350441604852676 |
|
}, |
|
{ |
|
"diff_generated": -24.163660049438477, |
|
"epoch": 1.025909447788537, |
|
"grad_norm": 8.708694233875605, |
|
"learning_rate": 1.1278030497196046e-06, |
|
"logits/chosen": -2.448932409286499, |
|
"logits/rejected": -2.0961108207702637, |
|
"logps/chosen": -197.19461059570312, |
|
"logps/rejected": -2133.96630859375, |
|
"logps_avg/chosen": -0.5785419940948486, |
|
"logps_avg/rejected": -7.2490973472595215, |
|
"loss": 0.5111, |
|
"losses_ref": -0.12583398818969727, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 490, |
|
"u": -4.304468631744385, |
|
"weight": 0.12433832883834839 |
|
}, |
|
{ |
|
"diff_generated": -25.089040756225586, |
|
"epoch": 1.0363779115414813, |
|
"grad_norm": 8.538618246529412, |
|
"learning_rate": 1.1096251278965172e-06, |
|
"logits/chosen": -2.4840457439422607, |
|
"logits/rejected": -2.1427814960479736, |
|
"logps/chosen": -202.72528076171875, |
|
"logps/rejected": -2115.415283203125, |
|
"logps_avg/chosen": -0.5992251038551331, |
|
"logps_avg/rejected": -7.526711940765381, |
|
"loss": 0.4987, |
|
"losses_ref": -0.10639525949954987, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 495, |
|
"u": -4.343268394470215, |
|
"weight": 0.10977953672409058 |
|
}, |
|
{ |
|
"diff_generated": -24.132022857666016, |
|
"epoch": 1.0468463752944255, |
|
"grad_norm": 7.67811116418592, |
|
"learning_rate": 1.0914104640422679e-06, |
|
"logits/chosen": -2.4932920932769775, |
|
"logits/rejected": -2.1089999675750732, |
|
"logps/chosen": -199.10342407226562, |
|
"logps/rejected": -2176.26318359375, |
|
"logps_avg/chosen": -0.6183401346206665, |
|
"logps_avg/rejected": -7.2396063804626465, |
|
"loss": 0.5202, |
|
"losses_ref": -0.12012694031000137, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 500, |
|
"u": -4.302992820739746, |
|
"weight": 0.11936072260141373 |
|
}, |
|
{ |
|
"diff_generated": -23.451740264892578, |
|
"epoch": 1.05731483904737, |
|
"grad_norm": 20.37435210253164, |
|
"learning_rate": 1.0731651629957721e-06, |
|
"logits/chosen": -2.42221736907959, |
|
"logits/rejected": -2.1496148109436035, |
|
"logps/chosen": -229.11068725585938, |
|
"logps/rejected": -2094.52197265625, |
|
"logps_avg/chosen": -0.6533752679824829, |
|
"logps_avg/rejected": -7.035521507263184, |
|
"loss": 0.5184, |
|
"losses_ref": -0.1230870932340622, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 505, |
|
"u": -4.369751930236816, |
|
"weight": 0.1066075786948204 |
|
}, |
|
{ |
|
"diff_generated": -22.74098777770996, |
|
"epoch": 1.067783302800314, |
|
"grad_norm": 7.268444145722818, |
|
"learning_rate": 1.0548953398643274e-06, |
|
"logits/chosen": -2.4076297283172607, |
|
"logits/rejected": -2.0819380283355713, |
|
"logps/chosen": -233.77938842773438, |
|
"logps/rejected": -2035.225830078125, |
|
"logps_avg/chosen": -0.6575011014938354, |
|
"logps_avg/rejected": -6.822296142578125, |
|
"loss": 0.4947, |
|
"losses_ref": -0.14097091555595398, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 510, |
|
"u": -4.253186225891113, |
|
"weight": 0.1333218514919281 |
|
}, |
|
{ |
|
"diff_generated": -25.923725128173828, |
|
"epoch": 1.0782517665532583, |
|
"grad_norm": 8.062661700192072, |
|
"learning_rate": 1.0366071179740706e-06, |
|
"logits/chosen": -2.4787120819091797, |
|
"logits/rejected": -2.12414288520813, |
|
"logps/chosen": -257.2312927246094, |
|
"logps/rejected": -2302.900634765625, |
|
"logps_avg/chosen": -0.6627689003944397, |
|
"logps_avg/rejected": -7.777116298675537, |
|
"loss": 0.5085, |
|
"losses_ref": -0.10705102980136871, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 515, |
|
"u": -4.345104217529297, |
|
"weight": 0.10459395498037338 |
|
}, |
|
{ |
|
"diff_generated": -27.071746826171875, |
|
"epoch": 1.0887202303062025, |
|
"grad_norm": 7.3598703596101975, |
|
"learning_rate": 1.0183066268176775e-06, |
|
"logits/chosen": -2.436248779296875, |
|
"logits/rejected": -2.075679063796997, |
|
"logps/chosen": -244.1257781982422, |
|
"logps/rejected": -2375.113525390625, |
|
"logps_avg/chosen": -0.6157761812210083, |
|
"logps_avg/rejected": -8.1215238571167, |
|
"loss": 0.5683, |
|
"losses_ref": -0.08251279592514038, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 520, |
|
"u": -4.396883010864258, |
|
"weight": 0.06908340752124786 |
|
}, |
|
{ |
|
"diff_generated": -26.481449127197266, |
|
"epoch": 1.0991886940591469, |
|
"grad_norm": 8.892060607648993, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.4646589756011963, |
|
"logits/rejected": -2.096703290939331, |
|
"logps/chosen": -226.17453002929688, |
|
"logps/rejected": -2343.119384765625, |
|
"logps_avg/chosen": -0.6375609040260315, |
|
"logps_avg/rejected": -7.944435119628906, |
|
"loss": 0.5652, |
|
"losses_ref": -0.08028392493724823, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 525, |
|
"u": -4.399170398712158, |
|
"weight": 0.07661790400743484 |
|
}, |
|
{ |
|
"diff_generated": -25.77886962890625, |
|
"epoch": 1.109657157812091, |
|
"grad_norm": 11.93280848823974, |
|
"learning_rate": 9.816933731823228e-07, |
|
"logits/chosen": -2.4755985736846924, |
|
"logits/rejected": -2.1236746311187744, |
|
"logps/chosen": -219.5588836669922, |
|
"logps/rejected": -2258.547119140625, |
|
"logps_avg/chosen": -0.6109720468521118, |
|
"logps_avg/rejected": -7.733660697937012, |
|
"loss": 0.5032, |
|
"losses_ref": -0.09919899702072144, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 530, |
|
"u": -4.327098846435547, |
|
"weight": 0.0968068465590477 |
|
}, |
|
{ |
|
"diff_generated": -26.962757110595703, |
|
"epoch": 1.1201256215650353, |
|
"grad_norm": 11.74024044453861, |
|
"learning_rate": 9.633928820259293e-07, |
|
"logits/chosen": -2.382981777191162, |
|
"logits/rejected": -1.9988247156143188, |
|
"logps/chosen": -198.56578063964844, |
|
"logps/rejected": -2398.09326171875, |
|
"logps_avg/chosen": -0.6096338033676147, |
|
"logps_avg/rejected": -8.088827133178711, |
|
"loss": 0.5305, |
|
"losses_ref": -0.06856809556484222, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 535, |
|
"u": -4.411980152130127, |
|
"weight": 0.06424126774072647 |
|
}, |
|
{ |
|
"diff_generated": -26.22715187072754, |
|
"epoch": 1.1305940853179797, |
|
"grad_norm": 11.054487118285914, |
|
"learning_rate": 9.451046601356725e-07, |
|
"logits/chosen": -2.4410181045532227, |
|
"logits/rejected": -2.095543146133423, |
|
"logps/chosen": -207.6184844970703, |
|
"logps/rejected": -2253.38623046875, |
|
"logps_avg/chosen": -0.6336568593978882, |
|
"logps_avg/rejected": -7.868145942687988, |
|
"loss": 0.5357, |
|
"losses_ref": -0.0955720990896225, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 540, |
|
"u": -4.348645210266113, |
|
"weight": 0.09204810112714767 |
|
}, |
|
{ |
|
"diff_generated": -25.266141891479492, |
|
"epoch": 1.1410625490709239, |
|
"grad_norm": 8.805909515635294, |
|
"learning_rate": 9.268348370042281e-07, |
|
"logits/chosen": -2.4485838413238525, |
|
"logits/rejected": -2.1053905487060547, |
|
"logps/chosen": -216.48910522460938, |
|
"logps/rejected": -2250.44775390625, |
|
"logps_avg/chosen": -0.588961124420166, |
|
"logps_avg/rejected": -7.579843044281006, |
|
"loss": 0.5159, |
|
"losses_ref": -0.09172032028436661, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 545, |
|
"u": -4.326230525970459, |
|
"weight": 0.09086887538433075 |
|
}, |
|
{ |
|
"diff_generated": -26.917110443115234, |
|
"epoch": 1.151531012823868, |
|
"grad_norm": 10.666064793677686, |
|
"learning_rate": 9.085895359577323e-07, |
|
"logits/chosen": -2.404174566268921, |
|
"logits/rejected": -2.037463665008545, |
|
"logps/chosen": -205.3460235595703, |
|
"logps/rejected": -2429.36279296875, |
|
"logps_avg/chosen": -0.5989923477172852, |
|
"logps_avg/rejected": -8.07513427734375, |
|
"loss": 0.5332, |
|
"losses_ref": -0.06065789982676506, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 550, |
|
"u": -4.387241363525391, |
|
"weight": 0.0559367910027504 |
|
}, |
|
{ |
|
"diff_generated": -25.942188262939453, |
|
"epoch": 1.1619994765768125, |
|
"grad_norm": 10.199822581929254, |
|
"learning_rate": 8.903748721034826e-07, |
|
"logits/chosen": -2.432077407836914, |
|
"logits/rejected": -2.0631113052368164, |
|
"logps/chosen": -209.88076782226562, |
|
"logps/rejected": -2297.24853515625, |
|
"logps_avg/chosen": -0.6222396492958069, |
|
"logps_avg/rejected": -7.782655239105225, |
|
"loss": 0.5436, |
|
"losses_ref": -0.053764212876558304, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 555, |
|
"u": -4.413111209869385, |
|
"weight": 0.05243021994829178 |
|
}, |
|
{ |
|
"diff_generated": -26.842655181884766, |
|
"epoch": 1.1724679403297567, |
|
"grad_norm": 9.055623269790141, |
|
"learning_rate": 8.721969502803953e-07, |
|
"logits/chosen": -2.4761881828308105, |
|
"logits/rejected": -2.037745952606201, |
|
"logps/chosen": -228.0619659423828, |
|
"logps/rejected": -2454.422607421875, |
|
"logps_avg/chosen": -0.6156254410743713, |
|
"logps_avg/rejected": -8.052797317504883, |
|
"loss": 0.4938, |
|
"losses_ref": -0.06194459646940231, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 560, |
|
"u": -4.417675018310547, |
|
"weight": 0.05182374641299248 |
|
}, |
|
{ |
|
"diff_generated": -25.78971290588379, |
|
"epoch": 1.1829364040827008, |
|
"grad_norm": 11.397081928275703, |
|
"learning_rate": 8.540618630129027e-07, |
|
"logits/chosen": -2.4368996620178223, |
|
"logits/rejected": -2.0613627433776855, |
|
"logps/chosen": -244.33059692382812, |
|
"logps/rejected": -2314.3056640625, |
|
"logps_avg/chosen": -0.6685888171195984, |
|
"logps_avg/rejected": -7.736914157867432, |
|
"loss": 0.5495, |
|
"losses_ref": -0.07071459293365479, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 565, |
|
"u": -4.390778064727783, |
|
"weight": 0.06269918382167816 |
|
}, |
|
{ |
|
"diff_generated": -26.82694435119629, |
|
"epoch": 1.193404867835645, |
|
"grad_norm": 9.221832000440747, |
|
"learning_rate": 8.359756884689783e-07, |
|
"logits/chosen": -2.497908115386963, |
|
"logits/rejected": -2.125258207321167, |
|
"logps/chosen": -215.4803009033203, |
|
"logps/rejected": -2407.225830078125, |
|
"logps_avg/chosen": -0.6236811876296997, |
|
"logps_avg/rejected": -8.048083305358887, |
|
"loss": 0.5244, |
|
"losses_ref": -0.08507435768842697, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 570, |
|
"u": -4.405519485473633, |
|
"weight": 0.07415871322154999 |
|
}, |
|
{ |
|
"diff_generated": -27.44614601135254, |
|
"epoch": 1.2038733315885894, |
|
"grad_norm": 14.484772212758768, |
|
"learning_rate": 8.179444884229744e-07, |
|
"logits/chosen": -2.415398597717285, |
|
"logits/rejected": -2.0458593368530273, |
|
"logps/chosen": -224.60482788085938, |
|
"logps/rejected": -2476.796142578125, |
|
"logps_avg/chosen": -0.6788522601127625, |
|
"logps_avg/rejected": -8.233844757080078, |
|
"loss": 0.5625, |
|
"losses_ref": -0.05934012681245804, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 575, |
|
"u": -4.461823463439941, |
|
"weight": 0.044574279338121414 |
|
}, |
|
{ |
|
"diff_generated": -29.135217666625977, |
|
"epoch": 1.2143417953415336, |
|
"grad_norm": 18.01394064023352, |
|
"learning_rate": 7.999743062239557e-07, |
|
"logits/chosen": -2.4544944763183594, |
|
"logits/rejected": -2.104241371154785, |
|
"logps/chosen": -210.87893676757812, |
|
"logps/rejected": -2643.50390625, |
|
"logps_avg/chosen": -0.6716314554214478, |
|
"logps_avg/rejected": -8.740565299987793, |
|
"loss": 0.5555, |
|
"losses_ref": -0.056417226791381836, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 580, |
|
"u": -4.44529914855957, |
|
"weight": 0.04976346716284752 |
|
}, |
|
{ |
|
"diff_generated": -27.484622955322266, |
|
"epoch": 1.2248102590944778, |
|
"grad_norm": 10.29630717051048, |
|
"learning_rate": 7.820711647702017e-07, |
|
"logits/chosen": -2.4541475772857666, |
|
"logits/rejected": -2.0904035568237305, |
|
"logps/chosen": -202.5820770263672, |
|
"logps/rejected": -2515.11962890625, |
|
"logps_avg/chosen": -0.5754384994506836, |
|
"logps_avg/rejected": -8.245387077331543, |
|
"loss": 0.5346, |
|
"losses_ref": -0.08221448957920074, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 585, |
|
"u": -4.365923881530762, |
|
"weight": 0.07960718125104904 |
|
}, |
|
{ |
|
"diff_generated": -26.950695037841797, |
|
"epoch": 1.235278722847422, |
|
"grad_norm": 10.223108898541343, |
|
"learning_rate": 7.642410644905726e-07, |
|
"logits/chosen": -2.3840575218200684, |
|
"logits/rejected": -2.0544769763946533, |
|
"logps/chosen": -205.935546875, |
|
"logps/rejected": -2364.6396484375, |
|
"logps_avg/chosen": -0.5895050764083862, |
|
"logps_avg/rejected": -8.08520793914795, |
|
"loss": 0.5503, |
|
"losses_ref": -0.10383725166320801, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 590, |
|
"u": -4.365530490875244, |
|
"weight": 0.09789486229419708 |
|
}, |
|
{ |
|
"diff_generated": -29.25247573852539, |
|
"epoch": 1.2457471866003664, |
|
"grad_norm": 12.09100466478698, |
|
"learning_rate": 7.464899813334e-07, |
|
"logits/chosen": -2.3943965435028076, |
|
"logits/rejected": -2.067821979522705, |
|
"logps/chosen": -215.44094848632812, |
|
"logps/rejected": -2522.196533203125, |
|
"logps_avg/chosen": -0.6099680662155151, |
|
"logps_avg/rejected": -8.77574348449707, |
|
"loss": 0.5325, |
|
"losses_ref": -0.07746943831443787, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 595, |
|
"u": -4.356167793273926, |
|
"weight": 0.07601340860128403 |
|
}, |
|
{ |
|
"diff_generated": -27.34578514099121, |
|
"epoch": 1.2562156503533106, |
|
"grad_norm": 8.052346731222642, |
|
"learning_rate": 7.288238647635829e-07, |
|
"logits/chosen": -2.435148239135742, |
|
"logits/rejected": -2.1030170917510986, |
|
"logps/chosen": -226.7269744873047, |
|
"logps/rejected": -2427.451171875, |
|
"logps_avg/chosen": -0.6252392530441284, |
|
"logps_avg/rejected": -8.2037353515625, |
|
"loss": 0.5356, |
|
"losses_ref": -0.06464961916208267, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 600, |
|
"u": -4.406424522399902, |
|
"weight": 0.061459980905056 |
|
}, |
|
{ |
|
"diff_generated": -27.118465423583984, |
|
"epoch": 1.2666841141062548, |
|
"grad_norm": 11.655006277757288, |
|
"learning_rate": 7.112486357685631e-07, |
|
"logits/chosen": -2.450383424758911, |
|
"logits/rejected": -2.0887584686279297, |
|
"logps/chosen": -222.7769012451172, |
|
"logps/rejected": -2357.30712890625, |
|
"logps_avg/chosen": -0.6189793348312378, |
|
"logps_avg/rejected": -8.135540008544922, |
|
"loss": 0.5517, |
|
"losses_ref": -0.08965682238340378, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 605, |
|
"u": -4.366550445556641, |
|
"weight": 0.09028217941522598 |
|
}, |
|
{ |
|
"diff_generated": -27.826339721679688, |
|
"epoch": 1.2771525778591992, |
|
"grad_norm": 8.355569379147827, |
|
"learning_rate": 6.937701848738407e-07, |
|
"logits/chosen": -2.4444997310638428, |
|
"logits/rejected": -2.103099822998047, |
|
"logps/chosen": -200.1586151123047, |
|
"logps/rejected": -2441.192138671875, |
|
"logps_avg/chosen": -0.5492798089981079, |
|
"logps_avg/rejected": -8.347902297973633, |
|
"loss": 0.5273, |
|
"losses_ref": -0.05201203376054764, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 610, |
|
"u": -4.470887660980225, |
|
"weight": 0.04102148860692978 |
|
}, |
|
{ |
|
"diff_generated": -27.140499114990234, |
|
"epoch": 1.2876210416121434, |
|
"grad_norm": 19.07484346081228, |
|
"learning_rate": 6.763943701687045e-07, |
|
"logits/chosen": -2.4840033054351807, |
|
"logits/rejected": -2.0714080333709717, |
|
"logps/chosen": -237.1542510986328, |
|
"logps/rejected": -2492.620849609375, |
|
"logps_avg/chosen": -0.6195243000984192, |
|
"logps_avg/rejected": -8.142149925231934, |
|
"loss": 0.5249, |
|
"losses_ref": -0.07448837906122208, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 615, |
|
"u": -4.392305374145508, |
|
"weight": 0.06138737127184868 |
|
}, |
|
{ |
|
"diff_generated": -29.331090927124023, |
|
"epoch": 1.2980895053650876, |
|
"grad_norm": 14.350296949575641, |
|
"learning_rate": 6.591270153428288e-07, |
|
"logits/chosen": -2.5314509868621826, |
|
"logits/rejected": -2.1232359409332275, |
|
"logps/chosen": -230.3607940673828, |
|
"logps/rejected": -2496.131103515625, |
|
"logps_avg/chosen": -0.6086186170578003, |
|
"logps_avg/rejected": -8.799327850341797, |
|
"loss": 0.5301, |
|
"losses_ref": -0.05894411355257034, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 620, |
|
"u": -4.37734842300415, |
|
"weight": 0.053639549762010574 |
|
}, |
|
{ |
|
"diff_generated": -26.76749038696289, |
|
"epoch": 1.308557969118032, |
|
"grad_norm": 8.772096019129755, |
|
"learning_rate": 6.419739077344016e-07, |
|
"logits/chosen": -2.517256259918213, |
|
"logits/rejected": -2.158301591873169, |
|
"logps/chosen": -236.55648803710938, |
|
"logps/rejected": -2372.91796875, |
|
"logps_avg/chosen": -0.6213998794555664, |
|
"logps_avg/rejected": -8.030247688293457, |
|
"loss": 0.544, |
|
"losses_ref": -0.09482914954423904, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 625, |
|
"u": -4.376565456390381, |
|
"weight": 0.07662535458803177 |
|
}, |
|
{ |
|
"diff_generated": -28.416824340820312, |
|
"epoch": 1.3190264328709762, |
|
"grad_norm": 7.8466631670725935, |
|
"learning_rate": 6.24940796390438e-07, |
|
"logits/chosen": -2.4629857540130615, |
|
"logits/rejected": -2.0768308639526367, |
|
"logps/chosen": -214.29360961914062, |
|
"logps/rejected": -2455.93115234375, |
|
"logps_avg/chosen": -0.6123236417770386, |
|
"logps_avg/rejected": -8.52504825592041, |
|
"loss": 0.5392, |
|
"losses_ref": -0.059877872467041016, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 630, |
|
"u": -4.431421756744385, |
|
"weight": 0.043088506907224655 |
|
}, |
|
{ |
|
"diff_generated": -29.8402099609375, |
|
"epoch": 1.3294948966239204, |
|
"grad_norm": 20.160929381759352, |
|
"learning_rate": 6.08033390139925e-07, |
|
"logits/chosen": -2.4479854106903076, |
|
"logits/rejected": -2.0140042304992676, |
|
"logps/chosen": -228.12948608398438, |
|
"logps/rejected": -2645.977294921875, |
|
"logps_avg/chosen": -0.6280118227005005, |
|
"logps_avg/rejected": -8.95206356048584, |
|
"loss": 0.5647, |
|
"losses_ref": -0.0805547907948494, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 635, |
|
"u": -4.419759750366211, |
|
"weight": 0.06746160984039307 |
|
}, |
|
{ |
|
"diff_generated": -29.193140029907227, |
|
"epoch": 1.3399633603768648, |
|
"grad_norm": 17.984653220174852, |
|
"learning_rate": 5.912573556804452e-07, |
|
"logits/chosen": -2.4721744060516357, |
|
"logits/rejected": -2.0706074237823486, |
|
"logps/chosen": -219.49658203125, |
|
"logps/rejected": -2600.13525390625, |
|
"logps_avg/chosen": -0.5888947248458862, |
|
"logps_avg/rejected": -8.757942199707031, |
|
"loss": 0.5708, |
|
"losses_ref": -0.06751363724470139, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 640, |
|
"u": -4.354660511016846, |
|
"weight": 0.08399678766727448 |
|
}, |
|
{ |
|
"diff_generated": -29.59097671508789, |
|
"epoch": 1.350431824129809, |
|
"grad_norm": 8.832363301034992, |
|
"learning_rate": 5.746183156789252e-07, |
|
"logits/chosen": -2.522441864013672, |
|
"logits/rejected": -2.069122076034546, |
|
"logps/chosen": -234.3195343017578, |
|
"logps/rejected": -2680.282470703125, |
|
"logps_avg/chosen": -0.6104280352592468, |
|
"logps_avg/rejected": -8.877291679382324, |
|
"loss": 0.5457, |
|
"losses_ref": -0.05418990179896355, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 645, |
|
"u": -4.4123215675354, |
|
"weight": 0.058007679879665375 |
|
}, |
|
{ |
|
"diff_generated": -28.265172958374023, |
|
"epoch": 1.3609002878827532, |
|
"grad_norm": 12.218786161167232, |
|
"learning_rate": 5.581218468871365e-07, |
|
"logits/chosen": -2.4173598289489746, |
|
"logits/rejected": -2.0515952110290527, |
|
"logps/chosen": -190.7438507080078, |
|
"logps/rejected": -2539.76953125, |
|
"logps_avg/chosen": -0.5876272320747375, |
|
"logps_avg/rejected": -8.479551315307617, |
|
"loss": 0.5169, |
|
"losses_ref": -0.08093442767858505, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 650, |
|
"u": -4.324867248535156, |
|
"weight": 0.08522786945104599 |
|
}, |
|
{ |
|
"diff_generated": -31.353778839111328, |
|
"epoch": 1.3713687516356974, |
|
"grad_norm": 10.11440836146207, |
|
"learning_rate": 5.417734782725896e-07, |
|
"logits/chosen": -2.459190845489502, |
|
"logits/rejected": -2.060859203338623, |
|
"logps/chosen": -211.8318634033203, |
|
"logps/rejected": -2672.73583984375, |
|
"logps_avg/chosen": -0.5790122151374817, |
|
"logps_avg/rejected": -9.406133651733398, |
|
"loss": 0.5603, |
|
"losses_ref": -0.038860172033309937, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 655, |
|
"u": -4.468893051147461, |
|
"weight": 0.03133354336023331 |
|
}, |
|
{ |
|
"diff_generated": -28.588571548461914, |
|
"epoch": 1.3818372153886418, |
|
"grad_norm": 12.583696879491457, |
|
"learning_rate": 5.255786891654399e-07, |
|
"logits/chosen": -2.4734246730804443, |
|
"logits/rejected": -2.0776007175445557, |
|
"logps/chosen": -203.22389221191406, |
|
"logps/rejected": -2578.066162109375, |
|
"logps_avg/chosen": -0.6348826289176941, |
|
"logps_avg/rejected": -8.57657241821289, |
|
"loss": 0.5486, |
|
"losses_ref": -0.06403845548629761, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 660, |
|
"u": -4.37404203414917, |
|
"weight": 0.06924913823604584 |
|
}, |
|
{ |
|
"diff_generated": -29.78804588317871, |
|
"epoch": 1.392305679141586, |
|
"grad_norm": 13.032538343695713, |
|
"learning_rate": 5.095429074220319e-07, |
|
"logits/chosen": -2.4960551261901855, |
|
"logits/rejected": -2.1090826988220215, |
|
"logps/chosen": -213.1850128173828, |
|
"logps/rejected": -2626.316162109375, |
|
"logps_avg/chosen": -0.6238334774971008, |
|
"logps_avg/rejected": -8.93641471862793, |
|
"loss": 0.5533, |
|
"losses_ref": -0.06042981147766113, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 665, |
|
"u": -4.39785099029541, |
|
"weight": 0.05738676339387894 |
|
}, |
|
{ |
|
"diff_generated": -31.395706176757812, |
|
"epoch": 1.4027741428945302, |
|
"grad_norm": 29.282292978403014, |
|
"learning_rate": 4.936715076056974e-07, |
|
"logits/chosen": -2.519998073577881, |
|
"logits/rejected": -2.1003477573394775, |
|
"logps/chosen": -227.49972534179688, |
|
"logps/rejected": -2841.53759765625, |
|
"logps_avg/chosen": -0.6322627067565918, |
|
"logps_avg/rejected": -9.418710708618164, |
|
"loss": 0.545, |
|
"losses_ref": -0.04599471017718315, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 670, |
|
"u": -4.469276428222656, |
|
"weight": 0.033300966024398804 |
|
}, |
|
{ |
|
"diff_generated": -32.34383010864258, |
|
"epoch": 1.4132426066474744, |
|
"grad_norm": 21.235357659003228, |
|
"learning_rate": 4.779698091854098e-07, |
|
"logits/chosen": -2.5733542442321777, |
|
"logits/rejected": -2.1177892684936523, |
|
"logps/chosen": -241.3948516845703, |
|
"logps/rejected": -2941.85205078125, |
|
"logps_avg/chosen": -0.634663999080658, |
|
"logps_avg/rejected": -9.70314884185791, |
|
"loss": 0.5578, |
|
"losses_ref": -0.03548940271139145, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 675, |
|
"u": -4.454364776611328, |
|
"weight": 0.025668436661362648 |
|
}, |
|
{ |
|
"diff_generated": -29.166423797607422, |
|
"epoch": 1.4237110704004188, |
|
"grad_norm": 9.728306873667183, |
|
"learning_rate": 4.624430747529102e-07, |
|
"logits/chosen": -2.5310111045837402, |
|
"logits/rejected": -2.1089558601379395, |
|
"logps/chosen": -245.45083618164062, |
|
"logps/rejected": -2643.77001953125, |
|
"logps_avg/chosen": -0.6183468699455261, |
|
"logps_avg/rejected": -8.749927520751953, |
|
"loss": 0.5228, |
|
"losses_ref": -0.08980627357959747, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 680, |
|
"u": -4.334953308105469, |
|
"weight": 0.07751224935054779 |
|
}, |
|
{ |
|
"diff_generated": -33.88722610473633, |
|
"epoch": 1.434179534153363, |
|
"grad_norm": 14.616844426526761, |
|
"learning_rate": 4.4709650825889277e-07, |
|
"logits/chosen": -2.460334300994873, |
|
"logits/rejected": -2.0326919555664062, |
|
"logps/chosen": -193.82003784179688, |
|
"logps/rejected": -2947.883544921875, |
|
"logps_avg/chosen": -0.5843343138694763, |
|
"logps_avg/rejected": -10.166168212890625, |
|
"loss": 0.5694, |
|
"losses_ref": -0.03547119349241257, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 685, |
|
"u": -4.463663578033447, |
|
"weight": 0.030837317928671837 |
|
}, |
|
{ |
|
"diff_generated": -30.6027774810791, |
|
"epoch": 1.4446479979063072, |
|
"grad_norm": 11.081953598678401, |
|
"learning_rate": 4.3193525326884426e-07, |
|
"logits/chosen": -2.5122551918029785, |
|
"logits/rejected": -2.0895779132843018, |
|
"logps/chosen": -238.4690704345703, |
|
"logps/rejected": -2627.096435546875, |
|
"logps_avg/chosen": -0.6726236343383789, |
|
"logps_avg/rejected": -9.180832862854004, |
|
"loss": 0.587, |
|
"losses_ref": -0.05756605789065361, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 690, |
|
"u": -4.38980770111084, |
|
"weight": 0.0510624423623085 |
|
}, |
|
{ |
|
"diff_generated": -32.015716552734375, |
|
"epoch": 1.4551164616592516, |
|
"grad_norm": 11.608639050571856, |
|
"learning_rate": 4.1696439123912406e-07, |
|
"logits/chosen": -2.4778366088867188, |
|
"logits/rejected": -2.0454444885253906, |
|
"logps/chosen": -205.8911590576172, |
|
"logps/rejected": -2957.13525390625, |
|
"logps_avg/chosen": -0.6116452217102051, |
|
"logps_avg/rejected": -9.604714393615723, |
|
"loss": 0.5502, |
|
"losses_ref": -0.05736450105905533, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 695, |
|
"u": -4.357982635498047, |
|
"weight": 0.05952075123786926 |
|
}, |
|
{ |
|
"diff_generated": -35.234153747558594, |
|
"epoch": 1.4655849254121958, |
|
"grad_norm": 8.17712308208093, |
|
"learning_rate": 4.0218893981385927e-07, |
|
"logits/chosen": -2.485691547393799, |
|
"logits/rejected": -2.046220064163208, |
|
"logps/chosen": -200.62582397460938, |
|
"logps/rejected": -3101.075439453125, |
|
"logps_avg/chosen": -0.5734541416168213, |
|
"logps_avg/rejected": -10.570245742797852, |
|
"loss": 0.5729, |
|
"losses_ref": -0.028310665860772133, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 700, |
|
"u": -4.45696496963501, |
|
"weight": 0.023738497868180275 |
|
}, |
|
{ |
|
"diff_generated": -35.26641082763672, |
|
"epoch": 1.47605338916514, |
|
"grad_norm": 16.950355166034456, |
|
"learning_rate": 3.87613851143229e-07, |
|
"logits/chosen": -2.494295597076416, |
|
"logits/rejected": -2.00370717048645, |
|
"logps/chosen": -230.57400512695312, |
|
"logps/rejected": -3109.327392578125, |
|
"logps_avg/chosen": -0.6209388971328735, |
|
"logps_avg/rejected": -10.57992172241211, |
|
"loss": 0.5466, |
|
"losses_ref": -0.0546514168381691, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 705, |
|
"u": -4.406841278076172, |
|
"weight": 0.04961226135492325 |
|
}, |
|
{ |
|
"diff_generated": -34.927207946777344, |
|
"epoch": 1.4865218529180844, |
|
"grad_norm": 9.208840009036596, |
|
"learning_rate": 3.7324401022369744e-07, |
|
"logits/chosen": -2.4626827239990234, |
|
"logits/rejected": -1.9565467834472656, |
|
"logps/chosen": -232.802001953125, |
|
"logps/rejected": -3108.4921875, |
|
"logps_avg/chosen": -0.6169513463973999, |
|
"logps_avg/rejected": -10.47816276550293, |
|
"loss": 0.5383, |
|
"losses_ref": -0.051527369767427444, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 710, |
|
"u": -4.410109043121338, |
|
"weight": 0.04789410158991814 |
|
}, |
|
{ |
|
"diff_generated": -31.93350601196289, |
|
"epoch": 1.4969903166710286, |
|
"grad_norm": 8.74366239695945, |
|
"learning_rate": 3.5908423326075455e-07, |
|
"logits/chosen": -2.470921039581299, |
|
"logits/rejected": -2.028719425201416, |
|
"logps/chosen": -197.37814331054688, |
|
"logps/rejected": -2799.31396484375, |
|
"logps_avg/chosen": -0.5950369834899902, |
|
"logps_avg/rejected": -9.580052375793457, |
|
"loss": 0.5627, |
|
"losses_ref": -0.05724947527050972, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 715, |
|
"u": -4.411328315734863, |
|
"weight": 0.047762464731931686 |
|
}, |
|
{ |
|
"diff_generated": -33.14401626586914, |
|
"epoch": 1.5074587804239727, |
|
"grad_norm": 8.842328295664547, |
|
"learning_rate": 3.45139266054715e-07, |
|
"logits/chosen": -2.5109152793884277, |
|
"logits/rejected": -2.010921001434326, |
|
"logps/chosen": -247.7344207763672, |
|
"logps/rejected": -3127.861328125, |
|
"logps_avg/chosen": -0.6326244473457336, |
|
"logps_avg/rejected": -9.943206787109375, |
|
"loss": 0.5529, |
|
"losses_ref": -0.05398111790418625, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 720, |
|
"u": -4.407879829406738, |
|
"weight": 0.051137275993824005 |
|
}, |
|
{ |
|
"diff_generated": -34.548439025878906, |
|
"epoch": 1.5179272441769172, |
|
"grad_norm": 9.975694420372704, |
|
"learning_rate": 3.314137824101111e-07, |
|
"logits/chosen": -2.5249905586242676, |
|
"logits/rejected": -2.0087645053863525, |
|
"logps/chosen": -254.705322265625, |
|
"logps/rejected": -3178.156494140625, |
|
"logps_avg/chosen": -0.6393792033195496, |
|
"logps_avg/rejected": -10.364530563354492, |
|
"loss": 0.5512, |
|
"losses_ref": -0.05713530257344246, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 725, |
|
"u": -4.388852119445801, |
|
"weight": 0.061693333089351654 |
|
}, |
|
{ |
|
"diff_generated": -32.73970413208008, |
|
"epoch": 1.5283957079298613, |
|
"grad_norm": 11.767533184902167, |
|
"learning_rate": 3.179123825692178e-07, |
|
"logits/chosen": -2.47417950630188, |
|
"logits/rejected": -2.016237497329712, |
|
"logps/chosen": -209.87802124023438, |
|
"logps/rejected": -2884.9580078125, |
|
"logps_avg/chosen": -0.5899583101272583, |
|
"logps_avg/rejected": -9.821910858154297, |
|
"loss": 0.5576, |
|
"losses_ref": -0.05416392162442207, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 730, |
|
"u": -4.406733989715576, |
|
"weight": 0.052076805382966995 |
|
}, |
|
{ |
|
"diff_generated": -32.37422561645508, |
|
"epoch": 1.5388641716828055, |
|
"grad_norm": 9.47936945913295, |
|
"learning_rate": 3.0463959167023335e-07, |
|
"logits/chosen": -2.5015838146209717, |
|
"logits/rejected": -2.069798231124878, |
|
"logps/chosen": -217.7288055419922, |
|
"logps/rejected": -2870.407958984375, |
|
"logps_avg/chosen": -0.6165660619735718, |
|
"logps_avg/rejected": -9.712267875671387, |
|
"loss": 0.5285, |
|
"losses_ref": -0.08272585272789001, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 735, |
|
"u": -4.363903999328613, |
|
"weight": 0.07859805971384048 |
|
}, |
|
{ |
|
"diff_generated": -32.28863525390625, |
|
"epoch": 1.54933263543575, |
|
"grad_norm": 9.124308513157976, |
|
"learning_rate": 2.915998582306299e-07, |
|
"logits/chosen": -2.5220367908477783, |
|
"logits/rejected": -2.038191318511963, |
|
"logps/chosen": -229.7245330810547, |
|
"logps/rejected": -2982.073486328125, |
|
"logps_avg/chosen": -0.617731511592865, |
|
"logps_avg/rejected": -9.686590194702148, |
|
"loss": 0.5329, |
|
"losses_ref": -0.05901874229311943, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 740, |
|
"u": -4.457033634185791, |
|
"weight": 0.051646940410137177 |
|
}, |
|
{ |
|
"diff_generated": -31.57918357849121, |
|
"epoch": 1.559801099188694, |
|
"grad_norm": 8.788334428443942, |
|
"learning_rate": 2.7879755265618557e-07, |
|
"logits/chosen": -2.385359287261963, |
|
"logits/rejected": -2.0353574752807617, |
|
"logps/chosen": -191.27542114257812, |
|
"logps/rejected": -2743.20849609375, |
|
"logps_avg/chosen": -0.5724462270736694, |
|
"logps_avg/rejected": -9.473755836486816, |
|
"loss": 0.5301, |
|
"losses_ref": -0.06048304960131645, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 745, |
|
"u": -4.434350967407227, |
|
"weight": 0.051485490053892136 |
|
}, |
|
{ |
|
"diff_generated": -33.260643005371094, |
|
"epoch": 1.5702695629416383, |
|
"grad_norm": 13.597985798817346, |
|
"learning_rate": 2.6623696577619625e-07, |
|
"logits/chosen": -2.498661518096924, |
|
"logits/rejected": -2.070701837539673, |
|
"logps/chosen": -227.7393035888672, |
|
"logps/rejected": -2963.530517578125, |
|
"logps_avg/chosen": -0.6551213264465332, |
|
"logps_avg/rejected": -9.978193283081055, |
|
"loss": 0.5837, |
|
"losses_ref": -0.03624705597758293, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 750, |
|
"u": -4.466372966766357, |
|
"weight": 0.028057094663381577 |
|
}, |
|
{ |
|
"diff_generated": -29.464405059814453, |
|
"epoch": 1.5807380266945825, |
|
"grad_norm": 9.250307778356563, |
|
"learning_rate": 2.5392230740535846e-07, |
|
"logits/chosen": -2.5032472610473633, |
|
"logits/rejected": -2.06776762008667, |
|
"logps/chosen": -251.3708953857422, |
|
"logps/rejected": -2650.0810546875, |
|
"logps_avg/chosen": -0.6423950791358948, |
|
"logps_avg/rejected": -8.839322090148926, |
|
"loss": 0.5765, |
|
"losses_ref": -0.052409954369068146, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 755, |
|
"u": -4.408892631530762, |
|
"weight": 0.05609407275915146 |
|
}, |
|
{ |
|
"diff_generated": -29.876062393188477, |
|
"epoch": 1.5912064904475267, |
|
"grad_norm": 12.686799097235559, |
|
"learning_rate": 2.418577049328058e-07, |
|
"logits/chosen": -2.5676896572113037, |
|
"logits/rejected": -2.1377835273742676, |
|
"logps/chosen": -265.7136535644531, |
|
"logps/rejected": -2646.18896484375, |
|
"logps_avg/chosen": -0.665650486946106, |
|
"logps_avg/rejected": -8.962818145751953, |
|
"loss": 0.5887, |
|
"losses_ref": -0.06443095207214355, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 760, |
|
"u": -4.407828330993652, |
|
"weight": 0.06109876185655594 |
|
}, |
|
{ |
|
"diff_generated": -33.68701171875, |
|
"epoch": 1.6016749542004711, |
|
"grad_norm": 10.274482248605684, |
|
"learning_rate": 2.300472019387697e-07, |
|
"logits/chosen": -2.469991683959961, |
|
"logits/rejected": -2.029064893722534, |
|
"logps/chosen": -220.9040985107422, |
|
"logps/rejected": -3017.740234375, |
|
"logps_avg/chosen": -0.6078630685806274, |
|
"logps_avg/rejected": -10.10610294342041, |
|
"loss": 0.5524, |
|
"losses_ref": -0.04078926518559456, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 765, |
|
"u": -4.417876243591309, |
|
"weight": 0.03704729676246643 |
|
}, |
|
{ |
|
"diff_generated": -31.825037002563477, |
|
"epoch": 1.6121434179534153, |
|
"grad_norm": 11.839464542057028, |
|
"learning_rate": 2.1849475683932994e-07, |
|
"logits/chosen": -2.4939956665039062, |
|
"logits/rejected": -2.1075644493103027, |
|
"logps/chosen": -223.6890869140625, |
|
"logps/rejected": -2828.83447265625, |
|
"logps_avg/chosen": -0.6260048747062683, |
|
"logps_avg/rejected": -9.547511100769043, |
|
"loss": 0.5492, |
|
"losses_ref": -0.05019731447100639, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 770, |
|
"u": -4.3696393966674805, |
|
"weight": 0.05455632880330086 |
|
}, |
|
{ |
|
"diff_generated": -30.594751358032227, |
|
"epoch": 1.6226118817063595, |
|
"grad_norm": 9.146985127674856, |
|
"learning_rate": 2.0720424155971038e-07, |
|
"logits/chosen": -2.4665775299072266, |
|
"logits/rejected": -2.0385656356811523, |
|
"logps/chosen": -238.6437530517578, |
|
"logps/rejected": -2788.4453125, |
|
"logps_avg/chosen": -0.6432589292526245, |
|
"logps_avg/rejected": -9.178424835205078, |
|
"loss": 0.5603, |
|
"losses_ref": -0.060744620859622955, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 775, |
|
"u": -4.3677592277526855, |
|
"weight": 0.054513733834028244 |
|
}, |
|
{ |
|
"diff_generated": -30.54671859741211, |
|
"epoch": 1.633080345459304, |
|
"grad_norm": 12.431506597181475, |
|
"learning_rate": 1.961794402365611e-07, |
|
"logits/chosen": -2.48872971534729, |
|
"logits/rejected": -2.045698404312134, |
|
"logps/chosen": -238.8667755126953, |
|
"logps/rejected": -2746.897705078125, |
|
"logps_avg/chosen": -0.6708707809448242, |
|
"logps_avg/rejected": -9.16401481628418, |
|
"loss": 0.5942, |
|
"losses_ref": -0.043663203716278076, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 780, |
|
"u": -4.429020881652832, |
|
"weight": 0.0392422154545784 |
|
}, |
|
{ |
|
"diff_generated": -30.78244400024414, |
|
"epoch": 1.643548809212248, |
|
"grad_norm": 14.0111361325287, |
|
"learning_rate": 1.8542404794966427e-07, |
|
"logits/chosen": -2.5275959968566895, |
|
"logits/rejected": -2.0743932723999023, |
|
"logps/chosen": -236.8502655029297, |
|
"logps/rejected": -2726.872802734375, |
|
"logps_avg/chosen": -0.6049509644508362, |
|
"logps_avg/rejected": -9.234731674194336, |
|
"loss": 0.5559, |
|
"losses_ref": -0.040397271513938904, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 785, |
|
"u": -4.439243316650391, |
|
"weight": 0.034041326493024826 |
|
}, |
|
{ |
|
"diff_generated": -30.46352767944336, |
|
"epoch": 1.6540172729651923, |
|
"grad_norm": 13.778205091571524, |
|
"learning_rate": 1.7494166948349053e-07, |
|
"logits/chosen": -2.4739108085632324, |
|
"logits/rejected": -2.0248847007751465, |
|
"logps/chosen": -188.06265258789062, |
|
"logps/rejected": -2811.63427734375, |
|
"logps_avg/chosen": -0.58104407787323, |
|
"logps_avg/rejected": -9.139059066772461, |
|
"loss": 0.5279, |
|
"losses_ref": -0.0705099031329155, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 790, |
|
"u": -4.394803047180176, |
|
"weight": 0.06850212812423706 |
|
}, |
|
{ |
|
"diff_generated": -31.430471420288086, |
|
"epoch": 1.6644857367181367, |
|
"grad_norm": 7.6385064901749775, |
|
"learning_rate": 1.6473581811901528e-07, |
|
"logits/chosen": -2.465888500213623, |
|
"logits/rejected": -2.0527515411376953, |
|
"logps/chosen": -210.7668914794922, |
|
"logps/rejected": -2648.2431640625, |
|
"logps_avg/chosen": -0.6304226517677307, |
|
"logps_avg/rejected": -9.429141998291016, |
|
"loss": 0.5656, |
|
"losses_ref": -0.035576872527599335, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 795, |
|
"u": -4.473604679107666, |
|
"weight": 0.025509512051939964 |
|
}, |
|
{ |
|
"diff_generated": -31.38290023803711, |
|
"epoch": 1.674954200471081, |
|
"grad_norm": 10.762504453960963, |
|
"learning_rate": 1.5480991445620538e-07, |
|
"logits/chosen": -2.458466053009033, |
|
"logits/rejected": -2.0299301147460938, |
|
"logps/chosen": -205.1313018798828, |
|
"logps/rejected": -2810.052001953125, |
|
"logps_avg/chosen": -0.5803036093711853, |
|
"logps_avg/rejected": -9.414871215820312, |
|
"loss": 0.5407, |
|
"losses_ref": -0.06857903301715851, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 800, |
|
"u": -4.383907318115234, |
|
"weight": 0.07058969140052795 |
|
}, |
|
{ |
|
"diff_generated": -32.339012145996094, |
|
"epoch": 1.685422664224025, |
|
"grad_norm": 12.623391530366172, |
|
"learning_rate": 1.4516728526756873e-07, |
|
"logits/chosen": -2.4743473529815674, |
|
"logits/rejected": -2.0498290061950684, |
|
"logps/chosen": -213.2050018310547, |
|
"logps/rejected": -2888.50927734375, |
|
"logps_avg/chosen": -0.5934925079345703, |
|
"logps_avg/rejected": -9.701704025268555, |
|
"loss": 0.5501, |
|
"losses_ref": -0.061614394187927246, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 805, |
|
"u": -4.301063537597656, |
|
"weight": 0.06795644760131836 |
|
}, |
|
{ |
|
"diff_generated": -29.015087127685547, |
|
"epoch": 1.6958911279769695, |
|
"grad_norm": 17.58977680719491, |
|
"learning_rate": 1.3581116238315194e-07, |
|
"logits/chosen": -2.4904446601867676, |
|
"logits/rejected": -2.050494909286499, |
|
"logps/chosen": -245.46176147460938, |
|
"logps/rejected": -2670.2060546875, |
|
"logps_avg/chosen": -0.6670945882797241, |
|
"logps_avg/rejected": -8.704526901245117, |
|
"loss": 0.5769, |
|
"losses_ref": -0.05934567004442215, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 810, |
|
"u": -4.374115943908691, |
|
"weight": 0.05290456861257553 |
|
}, |
|
{ |
|
"diff_generated": -31.670734405517578, |
|
"epoch": 1.7063595917299135, |
|
"grad_norm": 20.41492239134003, |
|
"learning_rate": 1.2674468160735586e-07, |
|
"logits/chosen": -2.5279009342193604, |
|
"logits/rejected": -2.089564800262451, |
|
"logps/chosen": -219.30712890625, |
|
"logps/rejected": -2705.98193359375, |
|
"logps_avg/chosen": -0.6055987477302551, |
|
"logps_avg/rejected": -9.501219749450684, |
|
"loss": 0.5913, |
|
"losses_ref": -0.04426007717847824, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 815, |
|
"u": -4.457495212554932, |
|
"weight": 0.04348568618297577 |
|
}, |
|
{ |
|
"diff_generated": -32.43749237060547, |
|
"epoch": 1.7168280554828579, |
|
"grad_norm": 8.725588658168348, |
|
"learning_rate": 1.1797088166794e-07, |
|
"logits/chosen": -2.479827880859375, |
|
"logits/rejected": -2.0322813987731934, |
|
"logps/chosen": -209.2858428955078, |
|
"logps/rejected": -2927.29150390625, |
|
"logps_avg/chosen": -0.5941019058227539, |
|
"logps_avg/rejected": -9.731245994567871, |
|
"loss": 0.5891, |
|
"losses_ref": -0.03500083088874817, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 820, |
|
"u": -4.411776065826416, |
|
"weight": 0.028576117008924484 |
|
}, |
|
{ |
|
"diff_generated": -29.760284423828125, |
|
"epoch": 1.7272965192358023, |
|
"grad_norm": 7.224696592212977, |
|
"learning_rate": 1.0949270319755766e-07, |
|
"logits/chosen": -2.5083603858947754, |
|
"logits/rejected": -2.0863795280456543, |
|
"logps/chosen": -206.98812866210938, |
|
"logps/rejected": -2673.796875, |
|
"logps_avg/chosen": -0.5425812005996704, |
|
"logps_avg/rejected": -8.928085327148438, |
|
"loss": 0.5471, |
|
"losses_ref": -0.040049560368061066, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 825, |
|
"u": -4.422328472137451, |
|
"weight": 0.03419359400868416 |
|
}, |
|
{ |
|
"diff_generated": -30.241125106811523, |
|
"epoch": 1.7377649829887463, |
|
"grad_norm": 11.359999539925766, |
|
"learning_rate": 1.013129877481741e-07, |
|
"logits/chosen": -2.4465301036834717, |
|
"logits/rejected": -2.0786962509155273, |
|
"logps/chosen": -251.66110229492188, |
|
"logps/rejected": -2615.54248046875, |
|
"logps_avg/chosen": -0.6354495286941528, |
|
"logps_avg/rejected": -9.07233715057373, |
|
"loss": 0.5595, |
|
"losses_ref": -0.038409143686294556, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 830, |
|
"u": -4.411218643188477, |
|
"weight": 0.03372519463300705 |
|
}, |
|
{ |
|
"diff_generated": -31.309673309326172, |
|
"epoch": 1.7482334467416907, |
|
"grad_norm": 10.689212774701963, |
|
"learning_rate": 9.343447683868799e-08, |
|
"logits/chosen": -2.459969997406006, |
|
"logits/rejected": -2.0669496059417725, |
|
"logps/chosen": -197.42056274414062, |
|
"logps/rejected": -2780.952392578125, |
|
"logps_avg/chosen": -0.5673859715461731, |
|
"logps_avg/rejected": -9.392901420593262, |
|
"loss": 0.5517, |
|
"losses_ref": -0.03770770505070686, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 835, |
|
"u": -4.412764549255371, |
|
"weight": 0.03573904559016228 |
|
}, |
|
{ |
|
"diff_generated": -30.0009765625, |
|
"epoch": 1.7587019104946349, |
|
"grad_norm": 13.800508017129163, |
|
"learning_rate": 8.585981103608342e-08, |
|
"logits/chosen": -2.48380184173584, |
|
"logits/rejected": -2.0376243591308594, |
|
"logps/chosen": -247.1182861328125, |
|
"logps/rejected": -2758.78857421875, |
|
"logps_avg/chosen": -0.6514982581138611, |
|
"logps_avg/rejected": -9.000292778015137, |
|
"loss": 0.5682, |
|
"losses_ref": -0.04732600972056389, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 840, |
|
"u": -4.44762659072876, |
|
"weight": 0.04196245223283768 |
|
}, |
|
{ |
|
"diff_generated": -30.065624237060547, |
|
"epoch": 1.769170374247579, |
|
"grad_norm": 13.143185887862547, |
|
"learning_rate": 7.859152907041544e-08, |
|
"logits/chosen": -2.4641730785369873, |
|
"logits/rejected": -2.0567100048065186, |
|
"logps/chosen": -236.99148559570312, |
|
"logps/rejected": -2573.870849609375, |
|
"logps_avg/chosen": -0.6164765357971191, |
|
"logps_avg/rejected": -9.019688606262207, |
|
"loss": 0.5526, |
|
"losses_ref": -0.05898575857281685, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 845, |
|
"u": -4.398618698120117, |
|
"weight": 0.060839347541332245 |
|
}, |
|
{ |
|
"diff_generated": -30.66835594177246, |
|
"epoch": 1.7796388380005235, |
|
"grad_norm": 17.88344708080126, |
|
"learning_rate": 7.163206698392742e-08, |
|
"logits/chosen": -2.4754815101623535, |
|
"logits/rejected": -2.077538251876831, |
|
"logps/chosen": -222.5938262939453, |
|
"logps/rejected": -2694.906494140625, |
|
"logps_avg/chosen": -0.6013268232345581, |
|
"logps_avg/rejected": -9.200507164001465, |
|
"loss": 0.5739, |
|
"losses_ref": -0.05739979073405266, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 850, |
|
"u": -4.392508506774902, |
|
"weight": 0.04965168982744217 |
|
}, |
|
{ |
|
"diff_generated": -29.592029571533203, |
|
"epoch": 1.7901073017534677, |
|
"grad_norm": 13.06278922990348, |
|
"learning_rate": 6.498375731458527e-08, |
|
"logits/chosen": -2.514953136444092, |
|
"logits/rejected": -2.096156597137451, |
|
"logps/chosen": -233.39132690429688, |
|
"logps/rejected": -2654.203857421875, |
|
"logps_avg/chosen": -0.6016189455986023, |
|
"logps_avg/rejected": -8.877609252929688, |
|
"loss": 0.5566, |
|
"losses_ref": -0.04416666924953461, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 855, |
|
"u": -4.415837287902832, |
|
"weight": 0.03559427708387375 |
|
}, |
|
{ |
|
"diff_generated": -32.24101638793945, |
|
"epoch": 1.8005757655064119, |
|
"grad_norm": 12.360220474861023, |
|
"learning_rate": 5.8648828314302735e-08, |
|
"logits/chosen": -2.4461560249328613, |
|
"logits/rejected": -2.015535354614258, |
|
"logps/chosen": -225.93533325195312, |
|
"logps/rejected": -2782.87255859375, |
|
"logps_avg/chosen": -0.5964374542236328, |
|
"logps_avg/rejected": -9.6723051071167, |
|
"loss": 0.5666, |
|
"losses_ref": -0.03670288249850273, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 860, |
|
"u": -4.398609161376953, |
|
"weight": 0.03543057292699814 |
|
}, |
|
{ |
|
"diff_generated": -31.262531280517578, |
|
"epoch": 1.8110442292593563, |
|
"grad_norm": 16.164691771356388, |
|
"learning_rate": 5.2629403202119505e-08, |
|
"logits/chosen": -2.4537065029144287, |
|
"logits/rejected": -2.062150716781616, |
|
"logps/chosen": -204.52587890625, |
|
"logps/rejected": -2741.170654296875, |
|
"logps_avg/chosen": -0.5822928547859192, |
|
"logps_avg/rejected": -9.378759384155273, |
|
"loss": 0.5402, |
|
"losses_ref": -0.03764919191598892, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 865, |
|
"u": -4.42946720123291, |
|
"weight": 0.031336475163698196 |
|
}, |
|
{ |
|
"diff_generated": -30.04671859741211, |
|
"epoch": 1.8215126930123005, |
|
"grad_norm": 10.013135246955365, |
|
"learning_rate": 4.692749945258057e-08, |
|
"logits/chosen": -2.4766173362731934, |
|
"logits/rejected": -2.0611166954040527, |
|
"logps/chosen": -236.82284545898438, |
|
"logps/rejected": -2744.845458984375, |
|
"logps_avg/chosen": -0.6182196736335754, |
|
"logps_avg/rejected": -9.014015197753906, |
|
"loss": 0.5905, |
|
"losses_ref": -0.07179991900920868, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 870, |
|
"u": -4.407280921936035, |
|
"weight": 0.07166210561990738 |
|
}, |
|
{ |
|
"diff_generated": -32.90989303588867, |
|
"epoch": 1.8319811567652446, |
|
"grad_norm": 9.513246816083905, |
|
"learning_rate": 4.1545028119559066e-08, |
|
"logits/chosen": -2.4886152744293213, |
|
"logits/rejected": -2.066333770751953, |
|
"logps/chosen": -223.5939483642578, |
|
"logps/rejected": -2896.932373046875, |
|
"logps_avg/chosen": -0.6256131529808044, |
|
"logps_avg/rejected": -9.872968673706055, |
|
"loss": 0.5458, |
|
"losses_ref": -0.0590200200676918, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 875, |
|
"u": -4.408657073974609, |
|
"weight": 0.056226253509521484 |
|
}, |
|
{ |
|
"diff_generated": -31.053930282592773, |
|
"epoch": 1.842449620518189, |
|
"grad_norm": 94.13052968470578, |
|
"learning_rate": 3.648379319574568e-08, |
|
"logits/chosen": -2.528390407562256, |
|
"logits/rejected": -2.073420524597168, |
|
"logps/chosen": -222.608642578125, |
|
"logps/rejected": -2745.4130859375, |
|
"logps_avg/chosen": -0.6137613654136658, |
|
"logps_avg/rejected": -9.316179275512695, |
|
"loss": 0.5237, |
|
"losses_ref": -0.06711964309215546, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 880, |
|
"u": -4.3957600593566895, |
|
"weight": 0.06412933766841888 |
|
}, |
|
{ |
|
"diff_generated": -30.842365264892578, |
|
"epoch": 1.8529180842711332, |
|
"grad_norm": 8.312877021027528, |
|
"learning_rate": 3.17454910080216e-08, |
|
"logits/chosen": -2.5333809852600098, |
|
"logits/rejected": -2.1170499324798584, |
|
"logps/chosen": -253.5600128173828, |
|
"logps/rejected": -2778.802001953125, |
|
"logps_avg/chosen": -0.6801126599311829, |
|
"logps_avg/rejected": -9.25270938873291, |
|
"loss": 0.5709, |
|
"losses_ref": -0.0633564293384552, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 885, |
|
"u": -4.409802436828613, |
|
"weight": 0.05907650664448738 |
|
}, |
|
{ |
|
"diff_generated": -30.68337631225586, |
|
"epoch": 1.8633865480240774, |
|
"grad_norm": 8.793473948703046, |
|
"learning_rate": 2.733170964891607e-08, |
|
"logits/chosen": -2.46742582321167, |
|
"logits/rejected": -2.0830397605895996, |
|
"logps/chosen": -204.62625122070312, |
|
"logps/rejected": -2726.16552734375, |
|
"logps_avg/chosen": -0.5727981328964233, |
|
"logps_avg/rejected": -9.205012321472168, |
|
"loss": 0.5596, |
|
"losses_ref": -0.05169714242219925, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 890, |
|
"u": -4.437578201293945, |
|
"weight": 0.043870192021131516 |
|
}, |
|
{ |
|
"diff_generated": -30.3818302154541, |
|
"epoch": 1.8738550117770219, |
|
"grad_norm": 6.648166332075938, |
|
"learning_rate": 2.324392844434042e-08, |
|
"logits/chosen": -2.491211414337158, |
|
"logits/rejected": -2.0470757484436035, |
|
"logps/chosen": -229.8271026611328, |
|
"logps/rejected": -2785.03076171875, |
|
"logps_avg/chosen": -0.6076307892799377, |
|
"logps_avg/rejected": -9.11454963684082, |
|
"loss": 0.5638, |
|
"losses_ref": -0.032108329236507416, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 895, |
|
"u": -4.453085899353027, |
|
"weight": 0.02507254108786583 |
|
}, |
|
{ |
|
"diff_generated": -31.61887550354004, |
|
"epoch": 1.8843234755299658, |
|
"grad_norm": 18.51567409544646, |
|
"learning_rate": 1.9483517457776434e-08, |
|
"logits/chosen": -2.4359021186828613, |
|
"logits/rejected": -2.096619129180908, |
|
"logps/chosen": -188.21896362304688, |
|
"logps/rejected": -2806.19921875, |
|
"logps_avg/chosen": -0.5758072733879089, |
|
"logps_avg/rejected": -9.485663414001465, |
|
"loss": 0.5343, |
|
"losses_ref": -0.08278501033782959, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 900, |
|
"u": -4.343811988830566, |
|
"weight": 0.08570453524589539 |
|
}, |
|
{ |
|
"diff_generated": -31.200836181640625, |
|
"epoch": 1.8947919392829102, |
|
"grad_norm": 16.452754098885247, |
|
"learning_rate": 1.6051737031084533e-08, |
|
"logits/chosen": -2.453563690185547, |
|
"logits/rejected": -2.0280988216400146, |
|
"logps/chosen": -214.77395629882812, |
|
"logps/rejected": -2817.1669921875, |
|
"logps_avg/chosen": -0.5827924013137817, |
|
"logps_avg/rejected": -9.360250473022461, |
|
"loss": 0.5565, |
|
"losses_ref": -0.0487370602786541, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 905, |
|
"u": -4.410248756408691, |
|
"weight": 0.04455076903104782 |
|
}, |
|
{ |
|
"diff_generated": -30.942846298217773, |
|
"epoch": 1.9052604030358546, |
|
"grad_norm": 20.55170462638644, |
|
"learning_rate": 1.2949737362087154e-08, |
|
"logits/chosen": -2.467200756072998, |
|
"logits/rejected": -2.096820831298828, |
|
"logps/chosen": -206.9503936767578, |
|
"logps/rejected": -2817.215087890625, |
|
"logps_avg/chosen": -0.6169668436050415, |
|
"logps_avg/rejected": -9.282854080200195, |
|
"loss": 0.5886, |
|
"losses_ref": -0.0511205717921257, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 910, |
|
"u": -4.404674053192139, |
|
"weight": 0.04643367975950241 |
|
}, |
|
{ |
|
"diff_generated": -30.169301986694336, |
|
"epoch": 1.9157288667887986, |
|
"grad_norm": 8.053020444587133, |
|
"learning_rate": 1.0178558119067315e-08, |
|
"logits/chosen": -2.4181623458862305, |
|
"logits/rejected": -2.028630018234253, |
|
"logps/chosen": -212.6619873046875, |
|
"logps/rejected": -2651.956787109375, |
|
"logps_avg/chosen": -0.5928919315338135, |
|
"logps_avg/rejected": -9.050790786743164, |
|
"loss": 0.5551, |
|
"losses_ref": -0.05854606628417969, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 915, |
|
"u": -4.413174629211426, |
|
"weight": 0.0472232848405838 |
|
}, |
|
{ |
|
"diff_generated": -32.18278121948242, |
|
"epoch": 1.926197330541743, |
|
"grad_norm": 24.431507328322112, |
|
"learning_rate": 7.739128092312918e-09, |
|
"logits/chosen": -2.4973015785217285, |
|
"logits/rejected": -2.0860588550567627, |
|
"logps/chosen": -216.73666381835938, |
|
"logps/rejected": -2769.303955078125, |
|
"logps_avg/chosen": -0.6046438813209534, |
|
"logps_avg/rejected": -9.654834747314453, |
|
"loss": 0.5467, |
|
"losses_ref": -0.06063861399888992, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 920, |
|
"u": -4.384333610534668, |
|
"weight": 0.07002799212932587 |
|
}, |
|
{ |
|
"diff_generated": -33.56671142578125, |
|
"epoch": 1.9366657942946872, |
|
"grad_norm": 12.304529486565588, |
|
"learning_rate": 5.632264882822757e-09, |
|
"logits/chosen": -2.499455451965332, |
|
"logits/rejected": -2.059584140777588, |
|
"logps/chosen": -228.59640502929688, |
|
"logps/rejected": -2900.51123046875, |
|
"logps_avg/chosen": -0.6097213625907898, |
|
"logps_avg/rejected": -10.070013046264648, |
|
"loss": 0.5799, |
|
"losses_ref": -0.0342455692589283, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 925, |
|
"u": -4.438467979431152, |
|
"weight": 0.030456313863396645 |
|
}, |
|
{ |
|
"diff_generated": -30.743816375732422, |
|
"epoch": 1.9471342580476314, |
|
"grad_norm": 11.65318893393544, |
|
"learning_rate": 3.858674628278824e-09, |
|
"logits/chosen": -2.4831936359405518, |
|
"logits/rejected": -2.0906691551208496, |
|
"logps/chosen": -230.875, |
|
"logps/rejected": -2670.49755859375, |
|
"logps_avg/chosen": -0.603253960609436, |
|
"logps_avg/rejected": -9.223145484924316, |
|
"loss": 0.5642, |
|
"losses_ref": -0.05138419196009636, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 930, |
|
"u": -4.435812473297119, |
|
"weight": 0.0458533950150013 |
|
}, |
|
{ |
|
"diff_generated": -31.753076553344727, |
|
"epoch": 1.9576027218005758, |
|
"grad_norm": 16.58166205034555, |
|
"learning_rate": 2.418951766376742e-09, |
|
"logits/chosen": -2.4695091247558594, |
|
"logits/rejected": -2.0497422218322754, |
|
"logps/chosen": -205.1109619140625, |
|
"logps/rejected": -2825.771484375, |
|
"logps_avg/chosen": -0.5685989260673523, |
|
"logps_avg/rejected": -9.525922775268555, |
|
"loss": 0.554, |
|
"losses_ref": -0.05179325491189957, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 935, |
|
"u": -4.395134925842285, |
|
"weight": 0.04630660265684128 |
|
}, |
|
{ |
|
"diff_generated": -31.87947654724121, |
|
"epoch": 1.96807118555352, |
|
"grad_norm": 26.35143781539668, |
|
"learning_rate": 1.313578835593465e-09, |
|
"logits/chosen": -2.4483304023742676, |
|
"logits/rejected": -2.004983425140381, |
|
"logps/chosen": -241.7947998046875, |
|
"logps/rejected": -2828.03173828125, |
|
"logps_avg/chosen": -0.6296852827072144, |
|
"logps_avg/rejected": -9.5638427734375, |
|
"loss": 0.5603, |
|
"losses_ref": -0.03613152354955673, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 940, |
|
"u": -4.452606678009033, |
|
"weight": 0.026990771293640137 |
|
}, |
|
{ |
|
"diff_generated": -29.87912940979004, |
|
"epoch": 1.9785396493064642, |
|
"grad_norm": 15.814334066391242, |
|
"learning_rate": 5.429263134594242e-10, |
|
"logits/chosen": -2.4958741664886475, |
|
"logits/rejected": -2.101313591003418, |
|
"logps/chosen": -207.99179077148438, |
|
"logps/rejected": -2708.303466796875, |
|
"logps_avg/chosen": -0.5728383660316467, |
|
"logps_avg/rejected": -8.963739395141602, |
|
"loss": 0.5538, |
|
"losses_ref": -0.06295718252658844, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 945, |
|
"u": -4.400644302368164, |
|
"weight": 0.05401432514190674 |
|
}, |
|
{ |
|
"diff_generated": -31.941226959228516, |
|
"epoch": 1.9890081130594086, |
|
"grad_norm": 8.580957108117007, |
|
"learning_rate": 1.0725249238940915e-10, |
|
"logits/chosen": -2.4698963165283203, |
|
"logits/rejected": -2.0529587268829346, |
|
"logps/chosen": -231.325927734375, |
|
"logps/rejected": -2804.859619140625, |
|
"logps_avg/chosen": -0.6270388960838318, |
|
"logps_avg/rejected": -9.582367897033691, |
|
"loss": 0.5563, |
|
"losses_ref": -0.029423978179693222, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 950, |
|
"u": -4.438694477081299, |
|
"weight": 0.025990551337599754 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 954, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|