NoManDeRY's picture
Upload folder using huggingface_hub
5fc3d8e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.998691442030882,
"eval_steps": 50,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002093692750588851,
"grad_norm": 4.405554687265435,
"learning_rate": 1.0416666666666666e-08,
"logits/chosen": -0.4866575300693512,
"logits/rejected": -0.7110590934753418,
"logps/chosen": -355.9316101074219,
"logps/rejected": -328.53912353515625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.02093692750588851,
"grad_norm": 4.356279351306001,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -0.570002019405365,
"logits/rejected": -0.6484304070472717,
"logps/chosen": -295.9329528808594,
"logps/rejected": -294.4837951660156,
"loss": 0.6932,
"rewards/accuracies": 0.4409722089767456,
"rewards/chosen": -8.168106433004141e-05,
"rewards/margins": -0.0002471136685926467,
"rewards/rejected": 0.00016543263336643577,
"step": 10
},
{
"epoch": 0.04187385501177702,
"grad_norm": 4.946352322484767,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -0.6396545767784119,
"logits/rejected": -0.7154265642166138,
"logps/chosen": -303.1146545410156,
"logps/rejected": -268.5458679199219,
"loss": 0.6931,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": 0.0005873010377399623,
"rewards/margins": -5.269009852781892e-05,
"rewards/rejected": 0.0006399911362677813,
"step": 20
},
{
"epoch": 0.06281078251766553,
"grad_norm": 3.7069226422915262,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -0.571534276008606,
"logits/rejected": -0.7066272497177124,
"logps/chosen": -318.60552978515625,
"logps/rejected": -287.88037109375,
"loss": 0.6919,
"rewards/accuracies": 0.59375,
"rewards/chosen": 0.007322841789573431,
"rewards/margins": 0.0024591959081590176,
"rewards/rejected": 0.004863646812736988,
"step": 30
},
{
"epoch": 0.08374771002355404,
"grad_norm": 3.83088263918906,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -0.6221814155578613,
"logits/rejected": -0.6772241592407227,
"logps/chosen": -314.6795349121094,
"logps/rejected": -282.56732177734375,
"loss": 0.6887,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": 0.02986811473965645,
"rewards/margins": 0.011089036241173744,
"rewards/rejected": 0.018779078498482704,
"step": 40
},
{
"epoch": 0.10468463752944256,
"grad_norm": 3.669105350122918,
"learning_rate": 4.999731868769026e-07,
"logits/chosen": -0.6346956491470337,
"logits/rejected": -0.7239211797714233,
"logps/chosen": -284.1347351074219,
"logps/rejected": -266.820556640625,
"loss": 0.6829,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.05462328717112541,
"rewards/margins": 0.023288695141673088,
"rewards/rejected": 0.031334586441516876,
"step": 50
},
{
"epoch": 0.10468463752944256,
"eval_logits/chosen": -0.6508491635322571,
"eval_logits/rejected": -0.7277823686599731,
"eval_logps/chosen": -284.6937561035156,
"eval_logps/rejected": -264.2071838378906,
"eval_loss": 0.6801902055740356,
"eval_rewards/accuracies": 0.6579999923706055,
"eval_rewards/chosen": 0.061179425567388535,
"eval_rewards/margins": 0.027551723644137383,
"eval_rewards/rejected": 0.033627700060606,
"eval_runtime": 363.9047,
"eval_samples_per_second": 5.496,
"eval_steps_per_second": 1.374,
"step": 50
},
{
"epoch": 0.12562156503533106,
"grad_norm": 4.218127495844181,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -0.6323250532150269,
"logits/rejected": -0.7270756959915161,
"logps/chosen": -276.2745056152344,
"logps/rejected": -254.50173950195312,
"loss": 0.6752,
"rewards/accuracies": 0.703125,
"rewards/chosen": 0.05940670520067215,
"rewards/margins": 0.04031256586313248,
"rewards/rejected": 0.019094135612249374,
"step": 60
},
{
"epoch": 0.14655849254121958,
"grad_norm": 6.381025648346635,
"learning_rate": 4.967625656594781e-07,
"logits/chosen": -0.6830436587333679,
"logits/rejected": -0.7048647403717041,
"logps/chosen": -299.7218017578125,
"logps/rejected": -286.69842529296875,
"loss": 0.6631,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.004782336764037609,
"rewards/margins": 0.055135466158390045,
"rewards/rejected": -0.05991780757904053,
"step": 70
},
{
"epoch": 0.16749542004710807,
"grad_norm": 4.8225743752994505,
"learning_rate": 4.93167072587771e-07,
"logits/chosen": -0.72892826795578,
"logits/rejected": -0.7647081613540649,
"logps/chosen": -325.7091369628906,
"logps/rejected": -299.4671630859375,
"loss": 0.6529,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.01807677373290062,
"rewards/margins": 0.09614584594964981,
"rewards/rejected": -0.11422260105609894,
"step": 80
},
{
"epoch": 0.1884323475529966,
"grad_norm": 6.458187740670673,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -0.72618168592453,
"logits/rejected": -0.7866657972335815,
"logps/chosen": -284.3786315917969,
"logps/rejected": -288.81207275390625,
"loss": 0.6357,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.056771814823150635,
"rewards/margins": 0.13238851726055145,
"rewards/rejected": -0.1891603320837021,
"step": 90
},
{
"epoch": 0.2093692750588851,
"grad_norm": 9.681896471190186,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": -0.7013887166976929,
"logits/rejected": -0.7288186550140381,
"logps/chosen": -296.72760009765625,
"logps/rejected": -284.09246826171875,
"loss": 0.6237,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.12606890499591827,
"rewards/margins": 0.1648329347372055,
"rewards/rejected": -0.2909018397331238,
"step": 100
},
{
"epoch": 0.2093692750588851,
"eval_logits/chosen": -0.6815055012702942,
"eval_logits/rejected": -0.7410086989402771,
"eval_logps/chosen": -302.6812438964844,
"eval_logps/rejected": -297.7958068847656,
"eval_loss": 0.6211419701576233,
"eval_rewards/accuracies": 0.7080000042915344,
"eval_rewards/chosen": -0.1186954528093338,
"eval_rewards/margins": 0.18356309831142426,
"eval_rewards/rejected": -0.30225852131843567,
"eval_runtime": 362.6984,
"eval_samples_per_second": 5.514,
"eval_steps_per_second": 1.379,
"step": 100
},
{
"epoch": 0.23030620256477363,
"grad_norm": 9.835955943860144,
"learning_rate": 4.7467175306295647e-07,
"logits/chosen": -0.6474356651306152,
"logits/rejected": -0.6836977005004883,
"logps/chosen": -304.34320068359375,
"logps/rejected": -310.12701416015625,
"loss": 0.6217,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.1749463528394699,
"rewards/margins": 0.19409213960170746,
"rewards/rejected": -0.36903852224349976,
"step": 110
},
{
"epoch": 0.2512431300706621,
"grad_norm": 8.239660157761222,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -0.607188880443573,
"logits/rejected": -0.6938163042068481,
"logps/chosen": -345.035400390625,
"logps/rejected": -316.98529052734375,
"loss": 0.6104,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.2799115777015686,
"rewards/margins": 0.2499914914369583,
"rewards/rejected": -0.5299030542373657,
"step": 120
},
{
"epoch": 0.2721800575765506,
"grad_norm": 14.29803862258713,
"learning_rate": 4.5626458262912735e-07,
"logits/chosen": -0.6138719916343689,
"logits/rejected": -0.6717087626457214,
"logps/chosen": -312.33154296875,
"logps/rejected": -326.3365478515625,
"loss": 0.5966,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.141954243183136,
"rewards/margins": 0.27666208148002625,
"rewards/rejected": -0.4186163544654846,
"step": 130
},
{
"epoch": 0.29311698508243916,
"grad_norm": 11.127586752555215,
"learning_rate": 4.453763107901675e-07,
"logits/chosen": -0.6942325830459595,
"logits/rejected": -0.7251573801040649,
"logps/chosen": -342.69415283203125,
"logps/rejected": -336.53369140625,
"loss": 0.5974,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.22019581496715546,
"rewards/margins": 0.24864068627357483,
"rewards/rejected": -0.46883654594421387,
"step": 140
},
{
"epoch": 0.31405391258832765,
"grad_norm": 18.14233757409728,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -0.6244213581085205,
"logits/rejected": -0.7043098211288452,
"logps/chosen": -350.4010925292969,
"logps/rejected": -343.3636779785156,
"loss": 0.5943,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.2886292040348053,
"rewards/margins": 0.3421526551246643,
"rewards/rejected": -0.6307818293571472,
"step": 150
},
{
"epoch": 0.31405391258832765,
"eval_logits/chosen": -0.6515117287635803,
"eval_logits/rejected": -0.7014611959457397,
"eval_logps/chosen": -314.8688659667969,
"eval_logps/rejected": -318.15289306640625,
"eval_loss": 0.5983646512031555,
"eval_rewards/accuracies": 0.6980000138282776,
"eval_rewards/chosen": -0.24057185649871826,
"eval_rewards/margins": 0.265257328748703,
"eval_rewards/rejected": -0.5058292150497437,
"eval_runtime": 362.535,
"eval_samples_per_second": 5.517,
"eval_steps_per_second": 1.379,
"step": 150
},
{
"epoch": 0.33499084009421615,
"grad_norm": 38.32033611861921,
"learning_rate": 4.2052190435769554e-07,
"logits/chosen": -0.6601163148880005,
"logits/rejected": -0.6881546378135681,
"logps/chosen": -315.28863525390625,
"logps/rejected": -332.63458251953125,
"loss": 0.5868,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.2826778292655945,
"rewards/margins": 0.2917526066303253,
"rewards/rejected": -0.5744304656982422,
"step": 160
},
{
"epoch": 0.3559277676001047,
"grad_norm": 28.729045640780658,
"learning_rate": 4.0668899744407567e-07,
"logits/chosen": -0.6568697690963745,
"logits/rejected": -0.6840031147003174,
"logps/chosen": -313.22998046875,
"logps/rejected": -343.158203125,
"loss": 0.5898,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.3508220911026001,
"rewards/margins": 0.3158671259880066,
"rewards/rejected": -0.6666892170906067,
"step": 170
},
{
"epoch": 0.3768646951059932,
"grad_norm": 31.687184520759857,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -0.6633812785148621,
"logits/rejected": -0.7255716919898987,
"logps/chosen": -348.14739990234375,
"logps/rejected": -364.039794921875,
"loss": 0.5712,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.38741543889045715,
"rewards/margins": 0.4109956622123718,
"rewards/rejected": -0.7984111309051514,
"step": 180
},
{
"epoch": 0.39780162261188173,
"grad_norm": 30.89197698830195,
"learning_rate": 3.765821230985757e-07,
"logits/chosen": -0.6792806386947632,
"logits/rejected": -0.6605275273323059,
"logps/chosen": -321.3431091308594,
"logps/rejected": -347.12255859375,
"loss": 0.5812,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.40531882643699646,
"rewards/margins": 0.33206993341445923,
"rewards/rejected": -0.7373887300491333,
"step": 190
},
{
"epoch": 0.4187385501177702,
"grad_norm": 37.44350102005558,
"learning_rate": 3.604695382782159e-07,
"logits/chosen": -0.6688074469566345,
"logits/rejected": -0.7198851108551025,
"logps/chosen": -349.90423583984375,
"logps/rejected": -371.4599304199219,
"loss": 0.5788,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.5965304970741272,
"rewards/margins": 0.3574155569076538,
"rewards/rejected": -0.9539459943771362,
"step": 200
},
{
"epoch": 0.4187385501177702,
"eval_logits/chosen": -0.656848669052124,
"eval_logits/rejected": -0.7012197375297546,
"eval_logps/chosen": -356.0472106933594,
"eval_logps/rejected": -370.5501708984375,
"eval_loss": 0.573124349117279,
"eval_rewards/accuracies": 0.7099999785423279,
"eval_rewards/chosen": -0.652355432510376,
"eval_rewards/margins": 0.3774465024471283,
"eval_rewards/rejected": -1.0298019647598267,
"eval_runtime": 361.6699,
"eval_samples_per_second": 5.53,
"eval_steps_per_second": 1.382,
"step": 200
},
{
"epoch": 0.4396754776236587,
"grad_norm": 20.28041078703544,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -0.6503298878669739,
"logits/rejected": -0.6961864233016968,
"logps/chosen": -363.98443603515625,
"logps/rejected": -377.7153015136719,
"loss": 0.5838,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.6623082160949707,
"rewards/margins": 0.4378587305545807,
"rewards/rejected": -1.100166916847229,
"step": 210
},
{
"epoch": 0.46061240512954726,
"grad_norm": 35.49367620967288,
"learning_rate": 3.265574537815398e-07,
"logits/chosen": -0.6686447858810425,
"logits/rejected": -0.6819853186607361,
"logps/chosen": -350.9520263671875,
"logps/rejected": -395.07806396484375,
"loss": 0.5712,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.6993459463119507,
"rewards/margins": 0.39070096611976624,
"rewards/rejected": -1.090047001838684,
"step": 220
},
{
"epoch": 0.48154933263543576,
"grad_norm": 16.234101384109916,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": -0.7044863700866699,
"logits/rejected": -0.7113361358642578,
"logps/chosen": -372.82562255859375,
"logps/rejected": -418.2840881347656,
"loss": 0.5785,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.960254967212677,
"rewards/margins": 0.3817780911922455,
"rewards/rejected": -1.3420331478118896,
"step": 230
},
{
"epoch": 0.5024862601413242,
"grad_norm": 28.450587843543108,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -0.6691449880599976,
"logits/rejected": -0.690998375415802,
"logps/chosen": -392.90472412109375,
"logps/rejected": -415.3272399902344,
"loss": 0.5413,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.8516443967819214,
"rewards/margins": 0.5042856931686401,
"rewards/rejected": -1.355930209159851,
"step": 240
},
{
"epoch": 0.5234231876472127,
"grad_norm": 26.335941375612,
"learning_rate": 2.7285261601056697e-07,
"logits/chosen": -0.744831383228302,
"logits/rejected": -0.7566218972206116,
"logps/chosen": -391.26239013671875,
"logps/rejected": -434.4244079589844,
"loss": 0.5518,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.8992071151733398,
"rewards/margins": 0.5220283269882202,
"rewards/rejected": -1.4212353229522705,
"step": 250
},
{
"epoch": 0.5234231876472127,
"eval_logits/chosen": -0.6885221600532532,
"eval_logits/rejected": -0.7286450266838074,
"eval_logps/chosen": -390.97772216796875,
"eval_logps/rejected": -414.00164794921875,
"eval_loss": 0.5652250051498413,
"eval_rewards/accuracies": 0.7260000109672546,
"eval_rewards/chosen": -1.0016601085662842,
"eval_rewards/margins": 0.4626566171646118,
"eval_rewards/rejected": -1.4643168449401855,
"eval_runtime": 359.194,
"eval_samples_per_second": 5.568,
"eval_steps_per_second": 1.392,
"step": 250
},
{
"epoch": 0.5443601151531012,
"grad_norm": 50.5833145233516,
"learning_rate": 2.5457665670441937e-07,
"logits/chosen": -0.7291234135627747,
"logits/rejected": -0.7106319665908813,
"logps/chosen": -386.3546142578125,
"logps/rejected": -427.50469970703125,
"loss": 0.5567,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.9915366172790527,
"rewards/margins": 0.4434414803981781,
"rewards/rejected": -1.4349782466888428,
"step": 260
},
{
"epoch": 0.5652970426589898,
"grad_norm": 29.40437710442188,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -0.6915990114212036,
"logits/rejected": -0.7419033050537109,
"logps/chosen": -384.0176696777344,
"logps/rejected": -393.740234375,
"loss": 0.556,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.9498584866523743,
"rewards/margins": 0.4780716001987457,
"rewards/rejected": -1.4279301166534424,
"step": 270
},
{
"epoch": 0.5862339701648783,
"grad_norm": 555.5760315124888,
"learning_rate": 2.1804923757009882e-07,
"logits/chosen": -0.7014500498771667,
"logits/rejected": -0.7159109115600586,
"logps/chosen": -389.5604553222656,
"logps/rejected": -420.940185546875,
"loss": 0.571,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.9026616811752319,
"rewards/margins": 0.4052717089653015,
"rewards/rejected": -1.3079332113265991,
"step": 280
},
{
"epoch": 0.6071708976707668,
"grad_norm": 17.414681195430056,
"learning_rate": 1.9999357655598891e-07,
"logits/chosen": -0.6687137484550476,
"logits/rejected": -0.7034614086151123,
"logps/chosen": -383.6869201660156,
"logps/rejected": -404.7913818359375,
"loss": 0.5536,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.8191441297531128,
"rewards/margins": 0.5369825959205627,
"rewards/rejected": -1.3561267852783203,
"step": 290
},
{
"epoch": 0.6281078251766553,
"grad_norm": 111.908211236837,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -0.6898786425590515,
"logits/rejected": -0.7226337790489197,
"logps/chosen": -403.2303771972656,
"logps/rejected": -441.4107971191406,
"loss": 0.5472,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -1.030331015586853,
"rewards/margins": 0.5372998118400574,
"rewards/rejected": -1.5676310062408447,
"step": 300
},
{
"epoch": 0.6281078251766553,
"eval_logits/chosen": -0.6861531734466553,
"eval_logits/rejected": -0.7268748879432678,
"eval_logps/chosen": -395.8287048339844,
"eval_logps/rejected": -419.298583984375,
"eval_loss": 0.5599412322044373,
"eval_rewards/accuracies": 0.722000002861023,
"eval_rewards/chosen": -1.0501700639724731,
"eval_rewards/margins": 0.46711620688438416,
"eval_rewards/rejected": -1.5172861814498901,
"eval_runtime": 354.6129,
"eval_samples_per_second": 5.64,
"eval_steps_per_second": 1.41,
"step": 300
},
{
"epoch": 0.6490447526825438,
"grad_norm": 37.29689892406944,
"learning_rate": 1.647817538357072e-07,
"logits/chosen": -0.7173858880996704,
"logits/rejected": -0.7734094858169556,
"logps/chosen": -422.9934997558594,
"logps/rejected": -421.84832763671875,
"loss": 0.5395,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.9894211888313293,
"rewards/margins": 0.4697929918766022,
"rewards/rejected": -1.459214210510254,
"step": 310
},
{
"epoch": 0.6699816801884323,
"grad_norm": 42.01533855938247,
"learning_rate": 1.478143389201113e-07,
"logits/chosen": -0.6970559358596802,
"logits/rejected": -0.6931095123291016,
"logps/chosen": -377.6057434082031,
"logps/rejected": -406.4757995605469,
"loss": 0.5333,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.9958661794662476,
"rewards/margins": 0.5368868112564087,
"rewards/rejected": -1.5327531099319458,
"step": 320
},
{
"epoch": 0.6909186076943209,
"grad_norm": 37.59789471432025,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -0.7029486298561096,
"logits/rejected": -0.7086675763130188,
"logps/chosen": -405.01715087890625,
"logps/rejected": -445.0240783691406,
"loss": 0.5457,
"rewards/accuracies": 0.734375,
"rewards/chosen": -1.0576202869415283,
"rewards/margins": 0.5650633573532104,
"rewards/rejected": -1.6226835250854492,
"step": 330
},
{
"epoch": 0.7118555352002094,
"grad_norm": 36.37515401195654,
"learning_rate": 1.1561076868822755e-07,
"logits/chosen": -0.7101846933364868,
"logits/rejected": -0.704253077507019,
"logps/chosen": -403.80133056640625,
"logps/rejected": -455.9847106933594,
"loss": 0.5449,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -1.0073381662368774,
"rewards/margins": 0.5620848536491394,
"rewards/rejected": -1.569422960281372,
"step": 340
},
{
"epoch": 0.7327924627060979,
"grad_norm": 24.48749996210416,
"learning_rate": 1.0054723495346482e-07,
"logits/chosen": -0.7318924069404602,
"logits/rejected": -0.7332495450973511,
"logps/chosen": -386.4432373046875,
"logps/rejected": -431.122314453125,
"loss": 0.5215,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.9518150091171265,
"rewards/margins": 0.6185209155082703,
"rewards/rejected": -1.5703357458114624,
"step": 350
},
{
"epoch": 0.7327924627060979,
"eval_logits/chosen": -0.7031384110450745,
"eval_logits/rejected": -0.7401583790779114,
"eval_logps/chosen": -392.8218688964844,
"eval_logps/rejected": -421.5936279296875,
"eval_loss": 0.5506237745285034,
"eval_rewards/accuracies": 0.7379999756813049,
"eval_rewards/chosen": -1.0201021432876587,
"eval_rewards/margins": 0.5201343894004822,
"eval_rewards/rejected": -1.5402365922927856,
"eval_runtime": 353.9993,
"eval_samples_per_second": 5.65,
"eval_steps_per_second": 1.412,
"step": 350
},
{
"epoch": 0.7537293902119864,
"grad_norm": 33.943607014642296,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -0.6926234364509583,
"logits/rejected": -0.708940327167511,
"logps/chosen": -418.54705810546875,
"logps/rejected": -442.49365234375,
"loss": 0.557,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -1.08583664894104,
"rewards/margins": 0.4989503026008606,
"rewards/rejected": -1.5847870111465454,
"step": 360
},
{
"epoch": 0.7746663177178749,
"grad_norm": 29.32569904282417,
"learning_rate": 7.289996455765748e-08,
"logits/chosen": -0.6894348859786987,
"logits/rejected": -0.7344834804534912,
"logps/chosen": -399.79083251953125,
"logps/rejected": -426.5049743652344,
"loss": 0.5393,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -1.078011155128479,
"rewards/margins": 0.5504921674728394,
"rewards/rejected": -1.6285032033920288,
"step": 370
},
{
"epoch": 0.7956032452237635,
"grad_norm": 46.152539915607406,
"learning_rate": 6.046442623320145e-08,
"logits/chosen": -0.6863051056861877,
"logits/rejected": -0.7106188535690308,
"logps/chosen": -421.65606689453125,
"logps/rejected": -461.34490966796875,
"loss": 0.5398,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -1.169628620147705,
"rewards/margins": 0.5851330757141113,
"rewards/rejected": -1.7547616958618164,
"step": 380
},
{
"epoch": 0.816540172729652,
"grad_norm": 32.74884413709988,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -0.6976770162582397,
"logits/rejected": -0.7492203712463379,
"logps/chosen": -417.27020263671875,
"logps/rejected": -452.70343017578125,
"loss": 0.5329,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -1.0417944192886353,
"rewards/margins": 0.5599567890167236,
"rewards/rejected": -1.6017510890960693,
"step": 390
},
{
"epoch": 0.8374771002355405,
"grad_norm": 43.939889505795925,
"learning_rate": 3.8702478614051345e-08,
"logits/chosen": -0.7140255570411682,
"logits/rejected": -0.750179648399353,
"logps/chosen": -409.387451171875,
"logps/rejected": -452.68414306640625,
"loss": 0.5415,
"rewards/accuracies": 0.734375,
"rewards/chosen": -1.0880142450332642,
"rewards/margins": 0.5527244806289673,
"rewards/rejected": -1.6407387256622314,
"step": 400
},
{
"epoch": 0.8374771002355405,
"eval_logits/chosen": -0.7055131793022156,
"eval_logits/rejected": -0.7418683767318726,
"eval_logps/chosen": -402.3448181152344,
"eval_logps/rejected": -432.3640441894531,
"eval_loss": 0.5493519306182861,
"eval_rewards/accuracies": 0.7459999918937683,
"eval_rewards/chosen": -1.1153309345245361,
"eval_rewards/margins": 0.5326094031333923,
"eval_rewards/rejected": -1.6479403972625732,
"eval_runtime": 354.6496,
"eval_samples_per_second": 5.639,
"eval_steps_per_second": 1.41,
"step": 400
},
{
"epoch": 0.8584140277414289,
"grad_norm": 34.6918001312517,
"learning_rate": 2.9492720416985e-08,
"logits/chosen": -0.7722108960151672,
"logits/rejected": -0.7759251594543457,
"logps/chosen": -425.9440002441406,
"logps/rejected": -437.1736755371094,
"loss": 0.5366,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -1.136293649673462,
"rewards/margins": 0.545237123966217,
"rewards/rejected": -1.6815307140350342,
"step": 410
},
{
"epoch": 0.8793509552473174,
"grad_norm": 26.59254420115498,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -0.7044004201889038,
"logits/rejected": -0.7171922922134399,
"logps/chosen": -400.56964111328125,
"logps/rejected": -466.5582580566406,
"loss": 0.5326,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -1.1527706384658813,
"rewards/margins": 0.6072098016738892,
"rewards/rejected": -1.7599804401397705,
"step": 420
},
{
"epoch": 0.9002878827532059,
"grad_norm": 18.548901459218577,
"learning_rate": 1.4662207078575684e-08,
"logits/chosen": -0.7108010053634644,
"logits/rejected": -0.7494346499443054,
"logps/chosen": -422.11761474609375,
"logps/rejected": -446.8285217285156,
"loss": 0.5159,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -1.1354727745056152,
"rewards/margins": 0.583838701248169,
"rewards/rejected": -1.7193113565444946,
"step": 430
},
{
"epoch": 0.9212248102590945,
"grad_norm": 46.949560224254434,
"learning_rate": 9.12094829893642e-09,
"logits/chosen": -0.7222899198532104,
"logits/rejected": -0.7426605820655823,
"logps/chosen": -414.1346740722656,
"logps/rejected": -462.6570739746094,
"loss": 0.5371,
"rewards/accuracies": 0.75,
"rewards/chosen": -1.1052252054214478,
"rewards/margins": 0.6353217959403992,
"rewards/rejected": -1.7405471801757812,
"step": 440
},
{
"epoch": 0.942161737764983,
"grad_norm": 52.60402288327978,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -0.709662914276123,
"logits/rejected": -0.7304754853248596,
"logps/chosen": -394.4976501464844,
"logps/rejected": -445.88360595703125,
"loss": 0.5368,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.1392234563827515,
"rewards/margins": 0.5495996475219727,
"rewards/rejected": -1.6888229846954346,
"step": 450
},
{
"epoch": 0.942161737764983,
"eval_logits/chosen": -0.7091179490089417,
"eval_logits/rejected": -0.7446374893188477,
"eval_logps/chosen": -405.1720275878906,
"eval_logps/rejected": -435.4569091796875,
"eval_loss": 0.5487431287765503,
"eval_rewards/accuracies": 0.7379999756813049,
"eval_rewards/chosen": -1.1436034440994263,
"eval_rewards/margins": 0.5352665185928345,
"eval_rewards/rejected": -1.6788699626922607,
"eval_runtime": 354.9509,
"eval_samples_per_second": 5.635,
"eval_steps_per_second": 1.409,
"step": 450
},
{
"epoch": 0.9630986652708715,
"grad_norm": 25.01747933998231,
"learning_rate": 1.9347820230782295e-09,
"logits/chosen": -0.7313689589500427,
"logits/rejected": -0.7425884008407593,
"logps/chosen": -407.93182373046875,
"logps/rejected": -438.13311767578125,
"loss": 0.5418,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -1.1677922010421753,
"rewards/margins": 0.5207014083862305,
"rewards/rejected": -1.6884937286376953,
"step": 460
},
{
"epoch": 0.98403559277676,
"grad_norm": 18.928838252315014,
"learning_rate": 3.2839470889836627e-10,
"logits/chosen": -0.6598347425460815,
"logits/rejected": -0.7180498242378235,
"logps/chosen": -415.50347900390625,
"logps/rejected": -432.26556396484375,
"loss": 0.5174,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -1.1242364645004272,
"rewards/margins": 0.6142801642417908,
"rewards/rejected": -1.7385165691375732,
"step": 470
},
{
"epoch": 0.998691442030882,
"step": 477,
"total_flos": 0.0,
"train_loss": 0.5814160232274037,
"train_runtime": 29745.7246,
"train_samples_per_second": 2.055,
"train_steps_per_second": 0.016
}
],
"logging_steps": 10,
"max_steps": 477,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}