hugodk-sch's picture
Model save
a76d839 verified
raw
history blame
22.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 63.25,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": -2.7358343601226807,
"logits/rejected": -2.7480404376983643,
"logps/chosen": -27.35565757751465,
"logps/rejected": -21.06114387512207,
"loss": 1.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 108.0,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": -3.009650945663452,
"logits/rejected": -2.998239040374756,
"logps/chosen": -33.192203521728516,
"logps/rejected": -31.957557678222656,
"loss": 1.0141,
"rewards/accuracies": 0.4166666567325592,
"rewards/chosen": -0.009009478613734245,
"rewards/margins": -0.014087951742112637,
"rewards/rejected": 0.005078474525362253,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 73.5,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": -2.899263381958008,
"logits/rejected": -2.894313335418701,
"logps/chosen": -32.45400619506836,
"logps/rejected": -28.9648494720459,
"loss": 0.9967,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.016872350126504898,
"rewards/margins": 0.003259001299738884,
"rewards/rejected": 0.01361334603279829,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 71.0,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": -3.0970497131347656,
"logits/rejected": -3.108996868133545,
"logps/chosen": -32.78731918334961,
"logps/rejected": -30.140506744384766,
"loss": 0.9473,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.09709431231021881,
"rewards/margins": 0.05265679210424423,
"rewards/rejected": 0.044437527656555176,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 57.75,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": -2.8630309104919434,
"logits/rejected": -2.8540406227111816,
"logps/chosen": -31.542278289794922,
"logps/rejected": -32.394432067871094,
"loss": 0.8414,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.23043569922447205,
"rewards/margins": 0.260015070438385,
"rewards/rejected": -0.02957936003804207,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 44.75,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": -2.8809738159179688,
"logits/rejected": -2.8790669441223145,
"logps/chosen": -29.41156005859375,
"logps/rejected": -30.12240219116211,
"loss": 0.8761,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.26288196444511414,
"rewards/margins": 0.2896483242511749,
"rewards/rejected": -0.02676635980606079,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 63.75,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": -2.9108948707580566,
"logits/rejected": -2.912576675415039,
"logps/chosen": -29.901845932006836,
"logps/rejected": -28.0941219329834,
"loss": 0.7968,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.18233974277973175,
"rewards/margins": 0.2930926978588104,
"rewards/rejected": -0.11075299978256226,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 72.5,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": -2.9933552742004395,
"logits/rejected": -2.9995028972625732,
"logps/chosen": -29.255428314208984,
"logps/rejected": -31.047006607055664,
"loss": 0.9015,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.020431842654943466,
"rewards/margins": 0.14459456503391266,
"rewards/rejected": -0.1241627112030983,
"step": 70
},
{
"epoch": 0.21,
"grad_norm": 70.5,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": -2.808861255645752,
"logits/rejected": -2.8243188858032227,
"logps/chosen": -29.406871795654297,
"logps/rejected": -29.915807723999023,
"loss": 0.7973,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.10708501189947128,
"rewards/margins": 0.30030542612075806,
"rewards/rejected": -0.19322039186954498,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 56.25,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": -2.8983397483825684,
"logits/rejected": -2.880967617034912,
"logps/chosen": -32.71396255493164,
"logps/rejected": -30.347427368164062,
"loss": 0.8646,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.09797407686710358,
"rewards/margins": 0.35859915614128113,
"rewards/rejected": -0.26062512397766113,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 54.75,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": -3.004662275314331,
"logits/rejected": -3.005678653717041,
"logps/chosen": -31.851581573486328,
"logps/rejected": -30.93560791015625,
"loss": 0.8401,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.1642352044582367,
"rewards/margins": 0.25978168845176697,
"rewards/rejected": -0.09554646909236908,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": -2.811776876449585,
"eval_logits/rejected": -2.809250831604004,
"eval_logps/chosen": -31.276582717895508,
"eval_logps/rejected": -34.853797912597656,
"eval_loss": 0.8872909545898438,
"eval_rewards/accuracies": 0.6067276000976562,
"eval_rewards/chosen": 0.0046949307434260845,
"eval_rewards/margins": 0.12921129167079926,
"eval_rewards/rejected": -0.12451635301113129,
"eval_runtime": 113.4101,
"eval_samples_per_second": 3.024,
"eval_steps_per_second": 0.379,
"step": 100
},
{
"epoch": 0.29,
"grad_norm": 83.0,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": -2.9626810550689697,
"logits/rejected": -2.9382669925689697,
"logps/chosen": -31.831439971923828,
"logps/rejected": -31.40035057067871,
"loss": 0.6952,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.3082864582538605,
"rewards/margins": 0.5014885663986206,
"rewards/rejected": -0.1932021528482437,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 65.0,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": -3.0443854331970215,
"logits/rejected": -3.073098659515381,
"logps/chosen": -28.707149505615234,
"logps/rejected": -34.32903289794922,
"loss": 0.7028,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.3654031455516815,
"rewards/margins": 0.5105921030044556,
"rewards/rejected": -0.14518897235393524,
"step": 120
},
{
"epoch": 0.34,
"grad_norm": 44.5,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": -2.7482428550720215,
"logits/rejected": -2.743565082550049,
"logps/chosen": -28.566293716430664,
"logps/rejected": -30.31746482849121,
"loss": 0.7405,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.3267093598842621,
"rewards/margins": 0.47363653779029846,
"rewards/rejected": -0.14692717790603638,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 46.25,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": -3.022853136062622,
"logits/rejected": -3.0205140113830566,
"logps/chosen": -27.139602661132812,
"logps/rejected": -31.89032554626465,
"loss": 0.7769,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.27729907631874084,
"rewards/margins": 0.46414414048194885,
"rewards/rejected": -0.1868450939655304,
"step": 140
},
{
"epoch": 0.39,
"grad_norm": 48.0,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": -2.816462516784668,
"logits/rejected": -2.810920000076294,
"logps/chosen": -27.263103485107422,
"logps/rejected": -31.520715713500977,
"loss": 0.6276,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.4499203562736511,
"rewards/margins": 0.6875794529914856,
"rewards/rejected": -0.2376590520143509,
"step": 150
},
{
"epoch": 0.42,
"grad_norm": 68.5,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": -3.133582592010498,
"logits/rejected": -3.1159331798553467,
"logps/chosen": -31.7524471282959,
"logps/rejected": -29.40524673461914,
"loss": 0.547,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.5914198756217957,
"rewards/margins": 0.9317981600761414,
"rewards/rejected": -0.3403782546520233,
"step": 160
},
{
"epoch": 0.44,
"grad_norm": 58.25,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": -2.944953203201294,
"logits/rejected": -2.952117681503296,
"logps/chosen": -29.341304779052734,
"logps/rejected": -31.5146427154541,
"loss": 0.6062,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.4426456391811371,
"rewards/margins": 0.7747727632522583,
"rewards/rejected": -0.33212706446647644,
"step": 170
},
{
"epoch": 0.47,
"grad_norm": 57.25,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": -2.795180082321167,
"logits/rejected": -2.792935848236084,
"logps/chosen": -29.07159423828125,
"logps/rejected": -30.085384368896484,
"loss": 0.5999,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.47860392928123474,
"rewards/margins": 0.7323731184005737,
"rewards/rejected": -0.2537691593170166,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 35.5,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": -2.9086050987243652,
"logits/rejected": -2.9050517082214355,
"logps/chosen": -29.76633644104004,
"logps/rejected": -28.544025421142578,
"loss": 0.778,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.3363017439842224,
"rewards/margins": 0.4854954779148102,
"rewards/rejected": -0.14919371902942657,
"step": 190
},
{
"epoch": 0.52,
"grad_norm": 51.0,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": -2.9782276153564453,
"logits/rejected": -2.9666411876678467,
"logps/chosen": -32.82664108276367,
"logps/rejected": -30.458984375,
"loss": 0.724,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.6979023218154907,
"rewards/margins": 0.7178188562393188,
"rewards/rejected": -0.01991647481918335,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": -2.8178980350494385,
"eval_logits/rejected": -2.815643548965454,
"eval_logps/chosen": -31.38960838317871,
"eval_logps/rejected": -34.92823791503906,
"eval_loss": 0.9140273332595825,
"eval_rewards/accuracies": 0.565614640712738,
"eval_rewards/chosen": -0.08572381734848022,
"eval_rewards/margins": 0.09834489226341248,
"eval_rewards/rejected": -0.1840687096118927,
"eval_runtime": 113.15,
"eval_samples_per_second": 3.031,
"eval_steps_per_second": 0.38,
"step": 200
},
{
"epoch": 0.55,
"grad_norm": 52.5,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": -2.9144248962402344,
"logits/rejected": -2.914703845977783,
"logps/chosen": -32.25563049316406,
"logps/rejected": -34.11750793457031,
"loss": 0.5504,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.6222246289253235,
"rewards/margins": 0.8270590901374817,
"rewards/rejected": -0.2048344612121582,
"step": 210
},
{
"epoch": 0.57,
"grad_norm": 47.5,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": -2.8935537338256836,
"logits/rejected": -2.909308671951294,
"logps/chosen": -29.393646240234375,
"logps/rejected": -28.79998779296875,
"loss": 0.6005,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.6275160908699036,
"rewards/margins": 0.8448917269706726,
"rewards/rejected": -0.21737566590309143,
"step": 220
},
{
"epoch": 0.6,
"grad_norm": 46.5,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": -2.9393625259399414,
"logits/rejected": -2.9434802532196045,
"logps/chosen": -30.659032821655273,
"logps/rejected": -31.946239471435547,
"loss": 0.7369,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.39426764845848083,
"rewards/margins": 0.5447386503219604,
"rewards/rejected": -0.15047098696231842,
"step": 230
},
{
"epoch": 0.62,
"grad_norm": 46.75,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": -2.99426531791687,
"logits/rejected": -3.00126314163208,
"logps/chosen": -29.958850860595703,
"logps/rejected": -30.447010040283203,
"loss": 0.4564,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.7568598985671997,
"rewards/margins": 0.9262750744819641,
"rewards/rejected": -0.16941508650779724,
"step": 240
},
{
"epoch": 0.65,
"grad_norm": 55.0,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": -2.8251967430114746,
"logits/rejected": -2.815450429916382,
"logps/chosen": -26.474361419677734,
"logps/rejected": -29.600570678710938,
"loss": 0.721,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.3838837444782257,
"rewards/margins": 0.43902724981307983,
"rewards/rejected": -0.05514346435666084,
"step": 250
},
{
"epoch": 0.68,
"grad_norm": 21.75,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": -2.8065786361694336,
"logits/rejected": -2.8274922370910645,
"logps/chosen": -28.88662338256836,
"logps/rejected": -34.49494934082031,
"loss": 0.4422,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.7546705603599548,
"rewards/margins": 1.0113604068756104,
"rewards/rejected": -0.25668981671333313,
"step": 260
},
{
"epoch": 0.7,
"grad_norm": 60.0,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": -2.948761463165283,
"logits/rejected": -2.955237627029419,
"logps/chosen": -30.040813446044922,
"logps/rejected": -30.221237182617188,
"loss": 0.5322,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.5924339890480042,
"rewards/margins": 0.8772123456001282,
"rewards/rejected": -0.2847784161567688,
"step": 270
},
{
"epoch": 0.73,
"grad_norm": 29.375,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": -2.9630208015441895,
"logits/rejected": -2.949868679046631,
"logps/chosen": -30.0323429107666,
"logps/rejected": -28.719945907592773,
"loss": 0.7384,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.5514736771583557,
"rewards/margins": 0.6223500967025757,
"rewards/rejected": -0.07087641209363937,
"step": 280
},
{
"epoch": 0.75,
"grad_norm": 36.5,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": -2.890263080596924,
"logits/rejected": -2.872467279434204,
"logps/chosen": -31.614023208618164,
"logps/rejected": -30.969629287719727,
"loss": 0.4081,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 0.8178254961967468,
"rewards/margins": 1.1748807430267334,
"rewards/rejected": -0.3570552468299866,
"step": 290
},
{
"epoch": 0.78,
"grad_norm": 47.0,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": -2.892866611480713,
"logits/rejected": -2.8899810314178467,
"logps/chosen": -31.352060317993164,
"logps/rejected": -27.79092025756836,
"loss": 0.6408,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.6964761018753052,
"rewards/margins": 0.8463441729545593,
"rewards/rejected": -0.14986807107925415,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": -2.810232400894165,
"eval_logits/rejected": -2.807687520980835,
"eval_logps/chosen": -31.392431259155273,
"eval_logps/rejected": -34.949954986572266,
"eval_loss": 0.9091227650642395,
"eval_rewards/accuracies": 0.5627076625823975,
"eval_rewards/chosen": -0.08798420429229736,
"eval_rewards/margins": 0.11345873028039932,
"eval_rewards/rejected": -0.20144294202327728,
"eval_runtime": 113.1296,
"eval_samples_per_second": 3.032,
"eval_steps_per_second": 0.38,
"step": 300
},
{
"epoch": 0.81,
"grad_norm": 57.5,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": -2.771563768386841,
"logits/rejected": -2.790158748626709,
"logps/chosen": -28.552204132080078,
"logps/rejected": -31.123676300048828,
"loss": 0.4625,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 0.5417758226394653,
"rewards/margins": 0.855111300945282,
"rewards/rejected": -0.31333547830581665,
"step": 310
},
{
"epoch": 0.83,
"grad_norm": 50.75,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": -3.01896333694458,
"logits/rejected": -3.0036964416503906,
"logps/chosen": -29.334665298461914,
"logps/rejected": -28.647085189819336,
"loss": 0.4327,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.7015351057052612,
"rewards/margins": 0.9641841650009155,
"rewards/rejected": -0.2626491189002991,
"step": 320
},
{
"epoch": 0.86,
"grad_norm": 19.25,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": -2.9340624809265137,
"logits/rejected": -2.916344404220581,
"logps/chosen": -27.29937744140625,
"logps/rejected": -30.881011962890625,
"loss": 0.3934,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.8501566052436829,
"rewards/margins": 1.2416493892669678,
"rewards/rejected": -0.3914927840232849,
"step": 330
},
{
"epoch": 0.88,
"grad_norm": 54.0,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": -3.1503987312316895,
"logits/rejected": -3.156454563140869,
"logps/chosen": -30.917760848999023,
"logps/rejected": -33.217864990234375,
"loss": 0.4822,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.5771540403366089,
"rewards/margins": 0.994644820690155,
"rewards/rejected": -0.4174906611442566,
"step": 340
},
{
"epoch": 0.91,
"grad_norm": 27.0,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": -3.026543140411377,
"logits/rejected": -3.0300345420837402,
"logps/chosen": -29.901714324951172,
"logps/rejected": -31.784900665283203,
"loss": 0.5385,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.7241344451904297,
"rewards/margins": 0.9414092302322388,
"rewards/rejected": -0.2172747403383255,
"step": 350
},
{
"epoch": 0.94,
"grad_norm": 47.25,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": -2.8658933639526367,
"logits/rejected": -2.8671889305114746,
"logps/chosen": -27.54721450805664,
"logps/rejected": -30.053890228271484,
"loss": 0.4298,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.8820101022720337,
"rewards/margins": 1.0603699684143066,
"rewards/rejected": -0.17835985124111176,
"step": 360
},
{
"epoch": 0.96,
"grad_norm": 62.75,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": -2.9409708976745605,
"logits/rejected": -2.9394469261169434,
"logps/chosen": -29.869876861572266,
"logps/rejected": -32.248016357421875,
"loss": 0.486,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.7334206700325012,
"rewards/margins": 0.9893990755081177,
"rewards/rejected": -0.25597840547561646,
"step": 370
},
{
"epoch": 0.99,
"grad_norm": 56.5,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": -2.8988537788391113,
"logits/rejected": -2.8815865516662598,
"logps/chosen": -28.09465980529785,
"logps/rejected": -28.32416343688965,
"loss": 0.4149,
"rewards/accuracies": 0.875,
"rewards/chosen": 0.6803036332130432,
"rewards/margins": 1.027940273284912,
"rewards/rejected": -0.3476366698741913,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.6588526527602951,
"train_runtime": 2719.293,
"train_samples_per_second": 1.132,
"train_steps_per_second": 0.142
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}