llama3.1-cpo-full / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9930715935334873,
"eval_steps": 100,
"global_step": 324,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09237875288683603,
"grad_norm": 38.34350007536878,
"learning_rate": 1.5151515151515152e-07,
"logits/chosen": -0.33047571778297424,
"logits/rejected": -0.31439679861068726,
"logps/chosen": -268.56201171875,
"logps/rejected": -270.61700439453125,
"loss": 2.4944,
"nll_loss": 0.726706862449646,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -26.856201171875,
"rewards/margins": 0.20550203323364258,
"rewards/rejected": -27.061702728271484,
"step": 10
},
{
"epoch": 0.18475750577367206,
"grad_norm": 39.7882668385144,
"learning_rate": 3.0303030303030305e-07,
"logits/chosen": -0.3137342929840088,
"logits/rejected": -0.2968626618385315,
"logps/chosen": -261.27764892578125,
"logps/rejected": -261.04803466796875,
"loss": 2.5847,
"nll_loss": 0.736041247844696,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -26.127761840820312,
"rewards/margins": -0.022955775260925293,
"rewards/rejected": -26.10480308532715,
"step": 20
},
{
"epoch": 0.27713625866050806,
"grad_norm": 32.849922759930486,
"learning_rate": 4.545454545454545e-07,
"logits/chosen": -0.383260041475296,
"logits/rejected": -0.3610544204711914,
"logps/chosen": -254.9075927734375,
"logps/rejected": -254.6737823486328,
"loss": 2.4052,
"nll_loss": 0.7010518312454224,
"rewards/accuracies": 0.515625,
"rewards/chosen": -25.490758895874023,
"rewards/margins": -0.023382291197776794,
"rewards/rejected": -25.467376708984375,
"step": 30
},
{
"epoch": 0.3695150115473441,
"grad_norm": 26.712148454979943,
"learning_rate": 4.879725085910652e-07,
"logits/chosen": -0.5479347705841064,
"logits/rejected": -0.5087471008300781,
"logps/chosen": -220.08718872070312,
"logps/rejected": -216.94229125976562,
"loss": 2.3725,
"nll_loss": 0.6341860890388489,
"rewards/accuracies": 0.5015624761581421,
"rewards/chosen": -22.008716583251953,
"rewards/margins": -0.3144901692867279,
"rewards/rejected": -21.69422721862793,
"step": 40
},
{
"epoch": 0.4618937644341801,
"grad_norm": 26.784049649942634,
"learning_rate": 4.707903780068728e-07,
"logits/chosen": -0.8294746279716492,
"logits/rejected": -0.8073676228523254,
"logps/chosen": -196.47360229492188,
"logps/rejected": -196.55337524414062,
"loss": 2.0929,
"nll_loss": 0.5368759036064148,
"rewards/accuracies": 0.5296875238418579,
"rewards/chosen": -19.64735984802246,
"rewards/margins": 0.007975578308105469,
"rewards/rejected": -19.655336380004883,
"step": 50
},
{
"epoch": 0.5542725173210161,
"grad_norm": 28.025237769650065,
"learning_rate": 4.536082474226804e-07,
"logits/chosen": -0.6816179752349854,
"logits/rejected": -0.6509512066841125,
"logps/chosen": -175.80374145507812,
"logps/rejected": -176.0839385986328,
"loss": 2.0271,
"nll_loss": 0.46367818117141724,
"rewards/accuracies": 0.510937511920929,
"rewards/chosen": -17.580373764038086,
"rewards/margins": 0.028019297868013382,
"rewards/rejected": -17.608394622802734,
"step": 60
},
{
"epoch": 0.6466512702078522,
"grad_norm": 26.448824948400027,
"learning_rate": 4.3642611683848796e-07,
"logits/chosen": -0.5208871364593506,
"logits/rejected": -0.4965832233428955,
"logps/chosen": -160.55596923828125,
"logps/rejected": -162.43707275390625,
"loss": 1.908,
"nll_loss": 0.4267793595790863,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -16.05559730529785,
"rewards/margins": 0.1881115734577179,
"rewards/rejected": -16.24370765686035,
"step": 70
},
{
"epoch": 0.7390300230946882,
"grad_norm": 25.832913188032137,
"learning_rate": 4.1924398625429554e-07,
"logits/chosen": -0.42753878235816956,
"logits/rejected": -0.4124147295951843,
"logps/chosen": -153.801513671875,
"logps/rejected": -158.33753967285156,
"loss": 1.8956,
"nll_loss": 0.4220770001411438,
"rewards/accuracies": 0.542187511920929,
"rewards/chosen": -15.380151748657227,
"rewards/margins": 0.453604519367218,
"rewards/rejected": -15.833755493164062,
"step": 80
},
{
"epoch": 0.8314087759815243,
"grad_norm": 27.79297058622181,
"learning_rate": 4.020618556701031e-07,
"logits/chosen": -0.4597485661506653,
"logits/rejected": -0.4340926706790924,
"logps/chosen": -150.138427734375,
"logps/rejected": -151.1810760498047,
"loss": 1.8861,
"nll_loss": 0.4107755124568939,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": -15.013842582702637,
"rewards/margins": 0.10426414012908936,
"rewards/rejected": -15.1181058883667,
"step": 90
},
{
"epoch": 0.9237875288683602,
"grad_norm": 25.1491914386423,
"learning_rate": 3.8487972508591063e-07,
"logits/chosen": -0.5064208507537842,
"logits/rejected": -0.4822482168674469,
"logps/chosen": -159.95938110351562,
"logps/rejected": -161.27655029296875,
"loss": 1.822,
"nll_loss": 0.41467323899269104,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -15.995938301086426,
"rewards/margins": 0.131715327501297,
"rewards/rejected": -16.127653121948242,
"step": 100
},
{
"epoch": 0.9237875288683602,
"eval_logits/chosen": -0.4379667639732361,
"eval_logits/rejected": -0.42346981167793274,
"eval_logps/chosen": -146.49607849121094,
"eval_logps/rejected": -154.26937866210938,
"eval_loss": 1.7790985107421875,
"eval_nll_loss": 0.4057552218437195,
"eval_rewards/accuracies": 0.6034482717514038,
"eval_rewards/chosen": -14.649608612060547,
"eval_rewards/margins": 0.777328372001648,
"eval_rewards/rejected": -15.4269380569458,
"eval_runtime": 65.2011,
"eval_samples_per_second": 28.006,
"eval_steps_per_second": 0.445,
"step": 100
},
{
"epoch": 1.0161662817551964,
"grad_norm": 24.357108466796436,
"learning_rate": 3.676975945017182e-07,
"logits/chosen": -0.44147372245788574,
"logits/rejected": -0.4213744103908539,
"logps/chosen": -148.9695281982422,
"logps/rejected": -151.85446166992188,
"loss": 1.7788,
"nll_loss": 0.40945443511009216,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -14.896951675415039,
"rewards/margins": 0.2884957790374756,
"rewards/rejected": -15.185447692871094,
"step": 110
},
{
"epoch": 1.1085450346420322,
"grad_norm": 30.225216765479118,
"learning_rate": 3.5051546391752573e-07,
"logits/chosen": -0.41636085510253906,
"logits/rejected": -0.38961413502693176,
"logps/chosen": -149.9561309814453,
"logps/rejected": -154.90982055664062,
"loss": 1.6408,
"nll_loss": 0.40735840797424316,
"rewards/accuracies": 0.5953124761581421,
"rewards/chosen": -14.995613098144531,
"rewards/margins": 0.49536871910095215,
"rewards/rejected": -15.490982055664062,
"step": 120
},
{
"epoch": 1.2009237875288683,
"grad_norm": 25.661477968018204,
"learning_rate": 3.333333333333333e-07,
"logits/chosen": -0.39541321992874146,
"logits/rejected": -0.36797264218330383,
"logps/chosen": -143.75636291503906,
"logps/rejected": -149.67919921875,
"loss": 1.6412,
"nll_loss": 0.4088224768638611,
"rewards/accuracies": 0.604687511920929,
"rewards/chosen": -14.375636100769043,
"rewards/margins": 0.5922830700874329,
"rewards/rejected": -14.967920303344727,
"step": 130
},
{
"epoch": 1.2933025404157044,
"grad_norm": 24.629268500456213,
"learning_rate": 3.161512027491409e-07,
"logits/chosen": -0.4200739860534668,
"logits/rejected": -0.40387552976608276,
"logps/chosen": -154.5819091796875,
"logps/rejected": -162.4684600830078,
"loss": 1.5851,
"nll_loss": 0.42036017775535583,
"rewards/accuracies": 0.604687511920929,
"rewards/chosen": -15.45819091796875,
"rewards/margins": 0.7886544466018677,
"rewards/rejected": -16.246845245361328,
"step": 140
},
{
"epoch": 1.3856812933025404,
"grad_norm": 27.244637011376536,
"learning_rate": 2.9896907216494845e-07,
"logits/chosen": -0.4014149606227875,
"logits/rejected": -0.38134342432022095,
"logps/chosen": -157.56259155273438,
"logps/rejected": -163.28109741210938,
"loss": 1.6163,
"nll_loss": 0.42149510979652405,
"rewards/accuracies": 0.5953124761581421,
"rewards/chosen": -15.756260871887207,
"rewards/margins": 0.5718507170677185,
"rewards/rejected": -16.32811164855957,
"step": 150
},
{
"epoch": 1.4780600461893765,
"grad_norm": 48.54509039980329,
"learning_rate": 2.81786941580756e-07,
"logits/chosen": -0.4404594302177429,
"logits/rejected": -0.43164220452308655,
"logps/chosen": -162.8580780029297,
"logps/rejected": -169.2598876953125,
"loss": 1.572,
"nll_loss": 0.4240867495536804,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": -16.285808563232422,
"rewards/margins": 0.6401800513267517,
"rewards/rejected": -16.925989151000977,
"step": 160
},
{
"epoch": 1.5704387990762125,
"grad_norm": 26.569985559411176,
"learning_rate": 2.6460481099656354e-07,
"logits/chosen": -0.41170358657836914,
"logits/rejected": -0.40014591813087463,
"logps/chosen": -152.54824829101562,
"logps/rejected": -160.4109344482422,
"loss": 1.5587,
"nll_loss": 0.4169366955757141,
"rewards/accuracies": 0.6390625238418579,
"rewards/chosen": -15.2548246383667,
"rewards/margins": 0.7862688302993774,
"rewards/rejected": -16.041095733642578,
"step": 170
},
{
"epoch": 1.6628175519630486,
"grad_norm": 24.616859305838048,
"learning_rate": 2.474226804123711e-07,
"logits/chosen": -0.4424857497215271,
"logits/rejected": -0.43130555748939514,
"logps/chosen": -153.38320922851562,
"logps/rejected": -157.69728088378906,
"loss": 1.531,
"nll_loss": 0.4121263921260834,
"rewards/accuracies": 0.5703125,
"rewards/chosen": -15.3383207321167,
"rewards/margins": 0.43140602111816406,
"rewards/rejected": -15.76972770690918,
"step": 180
},
{
"epoch": 1.7551963048498846,
"grad_norm": 24.22918462233095,
"learning_rate": 2.3024054982817866e-07,
"logits/chosen": -0.40492838621139526,
"logits/rejected": -0.3852563202381134,
"logps/chosen": -155.97390747070312,
"logps/rejected": -163.59666442871094,
"loss": 1.5443,
"nll_loss": 0.4084969162940979,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": -15.597391128540039,
"rewards/margins": 0.7622756958007812,
"rewards/rejected": -16.359668731689453,
"step": 190
},
{
"epoch": 1.8475750577367207,
"grad_norm": 24.111596988391938,
"learning_rate": 2.1305841924398624e-07,
"logits/chosen": -0.38298338651657104,
"logits/rejected": -0.35016077756881714,
"logps/chosen": -148.51443481445312,
"logps/rejected": -155.7366943359375,
"loss": 1.5612,
"nll_loss": 0.41300660371780396,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -14.85144329071045,
"rewards/margins": 0.7222263813018799,
"rewards/rejected": -15.573671340942383,
"step": 200
},
{
"epoch": 1.8475750577367207,
"eval_logits/chosen": -0.38625869154930115,
"eval_logits/rejected": -0.3721800148487091,
"eval_logps/chosen": -151.33670043945312,
"eval_logps/rejected": -159.72564697265625,
"eval_loss": 1.6871463060379028,
"eval_nll_loss": 0.419677197933197,
"eval_rewards/accuracies": 0.6379310488700867,
"eval_rewards/chosen": -15.133668899536133,
"eval_rewards/margins": 0.8388964533805847,
"eval_rewards/rejected": -15.972565650939941,
"eval_runtime": 44.5152,
"eval_samples_per_second": 41.02,
"eval_steps_per_second": 0.651,
"step": 200
},
{
"epoch": 1.9399538106235565,
"grad_norm": 24.485330144648206,
"learning_rate": 1.958762886597938e-07,
"logits/chosen": -0.3989901542663574,
"logits/rejected": -0.38505780696868896,
"logps/chosen": -154.37796020507812,
"logps/rejected": -161.5634307861328,
"loss": 1.5471,
"nll_loss": 0.42780718207359314,
"rewards/accuracies": 0.6109374761581421,
"rewards/chosen": -15.437795639038086,
"rewards/margins": 0.7185462713241577,
"rewards/rejected": -16.15634536743164,
"step": 210
},
{
"epoch": 2.032332563510393,
"grad_norm": 23.912915890804598,
"learning_rate": 1.7869415807560136e-07,
"logits/chosen": -0.4208546578884125,
"logits/rejected": -0.4081268310546875,
"logps/chosen": -150.35691833496094,
"logps/rejected": -160.062744140625,
"loss": 1.4932,
"nll_loss": 0.4046563506126404,
"rewards/accuracies": 0.620312511920929,
"rewards/chosen": -15.035693168640137,
"rewards/margins": 0.970583438873291,
"rewards/rejected": -16.006277084350586,
"step": 220
},
{
"epoch": 2.1247113163972284,
"grad_norm": 28.319868627323874,
"learning_rate": 1.6151202749140893e-07,
"logits/chosen": -0.4150509834289551,
"logits/rejected": -0.39563247561454773,
"logps/chosen": -154.30528259277344,
"logps/rejected": -164.85025024414062,
"loss": 1.3917,
"nll_loss": 0.424283504486084,
"rewards/accuracies": 0.640625,
"rewards/chosen": -15.43052864074707,
"rewards/margins": 1.0544955730438232,
"rewards/rejected": -16.48502540588379,
"step": 230
},
{
"epoch": 2.2170900692840645,
"grad_norm": 26.100118895427645,
"learning_rate": 1.4432989690721648e-07,
"logits/chosen": -0.3663300573825836,
"logits/rejected": -0.3529093861579895,
"logps/chosen": -153.01861572265625,
"logps/rejected": -165.33999633789062,
"loss": 1.3738,
"nll_loss": 0.40894705057144165,
"rewards/accuracies": 0.6796875,
"rewards/chosen": -15.301861763000488,
"rewards/margins": 1.2321385145187378,
"rewards/rejected": -16.53400230407715,
"step": 240
},
{
"epoch": 2.3094688221709005,
"grad_norm": 31.011772944003695,
"learning_rate": 1.2714776632302405e-07,
"logits/chosen": -0.4251771867275238,
"logits/rejected": -0.4077603816986084,
"logps/chosen": -159.03237915039062,
"logps/rejected": -167.05409240722656,
"loss": 1.3875,
"nll_loss": 0.4267016053199768,
"rewards/accuracies": 0.625,
"rewards/chosen": -15.903238296508789,
"rewards/margins": 0.8021726608276367,
"rewards/rejected": -16.70541000366211,
"step": 250
},
{
"epoch": 2.4018475750577366,
"grad_norm": 27.88691245436523,
"learning_rate": 1.099656357388316e-07,
"logits/chosen": -0.3865527808666229,
"logits/rejected": -0.3643147349357605,
"logps/chosen": -153.9661865234375,
"logps/rejected": -164.27066040039062,
"loss": 1.4061,
"nll_loss": 0.409515380859375,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": -15.39661693572998,
"rewards/margins": 1.0304476022720337,
"rewards/rejected": -16.427064895629883,
"step": 260
},
{
"epoch": 2.4942263279445727,
"grad_norm": 36.88845169314625,
"learning_rate": 9.278350515463918e-08,
"logits/chosen": -0.41444501280784607,
"logits/rejected": -0.3972172141075134,
"logps/chosen": -155.81336975097656,
"logps/rejected": -167.61431884765625,
"loss": 1.3905,
"nll_loss": 0.4134409427642822,
"rewards/accuracies": 0.6703125238418579,
"rewards/chosen": -15.581338882446289,
"rewards/margins": 1.1800928115844727,
"rewards/rejected": -16.761430740356445,
"step": 270
},
{
"epoch": 2.5866050808314087,
"grad_norm": 25.23086170545782,
"learning_rate": 7.560137457044672e-08,
"logits/chosen": -0.36893123388290405,
"logits/rejected": -0.35938116908073425,
"logps/chosen": -149.61871337890625,
"logps/rejected": -161.56121826171875,
"loss": 1.3643,
"nll_loss": 0.4170606732368469,
"rewards/accuracies": 0.667187511920929,
"rewards/chosen": -14.961870193481445,
"rewards/margins": 1.1942520141601562,
"rewards/rejected": -16.1561222076416,
"step": 280
},
{
"epoch": 2.678983833718245,
"grad_norm": 27.86871548844565,
"learning_rate": 5.8419243986254297e-08,
"logits/chosen": -0.39300569891929626,
"logits/rejected": -0.37821659445762634,
"logps/chosen": -158.05575561523438,
"logps/rejected": -168.22007751464844,
"loss": 1.3372,
"nll_loss": 0.4216877520084381,
"rewards/accuracies": 0.6328125,
"rewards/chosen": -15.805575370788574,
"rewards/margins": 1.0164330005645752,
"rewards/rejected": -16.822010040283203,
"step": 290
},
{
"epoch": 2.771362586605081,
"grad_norm": 23.796037905801665,
"learning_rate": 4.123711340206185e-08,
"logits/chosen": -0.3558502793312073,
"logits/rejected": -0.36145851016044617,
"logps/chosen": -145.80899047851562,
"logps/rejected": -159.22427368164062,
"loss": 1.3825,
"nll_loss": 0.42257922887802124,
"rewards/accuracies": 0.6859375238418579,
"rewards/chosen": -14.580899238586426,
"rewards/margins": 1.3415263891220093,
"rewards/rejected": -15.9224271774292,
"step": 300
},
{
"epoch": 2.771362586605081,
"eval_logits/chosen": -0.34973594546318054,
"eval_logits/rejected": -0.3369295001029968,
"eval_logps/chosen": -151.68421936035156,
"eval_logps/rejected": -160.43328857421875,
"eval_loss": 1.6704407930374146,
"eval_nll_loss": 0.4208527207374573,
"eval_rewards/accuracies": 0.6293103694915771,
"eval_rewards/chosen": -15.168424606323242,
"eval_rewards/margins": 0.8749059438705444,
"eval_rewards/rejected": -16.04332733154297,
"eval_runtime": 42.0278,
"eval_samples_per_second": 43.447,
"eval_steps_per_second": 0.69,
"step": 300
},
{
"epoch": 2.863741339491917,
"grad_norm": 26.44341401066327,
"learning_rate": 2.4054982817869415e-08,
"logits/chosen": -0.35747581720352173,
"logits/rejected": -0.34428220987319946,
"logps/chosen": -149.22958374023438,
"logps/rejected": -160.0894317626953,
"loss": 1.408,
"nll_loss": 0.41082078218460083,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": -14.922956466674805,
"rewards/margins": 1.0859849452972412,
"rewards/rejected": -16.008943557739258,
"step": 310
},
{
"epoch": 2.956120092378753,
"grad_norm": 27.055661510056673,
"learning_rate": 6.872852233676975e-09,
"logits/chosen": -0.3484032452106476,
"logits/rejected": -0.3378998041152954,
"logps/chosen": -155.7621307373047,
"logps/rejected": -166.55508422851562,
"loss": 1.3769,
"nll_loss": 0.41843119263648987,
"rewards/accuracies": 0.660937488079071,
"rewards/chosen": -15.576214790344238,
"rewards/margins": 1.0792920589447021,
"rewards/rejected": -16.655506134033203,
"step": 320
},
{
"epoch": 2.9930715935334873,
"step": 324,
"total_flos": 0.0,
"train_loss": 1.6959601876176433,
"train_runtime": 15481.5304,
"train_samples_per_second": 10.731,
"train_steps_per_second": 0.021
}
],
"logging_steps": 10,
"max_steps": 324,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
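
The JSON above is the raw trainer state saved by the Hugging Face Transformers Trainer at the end of this CPO run (note the "TrainerControl" callback entry and the "log_history" array of per-step metrics). Below is a minimal sketch, not part of the checkpoint itself, of one way to load this file and print the training and eval curves; the local path "trainer_state.json" and the exact formatting are illustrative assumptions, while the field names ("loss", "eval_loss", "rewards/margins", "eval_rewards/accuracies", "step") are taken directly from the log entries above.

# Minimal sketch: summarize the log_history of a trainer_state.json.
# Assumes the file sits in the current directory; adjust the path as needed.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry a "loss" key (logged every 10 steps here);
# eval entries carry "eval_loss" (every 100 steps); the final summary entry has neither.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

for e in train_logs:
    print(f"step {e['step']:4d}  loss {e['loss']:.4f}  "
          f"reward margin {e['rewards/margins']:+.3f}")

for e in eval_logs:
    print(f"[eval] step {e['step']:4d}  eval_loss {e['eval_loss']:.4f}  "
          f"accuracy {e['eval_rewards/accuracies']:.3f}")

Running this against the state above would show the CPO loss falling from roughly 2.49 at step 10 to about 1.38 by step 320, with the reward margin and eval accuracy drifting upward over the three epochs.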