{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 385,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 63.25,
      "learning_rate": 1.282051282051282e-07,
      "logits/chosen": -2.7358343601226807,
      "logits/rejected": -2.7480404376983643,
      "logps/chosen": -27.35565757751465,
      "logps/rejected": -21.06114387512207,
      "loss": 1.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03,
      "grad_norm": 108.0,
      "learning_rate": 1.282051282051282e-06,
      "logits/chosen": -3.009650945663452,
      "logits/rejected": -2.998239040374756,
      "logps/chosen": -33.192203521728516,
      "logps/rejected": -31.957557678222656,
      "loss": 1.0141,
      "rewards/accuracies": 0.4166666567325592,
      "rewards/chosen": -0.009009478613734245,
      "rewards/margins": -0.014087951742112637,
      "rewards/rejected": 0.005078474525362253,
      "step": 10
    },
    {
      "epoch": 0.05,
      "grad_norm": 73.5,
      "learning_rate": 2.564102564102564e-06,
      "logits/chosen": -2.899263381958008,
      "logits/rejected": -2.894313335418701,
      "logps/chosen": -32.45400619506836,
      "logps/rejected": -28.9648494720459,
      "loss": 0.9967,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": 0.016872350126504898,
      "rewards/margins": 0.003259001299738884,
      "rewards/rejected": 0.01361334603279829,
      "step": 20
    },
    {
      "epoch": 0.08,
      "grad_norm": 71.0,
      "learning_rate": 3.846153846153847e-06,
      "logits/chosen": -3.0970497131347656,
      "logits/rejected": -3.108996868133545,
      "logps/chosen": -32.78731918334961,
      "logps/rejected": -30.140506744384766,
      "loss": 0.9473,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.09709431231021881,
      "rewards/margins": 0.05265679210424423,
      "rewards/rejected": 0.044437527656555176,
      "step": 30
    },
    {
      "epoch": 0.1,
      "grad_norm": 57.75,
      "learning_rate": 4.999896948438434e-06,
      "logits/chosen": -2.8630309104919434,
      "logits/rejected": -2.8540406227111816,
      "logps/chosen": -31.542278289794922,
      "logps/rejected": -32.394432067871094,
      "loss": 0.8414,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.23043569922447205,
      "rewards/margins": 0.260015070438385,
      "rewards/rejected": -0.02957936003804207,
      "step": 40
    },
    {
      "epoch": 0.13,
      "grad_norm": 44.75,
      "learning_rate": 4.987541037542187e-06,
      "logits/chosen": -2.8809738159179688,
      "logits/rejected": -2.8790669441223145,
      "logps/chosen": -29.41156005859375,
      "logps/rejected": -30.12240219116211,
      "loss": 0.8761,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.26288196444511414,
      "rewards/margins": 0.2896483242511749,
      "rewards/rejected": -0.02676635980606079,
      "step": 50
    },
    {
      "epoch": 0.16,
      "grad_norm": 63.75,
      "learning_rate": 4.954691471941119e-06,
      "logits/chosen": -2.9108948707580566,
      "logits/rejected": -2.912576675415039,
      "logps/chosen": -29.901845932006836,
      "logps/rejected": -28.0941219329834,
      "loss": 0.7968,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.18233974277973175,
      "rewards/margins": 0.2930926978588104,
      "rewards/rejected": -0.11075299978256226,
      "step": 60
    },
    {
      "epoch": 0.18,
      "grad_norm": 72.5,
      "learning_rate": 4.901618883413549e-06,
      "logits/chosen": -2.9933552742004395,
      "logits/rejected": -2.9995028972625732,
      "logps/chosen": -29.255428314208984,
      "logps/rejected": -31.047006607055664,
      "loss": 0.9015,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.020431842654943466,
      "rewards/margins": 0.14459456503391266,
      "rewards/rejected": -0.1241627112030983,
      "step": 70
    },
    {
      "epoch": 0.21,
      "grad_norm": 70.5,
      "learning_rate": 4.828760511501322e-06,
      "logits/chosen": -2.808861255645752,
      "logits/rejected": -2.8243188858032227,
      "logps/chosen": -29.406871795654297,
      "logps/rejected": -29.915807723999023,
      "loss": 0.7973,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.10708501189947128,
      "rewards/margins": 0.30030542612075806,
      "rewards/rejected": -0.19322039186954498,
      "step": 80
    },
    {
      "epoch": 0.23,
      "grad_norm": 56.25,
      "learning_rate": 4.7367166013034295e-06,
      "logits/chosen": -2.8983397483825684,
      "logits/rejected": -2.880967617034912,
      "logps/chosen": -32.71396255493164,
      "logps/rejected": -30.347427368164062,
      "loss": 0.8646,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.09797407686710358,
      "rewards/margins": 0.35859915614128113,
      "rewards/rejected": -0.26062512397766113,
      "step": 90
    },
    {
      "epoch": 0.26,
      "grad_norm": 54.75,
      "learning_rate": 4.626245458345211e-06,
      "logits/chosen": -3.004662275314331,
      "logits/rejected": -3.005678653717041,
      "logps/chosen": -31.851581573486328,
      "logps/rejected": -30.93560791015625,
      "loss": 0.8401,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.1642352044582367,
      "rewards/margins": 0.25978168845176697,
      "rewards/rejected": -0.09554646909236908,
      "step": 100
    },
    {
      "epoch": 0.26,
      "eval_logits/chosen": -2.811776876449585,
      "eval_logits/rejected": -2.809250831604004,
      "eval_logps/chosen": -31.276582717895508,
      "eval_logps/rejected": -34.853797912597656,
      "eval_loss": 0.8872909545898438,
      "eval_rewards/accuracies": 0.6067276000976562,
      "eval_rewards/chosen": 0.0046949307434260845,
      "eval_rewards/margins": 0.12921129167079926,
      "eval_rewards/rejected": -0.12451635301113129,
      "eval_runtime": 113.4101,
      "eval_samples_per_second": 3.024,
      "eval_steps_per_second": 0.379,
      "step": 100
    },
    {
      "epoch": 0.29,
      "grad_norm": 83.0,
      "learning_rate": 4.498257201263691e-06,
      "logits/chosen": -2.9626810550689697,
      "logits/rejected": -2.9382669925689697,
      "logps/chosen": -31.831439971923828,
      "logps/rejected": -31.40035057067871,
      "loss": 0.6952,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.3082864582538605,
      "rewards/margins": 0.5014885663986206,
      "rewards/rejected": -0.1932021528482437,
      "step": 110
    },
    {
      "epoch": 0.31,
      "grad_norm": 65.0,
      "learning_rate": 4.353806263777678e-06,
      "logits/chosen": -3.0443854331970215,
      "logits/rejected": -3.073098659515381,
      "logps/chosen": -28.707149505615234,
      "logps/rejected": -34.32903289794922,
      "loss": 0.7028,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.3654031455516815,
      "rewards/margins": 0.5105921030044556,
      "rewards/rejected": -0.14518897235393524,
      "step": 120
    },
    {
      "epoch": 0.34,
      "grad_norm": 44.5,
      "learning_rate": 4.1940827077152755e-06,
      "logits/chosen": -2.7482428550720215,
      "logits/rejected": -2.743565082550049,
      "logps/chosen": -28.566293716430664,
      "logps/rejected": -30.31746482849121,
      "loss": 0.7405,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.3267093598842621,
      "rewards/margins": 0.47363653779029846,
      "rewards/rejected": -0.14692717790603638,
      "step": 130
    },
    {
      "epoch": 0.36,
      "grad_norm": 46.25,
      "learning_rate": 4.0204024186666215e-06,
      "logits/chosen": -3.022853136062622,
      "logits/rejected": -3.0205140113830566,
      "logps/chosen": -27.139602661132812,
      "logps/rejected": -31.89032554626465,
      "loss": 0.7769,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.27729907631874084,
      "rewards/margins": 0.46414414048194885,
      "rewards/rejected": -0.1868450939655304,
      "step": 140
    },
    {
      "epoch": 0.39,
      "grad_norm": 48.0,
      "learning_rate": 3.834196265035119e-06,
      "logits/chosen": -2.816462516784668,
      "logits/rejected": -2.810920000076294,
      "logps/chosen": -27.263103485107422,
      "logps/rejected": -31.520715713500977,
      "loss": 0.6276,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.4499203562736511,
      "rewards/margins": 0.6875794529914856,
      "rewards/rejected": -0.2376590520143509,
      "step": 150
    },
    {
      "epoch": 0.42,
      "grad_norm": 68.5,
      "learning_rate": 3.636998309800573e-06,
      "logits/chosen": -3.133582592010498,
      "logits/rejected": -3.1159331798553467,
      "logps/chosen": -31.7524471282959,
      "logps/rejected": -29.40524673461914,
      "loss": 0.547,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.5914198756217957,
      "rewards/margins": 0.9317981600761414,
      "rewards/rejected": -0.3403782546520233,
      "step": 160
    },
    {
      "epoch": 0.44,
      "grad_norm": 58.25,
      "learning_rate": 3.4304331721118078e-06,
      "logits/chosen": -2.944953203201294,
      "logits/rejected": -2.952117681503296,
      "logps/chosen": -29.341304779052734,
      "logps/rejected": -31.5146427154541,
      "loss": 0.6062,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.4426456391811371,
      "rewards/margins": 0.7747727632522583,
      "rewards/rejected": -0.33212706446647644,
      "step": 170
    },
    {
      "epoch": 0.47,
      "grad_norm": 57.25,
      "learning_rate": 3.2162026428305436e-06,
      "logits/chosen": -2.795180082321167,
      "logits/rejected": -2.792935848236084,
      "logps/chosen": -29.07159423828125,
      "logps/rejected": -30.085384368896484,
      "loss": 0.5999,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.47860392928123474,
      "rewards/margins": 0.7323731184005737,
      "rewards/rejected": -0.2537691593170166,
      "step": 180
    },
    {
      "epoch": 0.49,
      "grad_norm": 35.5,
      "learning_rate": 2.996071664294641e-06,
      "logits/chosen": -2.9086050987243652,
      "logits/rejected": -2.9050517082214355,
      "logps/chosen": -29.76633644104004,
      "logps/rejected": -28.544025421142578,
      "loss": 0.778,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.3363017439842224,
      "rewards/margins": 0.4854954779148102,
      "rewards/rejected": -0.14919371902942657,
      "step": 190
    },
    {
      "epoch": 0.52,
      "grad_norm": 51.0,
      "learning_rate": 2.7718537898066833e-06,
      "logits/chosen": -2.9782276153564453,
      "logits/rejected": -2.9666411876678467,
      "logps/chosen": -32.82664108276367,
      "logps/rejected": -30.458984375,
      "loss": 0.724,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.6979023218154907,
      "rewards/margins": 0.7178188562393188,
      "rewards/rejected": -0.01991647481918335,
      "step": 200
    },
    {
      "epoch": 0.52,
      "eval_logits/chosen": -2.8178980350494385,
      "eval_logits/rejected": -2.815643548965454,
      "eval_logps/chosen": -31.38960838317871,
      "eval_logps/rejected": -34.92823791503906,
      "eval_loss": 0.9140273332595825,
      "eval_rewards/accuracies": 0.565614640712738,
      "eval_rewards/chosen": -0.08572381734848022,
      "eval_rewards/margins": 0.09834489226341248,
      "eval_rewards/rejected": -0.1840687096118927,
      "eval_runtime": 113.15,
      "eval_samples_per_second": 3.031,
      "eval_steps_per_second": 0.38,
      "step": 200
    },
    {
      "epoch": 0.55,
      "grad_norm": 52.5,
      "learning_rate": 2.5453962426402006e-06,
      "logits/chosen": -2.9144248962402344,
      "logits/rejected": -2.914703845977783,
      "logps/chosen": -32.25563049316406,
      "logps/rejected": -34.11750793457031,
      "loss": 0.5504,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.6222246289253235,
      "rewards/margins": 0.8270590901374817,
      "rewards/rejected": -0.2048344612121582,
      "step": 210
    },
    {
      "epoch": 0.57,
      "grad_norm": 47.5,
      "learning_rate": 2.3185646976551794e-06,
      "logits/chosen": -2.8935537338256836,
      "logits/rejected": -2.909308671951294,
      "logps/chosen": -29.393646240234375,
      "logps/rejected": -28.79998779296875,
      "loss": 0.6005,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.6275160908699036,
      "rewards/margins": 0.8448917269706726,
      "rewards/rejected": -0.21737566590309143,
      "step": 220
    },
    {
      "epoch": 0.6,
      "grad_norm": 46.5,
      "learning_rate": 2.0932279108998323e-06,
      "logits/chosen": -2.9393625259399414,
      "logits/rejected": -2.9434802532196045,
      "logps/chosen": -30.659032821655273,
      "logps/rejected": -31.946239471435547,
      "loss": 0.7369,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.39426764845848083,
      "rewards/margins": 0.5447386503219604,
      "rewards/rejected": -0.15047098696231842,
      "step": 230
    },
    {
      "epoch": 0.62,
      "grad_norm": 46.75,
      "learning_rate": 1.8712423238279358e-06,
      "logits/chosen": -2.99426531791687,
      "logits/rejected": -3.00126314163208,
      "logps/chosen": -29.958850860595703,
      "logps/rejected": -30.447010040283203,
      "loss": 0.4564,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.7568598985671997,
      "rewards/margins": 0.9262750744819641,
      "rewards/rejected": -0.16941508650779724,
      "step": 240
    },
    {
      "epoch": 0.65,
      "grad_norm": 55.0,
      "learning_rate": 1.6544367689701824e-06,
      "logits/chosen": -2.8251967430114746,
      "logits/rejected": -2.815450429916382,
      "logps/chosen": -26.474361419677734,
      "logps/rejected": -29.600570678710938,
      "loss": 0.721,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.3838837444782257,
      "rewards/margins": 0.43902724981307983,
      "rewards/rejected": -0.05514346435666084,
      "step": 250
    },
    {
      "epoch": 0.68,
      "grad_norm": 21.75,
      "learning_rate": 1.4445974030621963e-06,
      "logits/chosen": -2.8065786361694336,
      "logits/rejected": -2.8274922370910645,
      "logps/chosen": -28.88662338256836,
      "logps/rejected": -34.49494934082031,
      "loss": 0.4422,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": 0.7546705603599548,
      "rewards/margins": 1.0113604068756104,
      "rewards/rejected": -0.25668981671333313,
      "step": 260
    },
    {
      "epoch": 0.7,
      "grad_norm": 60.0,
      "learning_rate": 1.243452991757889e-06,
      "logits/chosen": -2.948761463165283,
      "logits/rejected": -2.955237627029419,
      "logps/chosen": -30.040813446044922,
      "logps/rejected": -30.221237182617188,
      "loss": 0.5322,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.5924339890480042,
      "rewards/margins": 0.8772123456001282,
      "rewards/rejected": -0.2847784161567688,
      "step": 270
    },
    {
      "epoch": 0.73,
      "grad_norm": 29.375,
      "learning_rate": 1.0526606671603523e-06,
      "logits/chosen": -2.9630208015441895,
      "logits/rejected": -2.949868679046631,
      "logps/chosen": -30.0323429107666,
      "logps/rejected": -28.719945907592773,
      "loss": 0.7384,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.5514736771583557,
      "rewards/margins": 0.6223500967025757,
      "rewards/rejected": -0.07087641209363937,
      "step": 280
    },
    {
      "epoch": 0.75,
      "grad_norm": 36.5,
      "learning_rate": 8.737922755071455e-07,
      "logits/chosen": -2.890263080596924,
      "logits/rejected": -2.872467279434204,
      "logps/chosen": -31.614023208618164,
      "logps/rejected": -30.969629287719727,
      "loss": 0.4081,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": 0.8178254961967468,
      "rewards/margins": 1.1748807430267334,
      "rewards/rejected": -0.3570552468299866,
      "step": 290
    },
    {
      "epoch": 0.78,
      "grad_norm": 47.0,
      "learning_rate": 7.08321427484816e-07,
      "logits/chosen": -2.892866611480713,
      "logits/rejected": -2.8899810314178467,
      "logps/chosen": -31.352060317993164,
      "logps/rejected": -27.79092025756836,
      "loss": 0.6408,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.6964761018753052,
      "rewards/margins": 0.8463441729545593,
      "rewards/rejected": -0.14986807107925415,
      "step": 300
    },
    {
      "epoch": 0.78,
      "eval_logits/chosen": -2.810232400894165,
      "eval_logits/rejected": -2.807687520980835,
      "eval_logps/chosen": -31.392431259155273,
      "eval_logps/rejected": -34.949954986572266,
      "eval_loss": 0.9091227650642395,
      "eval_rewards/accuracies": 0.5627076625823975,
      "eval_rewards/chosen": -0.08798420429229736,
      "eval_rewards/margins": 0.11345873028039932,
      "eval_rewards/rejected": -0.20144294202327728,
      "eval_runtime": 113.1296,
      "eval_samples_per_second": 3.032,
      "eval_steps_per_second": 0.38,
      "step": 300
    },
    {
      "epoch": 0.81,
      "grad_norm": 57.5,
      "learning_rate": 5.576113578589035e-07,
      "logits/chosen": -2.771563768386841,
      "logits/rejected": -2.790158748626709,
      "logps/chosen": -28.552204132080078,
      "logps/rejected": -31.123676300048828,
      "loss": 0.4625,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": 0.5417758226394653,
      "rewards/margins": 0.855111300945282,
      "rewards/rejected": -0.31333547830581665,
      "step": 310
    },
    {
      "epoch": 0.83,
      "grad_norm": 50.75,
      "learning_rate": 4.229036944380913e-07,
      "logits/chosen": -3.01896333694458,
      "logits/rejected": -3.0036964416503906,
      "logps/chosen": -29.334665298461914,
      "logps/rejected": -28.647085189819336,
      "loss": 0.4327,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": 0.7015351057052612,
      "rewards/margins": 0.9641841650009155,
      "rewards/rejected": -0.2626491189002991,
      "step": 320
    },
    {
      "epoch": 0.86,
      "grad_norm": 19.25,
      "learning_rate": 3.053082288996112e-07,
      "logits/chosen": -2.9340624809265137,
      "logits/rejected": -2.916344404220581,
      "logps/chosen": -27.29937744140625,
      "logps/rejected": -30.881011962890625,
      "loss": 0.3934,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": 0.8501566052436829,
      "rewards/margins": 1.2416493892669678,
      "rewards/rejected": -0.3914927840232849,
      "step": 330
    },
    {
      "epoch": 0.88,
      "grad_norm": 54.0,
      "learning_rate": 2.0579377374915805e-07,
      "logits/chosen": -3.1503987312316895,
      "logits/rejected": -3.156454563140869,
      "logps/chosen": -30.917760848999023,
      "logps/rejected": -33.217864990234375,
      "loss": 0.4822,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.5771540403366089,
      "rewards/margins": 0.994644820690155,
      "rewards/rejected": -0.4174906611442566,
      "step": 340
    },
    {
      "epoch": 0.91,
      "grad_norm": 27.0,
      "learning_rate": 1.2518018074041684e-07,
      "logits/chosen": -3.026543140411377,
      "logits/rejected": -3.0300345420837402,
      "logps/chosen": -29.901714324951172,
      "logps/rejected": -31.784900665283203,
      "loss": 0.5385,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.7241344451904297,
      "rewards/margins": 0.9414092302322388,
      "rewards/rejected": -0.2172747403383255,
      "step": 350
    },
    {
      "epoch": 0.94,
      "grad_norm": 47.25,
      "learning_rate": 6.41315865106129e-08,
      "logits/chosen": -2.8658933639526367,
      "logits/rejected": -2.8671889305114746,
      "logps/chosen": -27.54721450805664,
      "logps/rejected": -30.053890228271484,
      "loss": 0.4298,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.8820101022720337,
      "rewards/margins": 1.0603699684143066,
      "rewards/rejected": -0.17835985124111176,
      "step": 360
    },
    {
      "epoch": 0.96,
      "grad_norm": 62.75,
      "learning_rate": 2.3150941078050325e-08,
      "logits/chosen": -2.9409708976745605,
      "logits/rejected": -2.9394469261169434,
      "logps/chosen": -29.869876861572266,
      "logps/rejected": -32.248016357421875,
      "loss": 0.486,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.7334206700325012,
      "rewards/margins": 0.9893990755081177,
      "rewards/rejected": -0.25597840547561646,
      "step": 370
    },
    {
      "epoch": 0.99,
      "grad_norm": 56.5,
      "learning_rate": 2.575864278703266e-09,
      "logits/chosen": -2.8988537788391113,
      "logits/rejected": -2.8815865516662598,
      "logps/chosen": -28.09465980529785,
      "logps/rejected": -28.32416343688965,
      "loss": 0.4149,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 0.6803036332130432,
      "rewards/margins": 1.027940273284912,
      "rewards/rejected": -0.3476366698741913,
      "step": 380
    },
    {
      "epoch": 1.0,
      "step": 385,
      "total_flos": 0.0,
      "train_loss": 0.6588526527602951,
      "train_runtime": 2719.293,
      "train_samples_per_second": 1.132,
      "train_steps_per_second": 0.142
    }
  ],
  "logging_steps": 10,
  "max_steps": 385,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}