{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9417304296645085,
  "eval_steps": 50,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.015695507161075144,
      "grad_norm": 0.07061820477247238,
      "learning_rate": 4.999451708687114e-06,
      "logits/chosen": 15.001790046691895,
      "logits/rejected": 14.624488830566406,
      "logps/chosen": -0.33085882663726807,
      "logps/rejected": -0.24924471974372864,
      "loss": 1.0519,
      "rewards/accuracies": 0.23749999701976776,
      "rewards/chosen": -0.4962882399559021,
      "rewards/margins": -0.12242116779088974,
      "rewards/rejected": -0.37386709451675415,
      "step": 10
    },
    {
      "epoch": 0.03139101432215029,
      "grad_norm": 0.060370542109012604,
      "learning_rate": 4.997807075247147e-06,
      "logits/chosen": 15.272351264953613,
      "logits/rejected": 14.801017761230469,
      "logps/chosen": -0.3379867672920227,
      "logps/rejected": -0.24759705364704132,
      "loss": 1.0494,
      "rewards/accuracies": 0.15000000596046448,
      "rewards/chosen": -0.5069801211357117,
      "rewards/margins": -0.13558456301689148,
      "rewards/rejected": -0.3713955581188202,
      "step": 20
    },
    {
      "epoch": 0.047086521483225424,
      "grad_norm": 0.050332240760326385,
      "learning_rate": 4.9950668210706795e-06,
      "logits/chosen": 15.569372177124023,
      "logits/rejected": 15.249380111694336,
      "logps/chosen": -0.3347483277320862,
      "logps/rejected": -0.275604784488678,
      "loss": 1.0375,
      "rewards/accuracies": 0.21250000596046448,
      "rewards/chosen": -0.5021225214004517,
      "rewards/margins": -0.08871528506278992,
      "rewards/rejected": -0.41340717673301697,
      "step": 30
    },
    {
      "epoch": 0.06278202864430057,
      "grad_norm": 0.054660990834236145,
      "learning_rate": 4.9912321481237616e-06,
      "logits/chosen": 15.344067573547363,
      "logits/rejected": 14.881669998168945,
      "logps/chosen": -0.3336474299430847,
      "logps/rejected": -0.27252259850502014,
      "loss": 1.0375,
      "rewards/accuracies": 0.20000000298023224,
      "rewards/chosen": -0.5004712343215942,
      "rewards/margins": -0.09168727695941925,
      "rewards/rejected": -0.40878385305404663,
      "step": 40
    },
    {
      "epoch": 0.07847753580537571,
      "grad_norm": 0.060405269265174866,
      "learning_rate": 4.986304738420684e-06,
      "logits/chosen": 15.5701904296875,
      "logits/rejected": 15.60998821258545,
      "logps/chosen": -0.3075069785118103,
      "logps/rejected": -0.24850216507911682,
      "loss": 1.0341,
      "rewards/accuracies": 0.20000000298023224,
      "rewards/chosen": -0.46126049757003784,
      "rewards/margins": -0.08850721269845963,
      "rewards/rejected": -0.3727532923221588,
      "step": 50
    },
    {
      "epoch": 0.07847753580537571,
      "eval_logits/chosen": 15.719181060791016,
      "eval_logits/rejected": 15.20205307006836,
      "eval_logps/chosen": -0.32490479946136475,
      "eval_logps/rejected": -0.26058924198150635,
      "eval_loss": 1.0295902490615845,
      "eval_rewards/accuracies": 0.26923078298568726,
      "eval_rewards/chosen": -0.48735716938972473,
      "eval_rewards/margins": -0.09647335112094879,
      "eval_rewards/rejected": -0.39088380336761475,
      "eval_runtime": 14.6102,
      "eval_samples_per_second": 28.199,
      "eval_steps_per_second": 3.559,
      "step": 50
    },
    {
      "epoch": 0.09417304296645085,
      "grad_norm": 0.06511708348989487,
      "learning_rate": 4.980286753286196e-06,
      "logits/chosen": 15.571348190307617,
      "logits/rejected": 15.418828010559082,
      "logps/chosen": -0.3217025101184845,
      "logps/rejected": -0.2813836932182312,
      "loss": 1.0385,
      "rewards/accuracies": 0.23749999701976776,
      "rewards/chosen": -0.48255378007888794,
      "rewards/margins": -0.060478221625089645,
      "rewards/rejected": -0.4220755696296692,
      "step": 60
    },
    {
      "epoch": 0.109868550127526,
      "grad_norm": 0.06922140717506409,
      "learning_rate": 4.973180832407471e-06,
      "logits/chosen": 15.926614761352539,
      "logits/rejected": 15.90565299987793,
      "logps/chosen": -0.34742942452430725,
      "logps/rejected": -0.26853513717651367,
      "loss": 1.0417,
      "rewards/accuracies": 0.21250000596046448,
      "rewards/chosen": -0.5211440324783325,
      "rewards/margins": -0.11834144592285156,
      "rewards/rejected": -0.40280264616012573,
      "step": 70
    },
    {
      "epoch": 0.12556405728860115,
      "grad_norm": 0.07853339612483978,
      "learning_rate": 4.964990092676263e-06,
      "logits/chosen": 15.876957893371582,
      "logits/rejected": 15.636863708496094,
      "logps/chosen": -0.34999752044677734,
      "logps/rejected": -0.2652502655982971,
      "loss": 1.0255,
      "rewards/accuracies": 0.1875,
      "rewards/chosen": -0.524996280670166,
      "rewards/margins": -0.12712089717388153,
      "rewards/rejected": -0.3978753685951233,
      "step": 80
    },
    {
      "epoch": 0.14125956444967627,
      "grad_norm": 0.07262148708105087,
      "learning_rate": 4.9557181268217225e-06,
      "logits/chosen": 16.037456512451172,
      "logits/rejected": 15.742408752441406,
      "logps/chosen": -0.32482510805130005,
      "logps/rejected": -0.2520079016685486,
      "loss": 1.0297,
      "rewards/accuracies": 0.21250000596046448,
      "rewards/chosen": -0.48723769187927246,
      "rewards/margins": -0.1092257872223854,
      "rewards/rejected": -0.3780118525028229,
      "step": 90
    },
    {
      "epoch": 0.15695507161075142,
      "grad_norm": 0.08407289534807205,
      "learning_rate": 4.9453690018345144e-06,
      "logits/chosen": 16.34904670715332,
      "logits/rejected": 16.30767059326172,
      "logps/chosen": -0.3302846848964691,
      "logps/rejected": -0.2799247205257416,
      "loss": 1.0285,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": -0.4954269826412201,
      "rewards/margins": -0.07553993165493011,
      "rewards/rejected": -0.41988706588745117,
      "step": 100
    },
    {
      "epoch": 0.15695507161075142,
      "eval_logits/chosen": 16.43848419189453,
      "eval_logits/rejected": 15.978095054626465,
      "eval_logps/chosen": -0.3256901502609253,
      "eval_logps/rejected": -0.2794351279735565,
      "eval_loss": 1.0174708366394043,
      "eval_rewards/accuracies": 0.2884615361690521,
      "eval_rewards/chosen": -0.4885352551937103,
      "eval_rewards/margins": -0.06938254088163376,
      "eval_rewards/rejected": -0.41915270686149597,
      "eval_runtime": 14.607,
      "eval_samples_per_second": 28.206,
      "eval_steps_per_second": 3.56,
      "step": 100
    },
    {
      "epoch": 0.17265057877182657,
      "grad_norm": 0.07448805868625641,
      "learning_rate": 4.933947257182901e-06,
      "logits/chosen": 16.681476593017578,
      "logits/rejected": 16.208908081054688,
      "logps/chosen": -0.3410753309726715,
      "logps/rejected": -0.28177398443222046,
      "loss": 1.0364,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": -0.5116130113601685,
      "rewards/margins": -0.08895199000835419,
      "rewards/rejected": -0.4226610064506531,
      "step": 110
    },
    {
      "epoch": 0.1883460859329017,
      "grad_norm": 0.17266640067100525,
      "learning_rate": 4.921457902821578e-06,
      "logits/chosen": 16.73276710510254,
      "logits/rejected": 16.46255111694336,
      "logps/chosen": -0.30808666348457336,
      "logps/rejected": -0.2663067877292633,
      "loss": 1.0242,
      "rewards/accuracies": 0.23749999701976776,
      "rewards/chosen": -0.46213001012802124,
      "rewards/margins": -0.0626697838306427,
      "rewards/rejected": -0.39946022629737854,
      "step": 120
    },
    {
      "epoch": 0.20404159309397685,
      "grad_norm": 0.1035998985171318,
      "learning_rate": 4.907906416994146e-06,
      "logits/chosen": 16.380239486694336,
      "logits/rejected": 16.233612060546875,
      "logps/chosen": -0.32250356674194336,
      "logps/rejected": -0.27460020780563354,
      "loss": 1.0162,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": -0.48375529050827026,
      "rewards/margins": -0.0718550831079483,
      "rewards/rejected": -0.41190028190612793,
      "step": 130
    },
    {
      "epoch": 0.219737100255052,
      "grad_norm": 0.0961030125617981,
      "learning_rate": 4.893298743830168e-06,
      "logits/chosen": 16.440303802490234,
      "logits/rejected": 16.233903884887695,
      "logps/chosen": -0.33421438932418823,
      "logps/rejected": -0.2857271134853363,
      "loss": 1.0008,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.5013214945793152,
      "rewards/margins": -0.07273083180189133,
      "rewards/rejected": -0.42859068512916565,
      "step": 140
    },
    {
      "epoch": 0.23543260741612712,
      "grad_norm": 0.14799565076828003,
      "learning_rate": 4.8776412907378845e-06,
      "logits/chosen": 16.600738525390625,
      "logits/rejected": 16.536447525024414,
      "logps/chosen": -0.3243326246738434,
      "logps/rejected": -0.3061785101890564,
      "loss": 1.0085,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": -0.4864989221096039,
      "rewards/margins": -0.027231160551309586,
      "rewards/rejected": -0.459267795085907,
      "step": 150
    },
    {
      "epoch": 0.23543260741612712,
      "eval_logits/chosen": 16.8078556060791,
      "eval_logits/rejected": 16.311922073364258,
      "eval_logps/chosen": -0.326242595911026,
      "eval_logps/rejected": -0.3398977816104889,
      "eval_loss": 0.9791463017463684,
      "eval_rewards/accuracies": 0.4038461446762085,
      "eval_rewards/chosen": -0.4893638789653778,
      "eval_rewards/margins": 0.020482787862420082,
      "eval_rewards/rejected": -0.5098467469215393,
      "eval_runtime": 14.6072,
      "eval_samples_per_second": 28.205,
      "eval_steps_per_second": 3.56,
      "step": 150
    },
    {
      "epoch": 0.2511281145772023,
      "grad_norm": 0.11464422941207886,
      "learning_rate": 4.860940925593703e-06,
      "logits/chosen": 16.586620330810547,
      "logits/rejected": 16.253028869628906,
      "logps/chosen": -0.33331993222236633,
      "logps/rejected": -0.35580095648765564,
      "loss": 0.9827,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": -0.49997982382774353,
      "rewards/margins": 0.03372158855199814,
      "rewards/rejected": -0.5337014198303223,
      "step": 160
    },
    {
      "epoch": 0.2668236217382774,
      "grad_norm": 0.09462621062994003,
      "learning_rate": 4.84320497372973e-06,
      "logits/chosen": 16.220312118530273,
      "logits/rejected": 16.105356216430664,
      "logps/chosen": -0.2957116663455963,
      "logps/rejected": -0.3528788089752197,
      "loss": 0.9716,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": -0.44356757402420044,
      "rewards/margins": 0.08575066924095154,
      "rewards/rejected": -0.5293182134628296,
      "step": 170
    },
    {
      "epoch": 0.28251912889935255,
      "grad_norm": 0.10995513945817947,
      "learning_rate": 4.824441214720629e-06,
      "logits/chosen": 16.811044692993164,
      "logits/rejected": 16.43205451965332,
      "logps/chosen": -0.3263992369174957,
      "logps/rejected": -0.35878634452819824,
      "loss": 0.9546,
      "rewards/accuracies": 0.4124999940395355,
      "rewards/chosen": -0.4895988404750824,
      "rewards/margins": 0.04858064278960228,
      "rewards/rejected": -0.5381795167922974,
      "step": 180
    },
    {
      "epoch": 0.2982146360604277,
      "grad_norm": 0.1348036825656891,
      "learning_rate": 4.804657878971252e-06,
      "logits/chosen": 16.816158294677734,
      "logits/rejected": 16.55413055419922,
      "logps/chosen": -0.31842875480651855,
      "logps/rejected": -0.3791810870170593,
      "loss": 0.9472,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.47764310240745544,
      "rewards/margins": 0.09112847596406937,
      "rewards/rejected": -0.5687715411186218,
      "step": 190
    },
    {
      "epoch": 0.31391014322150285,
      "grad_norm": 0.13393694162368774,
      "learning_rate": 4.783863644106502e-06,
      "logits/chosen": 17.075885772705078,
      "logits/rejected": 16.982004165649414,
      "logps/chosen": -0.3368823528289795,
      "logps/rejected": -0.3636801540851593,
      "loss": 0.9455,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.5053235292434692,
      "rewards/margins": 0.04019671678543091,
      "rewards/rejected": -0.5455202460289001,
      "step": 200
    },
    {
      "epoch": 0.31391014322150285,
      "eval_logits/chosen": 17.17239761352539,
      "eval_logits/rejected": 16.609193801879883,
      "eval_logps/chosen": -0.33508703112602234,
      "eval_logps/rejected": -0.4493505656719208,
      "eval_loss": 0.9283667802810669,
      "eval_rewards/accuracies": 0.4615384638309479,
      "eval_rewards/chosen": -0.5026305913925171,
      "eval_rewards/margins": 0.17139528691768646,
      "eval_rewards/rejected": -0.67402583360672,
      "eval_runtime": 14.5913,
      "eval_samples_per_second": 28.236,
      "eval_steps_per_second": 3.564,
      "step": 200
    },
    {
      "epoch": 0.329605650382578,
      "grad_norm": 0.14985737204551697,
      "learning_rate": 4.762067631165049e-06,
      "logits/chosen": 16.92913246154785,
      "logits/rejected": 16.648632049560547,
      "logps/chosen": -0.3326043486595154,
      "logps/rejected": -0.4218205511569977,
      "loss": 0.9225,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -0.4989064335823059,
      "rewards/margins": 0.13382436335086823,
      "rewards/rejected": -0.6327308416366577,
      "step": 210
    },
    {
      "epoch": 0.34530115754365315,
      "grad_norm": 0.16972233355045319,
      "learning_rate": 4.7392794005985324e-06,
      "logits/chosen": 17.074562072753906,
      "logits/rejected": 16.863161087036133,
      "logps/chosen": -0.3645358383655548,
      "logps/rejected": -0.5221719145774841,
      "loss": 0.9264,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.5468038320541382,
      "rewards/margins": 0.23645417392253876,
      "rewards/rejected": -0.7832580208778381,
      "step": 220
    },
    {
      "epoch": 0.3609966647047283,
      "grad_norm": 0.20796354115009308,
      "learning_rate": 4.715508948078037e-06,
      "logits/chosen": 17.4168643951416,
      "logits/rejected": 17.00813102722168,
      "logps/chosen": -0.35443297028541565,
      "logps/rejected": -0.5115998983383179,
      "loss": 0.9037,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.5316494703292847,
      "rewards/margins": 0.23575039207935333,
      "rewards/rejected": -0.767399787902832,
      "step": 230
    },
    {
      "epoch": 0.3766921718658034,
      "grad_norm": 0.1932348757982254,
      "learning_rate": 4.690766700109659e-06,
      "logits/chosen": 17.034719467163086,
      "logits/rejected": 16.65166664123535,
      "logps/chosen": -0.3426091969013214,
      "logps/rejected": -0.47884297370910645,
      "loss": 0.9154,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.5139138102531433,
      "rewards/margins": 0.20435063540935516,
      "rewards/rejected": -0.7182644605636597,
      "step": 240
    },
    {
      "epoch": 0.39238767902687854,
      "grad_norm": 1.016450047492981,
      "learning_rate": 4.665063509461098e-06,
      "logits/chosen": 16.935081481933594,
      "logits/rejected": 16.65024185180664,
      "logps/chosen": -0.41738080978393555,
      "logps/rejected": -0.48515433073043823,
      "loss": 0.8774,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": -0.6260712146759033,
      "rewards/margins": 0.10166029632091522,
      "rewards/rejected": -0.7277315258979797,
      "step": 250
    },
    {
      "epoch": 0.39238767902687854,
      "eval_logits/chosen": 17.221881866455078,
      "eval_logits/rejected": 16.536666870117188,
      "eval_logps/chosen": -0.3716265559196472,
      "eval_logps/rejected": -0.87992262840271,
      "eval_loss": 0.8047741651535034,
      "eval_rewards/accuracies": 0.5,
      "eval_rewards/chosen": -0.5574398636817932,
      "eval_rewards/margins": 0.7624441981315613,
      "eval_rewards/rejected": -1.3198840618133545,
      "eval_runtime": 14.6008,
      "eval_samples_per_second": 28.218,
      "eval_steps_per_second": 3.561,
      "step": 250
    },
    {
      "epoch": 0.4080831861879537,
      "grad_norm": 0.2831684648990631,
      "learning_rate": 4.638410650401267e-06,
      "logits/chosen": 17.084096908569336,
      "logits/rejected": 16.564823150634766,
      "logps/chosen": -0.38100525736808777,
      "logps/rejected": -0.8173269033432007,
      "loss": 0.8197,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.5715079307556152,
      "rewards/margins": 0.6544824838638306,
      "rewards/rejected": -1.2259904146194458,
      "step": 260
    },
    {
      "epoch": 0.42377869334902885,
      "grad_norm": 0.9195305109024048,
      "learning_rate": 4.610819813755038e-06,
      "logits/chosen": 17.316537857055664,
      "logits/rejected": 16.623016357421875,
      "logps/chosen": -0.4713365435600281,
      "logps/rejected": -1.2184536457061768,
      "loss": 0.7493,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.7070047855377197,
      "rewards/margins": 1.120675802230835,
      "rewards/rejected": -1.8276805877685547,
      "step": 270
    },
    {
      "epoch": 0.439474200510104,
      "grad_norm": 0.5542411804199219,
      "learning_rate": 4.582303101775249e-06,
      "logits/chosen": 17.173168182373047,
      "logits/rejected": 16.759702682495117,
      "logps/chosen": -0.4629506468772888,
      "logps/rejected": -1.3020483255386353,
      "loss": 0.7638,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.6944260597229004,
      "rewards/margins": 1.2586463689804077,
      "rewards/rejected": -1.9530725479125977,
      "step": 280
    },
    {
      "epoch": 0.45516970767117915,
      "grad_norm": 0.2836654782295227,
      "learning_rate": 4.55287302283426e-06,
      "logits/chosen": 17.022424697875977,
      "logits/rejected": 16.43834114074707,
      "logps/chosen": -0.527222216129303,
      "logps/rejected": -1.632965087890625,
      "loss": 0.7466,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.7908332943916321,
      "rewards/margins": 1.6586145162582397,
      "rewards/rejected": -2.4494476318359375,
      "step": 290
    },
    {
      "epoch": 0.47086521483225424,
      "grad_norm": 0.658358097076416,
      "learning_rate": 4.522542485937369e-06,
      "logits/chosen": 16.6541690826416,
      "logits/rejected": 16.34103012084961,
      "logps/chosen": -0.50589519739151,
      "logps/rejected": -1.8910831212997437,
      "loss": 0.6982,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.7588427662849426,
      "rewards/margins": 2.077782154083252,
      "rewards/rejected": -2.8366246223449707,
      "step": 300
    },
    {
      "epoch": 0.47086521483225424,
      "eval_logits/chosen": 16.950870513916016,
      "eval_logits/rejected": 16.154659271240234,
      "eval_logps/chosen": -0.5594518184661865,
      "eval_logps/rejected": -1.9617934226989746,
      "eval_loss": 0.7422243356704712,
      "eval_rewards/accuracies": 0.7115384340286255,
      "eval_rewards/chosen": -0.839177668094635,
      "eval_rewards/margins": 2.103512763977051,
      "eval_rewards/rejected": -2.942690372467041,
      "eval_runtime": 14.5969,
      "eval_samples_per_second": 28.225,
      "eval_steps_per_second": 3.562,
      "step": 300
    },
    {
      "epoch": 0.4865607219933294,
      "grad_norm": 1.1044840812683105,
      "learning_rate": 4.491324795060491e-06,
      "logits/chosen": 17.233802795410156,
      "logits/rejected": 16.292484283447266,
      "logps/chosen": -0.5940151214599609,
      "logps/rejected": -2.041588306427002,
      "loss": 0.7415,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.8910226821899414,
      "rewards/margins": 2.1713600158691406,
      "rewards/rejected": -3.062382459640503,
      "step": 310
    },
    {
      "epoch": 0.5022562291544046,
      "grad_norm": 0.524356484413147,
      "learning_rate": 4.4592336433146e-06,
      "logits/chosen": 17.155719757080078,
      "logits/rejected": 16.328996658325195,
      "logps/chosen": -0.6095865964889526,
      "logps/rejected": -1.831730604171753,
      "loss": 0.7249,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.9143797755241394,
      "rewards/margins": 1.8332160711288452,
      "rewards/rejected": -2.747596025466919,
      "step": 320
    },
    {
      "epoch": 0.5179517363154797,
      "grad_norm": 0.4383145570755005,
      "learning_rate": 4.426283106939474e-06,
      "logits/chosen": 16.774551391601562,
      "logits/rejected": 16.532238006591797,
      "logps/chosen": -0.669482946395874,
      "logps/rejected": -1.7058660984039307,
      "loss": 0.6868,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -1.004224419593811,
      "rewards/margins": 1.5545748472213745,
      "rewards/rejected": -2.5587992668151855,
      "step": 330
    },
    {
      "epoch": 0.5336472434765548,
      "grad_norm": 3.194026231765747,
      "learning_rate": 4.3924876391293915e-06,
      "logits/chosen": 16.963787078857422,
      "logits/rejected": 16.3092041015625,
      "logps/chosen": -0.7091449499130249,
      "logps/rejected": -2.1812453269958496,
      "loss": 0.706,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.0637174844741821,
      "rewards/margins": 2.208150863647461,
      "rewards/rejected": -3.2718684673309326,
      "step": 340
    },
    {
      "epoch": 0.5493427506376299,
      "grad_norm": 0.5639687776565552,
      "learning_rate": 4.357862063693486e-06,
      "logits/chosen": 17.019062042236328,
      "logits/rejected": 16.315940856933594,
      "logps/chosen": -0.8996315002441406,
      "logps/rejected": -2.0307648181915283,
      "loss": 0.674,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -1.349447250366211,
      "rewards/margins": 1.696699857711792,
      "rewards/rejected": -3.046147108078003,
      "step": 350
    },
    {
      "epoch": 0.5493427506376299,
      "eval_logits/chosen": 16.70261001586914,
      "eval_logits/rejected": 15.845681190490723,
      "eval_logps/chosen": -0.9138904809951782,
      "eval_logps/rejected": -2.5003392696380615,
      "eval_loss": 0.6938430070877075,
      "eval_rewards/accuracies": 0.8461538553237915,
      "eval_rewards/chosen": -1.3708356618881226,
      "eval_rewards/margins": 2.3796732425689697,
      "eval_rewards/rejected": -3.75050950050354,
      "eval_runtime": 14.6072,
      "eval_samples_per_second": 28.205,
      "eval_steps_per_second": 3.56,
      "step": 350
    },
    {
      "epoch": 0.5650382577987051,
      "grad_norm": 0.6917555928230286,
      "learning_rate": 4.322421568553529e-06,
      "logits/chosen": 16.269399642944336,
      "logits/rejected": 15.73193645477295,
      "logps/chosen": -1.0171478986740112,
      "logps/rejected": -2.201298236846924,
      "loss": 0.651,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -1.525721788406372,
      "rewards/margins": 1.7762253284454346,
      "rewards/rejected": -3.3019473552703857,
      "step": 360
    },
    {
      "epoch": 0.5807337649597802,
      "grad_norm": 1.151584267616272,
      "learning_rate": 4.286181699082008e-06,
      "logits/chosen": 15.88188648223877,
      "logits/rejected": 15.261810302734375,
      "logps/chosen": -1.2272206544876099,
      "logps/rejected": -3.0203287601470947,
      "loss": 0.665,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -1.8408310413360596,
      "rewards/margins": 2.689661741256714,
      "rewards/rejected": -4.530492305755615,
      "step": 370
    },
    {
      "epoch": 0.5964292721208554,
      "grad_norm": 1.1879621744155884,
      "learning_rate": 4.249158351283414e-06,
      "logits/chosen": 15.906834602355957,
      "logits/rejected": 15.42895221710205,
      "logps/chosen": -1.5027008056640625,
      "logps/rejected": -2.9172558784484863,
      "loss": 0.5998,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -2.2540509700775146,
      "rewards/margins": 2.1218326091766357,
      "rewards/rejected": -4.37588357925415,
      "step": 380
    },
    {
      "epoch": 0.6121247792819305,
      "grad_norm": 1.678884744644165,
      "learning_rate": 4.211367764821722e-06,
      "logits/chosen": 15.630645751953125,
      "logits/rejected": 14.888415336608887,
      "logps/chosen": -2.231367349624634,
      "logps/rejected": -3.5754780769348145,
      "loss": 0.5937,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -3.347050905227661,
      "rewards/margins": 2.0161664485931396,
      "rewards/rejected": -5.363216876983643,
      "step": 390
    },
    {
      "epoch": 0.6278202864430057,
      "grad_norm": 2.008671998977661,
      "learning_rate": 4.172826515897146e-06,
      "logits/chosen": 15.024614334106445,
      "logits/rejected": 14.981277465820312,
      "logps/chosen": -2.665980815887451,
      "logps/rejected": -3.582763195037842,
      "loss": 0.5722,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -3.998971462249756,
      "rewards/margins": 1.3751739263534546,
      "rewards/rejected": -5.374145030975342,
      "step": 400
    },
    {
      "epoch": 0.6278202864430057,
      "eval_logits/chosen": 15.45953369140625,
      "eval_logits/rejected": 14.663763046264648,
      "eval_logps/chosen": -2.7269070148468018,
      "eval_logps/rejected": -4.235719680786133,
      "eval_loss": 0.5668805837631226,
      "eval_rewards/accuracies": 0.8846153616905212,
      "eval_rewards/chosen": -4.090360641479492,
      "eval_rewards/margins": 2.263219118118286,
      "eval_rewards/rejected": -6.353579998016357,
      "eval_runtime": 14.6036,
      "eval_samples_per_second": 28.212,
      "eval_steps_per_second": 3.561,
      "step": 400
    },
    {
      "epoch": 0.6435157936040808,
      "grad_norm": 1.018362045288086,
      "learning_rate": 4.133551509975264e-06,
      "logits/chosen": 15.027656555175781,
      "logits/rejected": 14.74786376953125,
      "logps/chosen": -2.76598858833313,
      "logps/rejected": -4.335003852844238,
      "loss": 0.5412,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": -4.148982524871826,
      "rewards/margins": 2.3535237312316895,
      "rewards/rejected": -6.502506256103516,
      "step": 410
    },
    {
      "epoch": 0.659211300765156,
      "grad_norm": 2.2899765968322754,
      "learning_rate": 4.093559974371725e-06,
      "logits/chosen": 15.15197467803955,
      "logits/rejected": 14.681947708129883,
      "logps/chosen": -2.9831995964050293,
      "logps/rejected": -4.723050117492676,
      "loss": 0.4652,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -4.474799633026123,
      "rewards/margins": 2.6097757816314697,
      "rewards/rejected": -7.0845746994018555,
      "step": 420
    },
    {
      "epoch": 0.6749068079262311,
      "grad_norm": 1.419196605682373,
      "learning_rate": 4.052869450695776e-06,
      "logits/chosen": 15.000317573547363,
      "logits/rejected": 14.538591384887695,
      "logps/chosen": -3.543004274368286,
      "logps/rejected": -5.037484169006348,
      "loss": 0.427,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -5.314507007598877,
      "rewards/margins": 2.2417192459106445,
      "rewards/rejected": -7.556225776672363,
      "step": 430
    },
    {
      "epoch": 0.6906023150873063,
      "grad_norm": 1.639215111732483,
      "learning_rate": 4.011497787155938e-06,
      "logits/chosen": 14.798017501831055,
      "logits/rejected": 14.236108779907227,
      "logps/chosen": -4.154418468475342,
      "logps/rejected": -6.0606184005737305,
      "loss": 0.4692,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": -6.231626987457275,
      "rewards/margins": 2.859299421310425,
      "rewards/rejected": -9.090926170349121,
      "step": 440
    },
    {
      "epoch": 0.7062978222483814,
      "grad_norm": 1.6229957342147827,
      "learning_rate": 3.969463130731183e-06,
      "logits/chosen": 14.495908737182617,
      "logits/rejected": 13.978658676147461,
      "logps/chosen": -3.4806721210479736,
      "logps/rejected": -5.746390342712402,
      "loss": 0.4218,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -5.221007823944092,
      "rewards/margins": 3.3985772132873535,
      "rewards/rejected": -8.619585037231445,
      "step": 450
    },
    {
      "epoch": 0.7062978222483814,
      "eval_logits/chosen": 15.033548355102539,
      "eval_logits/rejected": 14.20669174194336,
      "eval_logps/chosen": -3.423999547958374,
      "eval_logps/rejected": -5.855647563934326,
      "eval_loss": 0.45794492959976196,
      "eval_rewards/accuracies": 0.942307710647583,
      "eval_rewards/chosen": -5.135998725891113,
      "eval_rewards/margins": 3.6474733352661133,
      "eval_rewards/rejected": -8.783472061157227,
      "eval_runtime": 14.6088,
      "eval_samples_per_second": 28.202,
      "eval_steps_per_second": 3.559,
      "step": 450
    },
    {
      "epoch": 0.7219933294094566,
      "grad_norm": 1.7993106842041016,
      "learning_rate": 3.92678391921108e-06,
      "logits/chosen": 14.569323539733887,
      "logits/rejected": 13.937005996704102,
      "logps/chosen": -3.3623528480529785,
      "logps/rejected": -5.784353733062744,
      "loss": 0.3675,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -5.043529033660889,
      "rewards/margins": 3.6330013275146484,
      "rewards/rejected": -8.676530838012695,
      "step": 460
    },
    {
      "epoch": 0.7376888365705316,
      "grad_norm": 2.1828088760375977,
      "learning_rate": 3.88347887310836e-06,
      "logits/chosen": 13.90056037902832,
      "logits/rejected": 13.489587783813477,
      "logps/chosen": -3.959341049194336,
      "logps/rejected": -6.217314720153809,
      "loss": 0.3653,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -5.939011573791504,
      "rewards/margins": 3.3869614601135254,
      "rewards/rejected": -9.325971603393555,
      "step": 470
    },
    {
      "epoch": 0.7533843437316068,
      "grad_norm": 2.128547191619873,
      "learning_rate": 3.839566987447492e-06,
      "logits/chosen": 14.113011360168457,
      "logits/rejected": 13.571691513061523,
      "logps/chosen": -4.004420280456543,
      "logps/rejected": -6.216452121734619,
      "loss": 0.3439,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -6.0066304206848145,
      "rewards/margins": 3.3180477619171143,
      "rewards/rejected": -9.324677467346191,
      "step": 480
    },
    {
      "epoch": 0.7690798508926819,
      "grad_norm": 3.2651569843292236,
      "learning_rate": 3.795067523432826e-06,
      "logits/chosen": 13.918218612670898,
      "logits/rejected": 13.147412300109863,
      "logps/chosen": -4.574521064758301,
      "logps/rejected": -7.411087989807129,
      "loss": 0.3645,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -6.861782073974609,
      "rewards/margins": 4.254849910736084,
      "rewards/rejected": -11.116632461547852,
      "step": 490
    },
    {
      "epoch": 0.7847753580537571,
      "grad_norm": 2.163595676422119,
      "learning_rate": 3.7500000000000005e-06,
      "logits/chosen": 13.972379684448242,
      "logits/rejected": 13.390825271606445,
      "logps/chosen": -3.931866407394409,
      "logps/rejected": -6.547454833984375,
      "loss": 0.3698,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -5.897799491882324,
      "rewards/margins": 3.923382520675659,
      "rewards/rejected": -9.821184158325195,
      "step": 500
    },
    {
      "epoch": 0.7847753580537571,
      "eval_logits/chosen": 14.325434684753418,
      "eval_logits/rejected": 13.4517822265625,
      "eval_logps/chosen": -3.7717440128326416,
      "eval_logps/rejected": -6.710938453674316,
      "eval_loss": 0.38915327191352844,
      "eval_rewards/accuracies": 0.942307710647583,
      "eval_rewards/chosen": -5.657615661621094,
      "eval_rewards/margins": 4.408792018890381,
      "eval_rewards/rejected": -10.066408157348633,
      "eval_runtime": 14.5996,
      "eval_samples_per_second": 28.22,
      "eval_steps_per_second": 3.562,
      "step": 500
    },
    {
      "epoch": 0.8004708652148322,
      "grad_norm": 2.7098453044891357,
      "learning_rate": 3.7043841852542884e-06,
      "logits/chosen": 13.909749984741211,
      "logits/rejected": 13.060315132141113,
      "logps/chosen": -3.848449230194092,
      "logps/rejected": -6.747067928314209,
      "loss": 0.3547,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -5.772673606872559,
      "rewards/margins": 4.347928524017334,
      "rewards/rejected": -10.120603561401367,
      "step": 510
    },
    {
      "epoch": 0.8161663723759074,
      "grad_norm": 1.693467140197754,
      "learning_rate": 3.658240087799655e-06,
      "logits/chosen": 13.52336311340332,
      "logits/rejected": 13.026924133300781,
      "logps/chosen": -4.218991279602051,
      "logps/rejected": -6.984036445617676,
      "loss": 0.3718,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -6.328486442565918,
      "rewards/margins": 4.1475677490234375,
      "rewards/rejected": -10.476054191589355,
      "step": 520
    },
    {
      "epoch": 0.8318618795369825,
      "grad_norm": 2.4496915340423584,
      "learning_rate": 3.611587947962319e-06,
      "logits/chosen": 13.840197563171387,
      "logits/rejected": 13.181543350219727,
      "logps/chosen": -4.708582401275635,
      "logps/rejected": -7.19864559173584,
      "loss": 0.329,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -7.062872886657715,
      "rewards/margins": 3.7350947856903076,
      "rewards/rejected": -10.797967910766602,
      "step": 530
    },
    {
      "epoch": 0.8475573866980577,
      "grad_norm": 5.772508144378662,
      "learning_rate": 3.564448228912682e-06,
      "logits/chosen": 13.432130813598633,
      "logits/rejected": 12.825490951538086,
      "logps/chosen": -4.506819248199463,
      "logps/rejected": -7.506214141845703,
      "loss": 0.2544,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -6.760228633880615,
      "rewards/margins": 4.499091625213623,
      "rewards/rejected": -11.259321212768555,
      "step": 540
    },
    {
      "epoch": 0.8632528938591328,
      "grad_norm": 1.7776503562927246,
      "learning_rate": 3.516841607689501e-06,
      "logits/chosen": 13.536295890808105,
      "logits/rejected": 12.703726768493652,
      "logps/chosen": -4.724976062774658,
      "logps/rejected": -7.864454746246338,
      "loss": 0.2795,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -7.087464332580566,
      "rewards/margins": 4.7092180252075195,
      "rewards/rejected": -11.796682357788086,
      "step": 550
}, | |
{ | |
"epoch": 0.8632528938591328, | |
"eval_logits/chosen": 13.692171096801758, | |
"eval_logits/rejected": 12.81371021270752, | |
"eval_logps/chosen": -4.32358980178833, | |
"eval_logps/rejected": -7.658167839050293, | |
"eval_loss": 0.335475891828537, | |
"eval_rewards/accuracies": 0.942307710647583, | |
"eval_rewards/chosen": -6.485384464263916, | |
"eval_rewards/margins": 5.00186824798584, | |
"eval_rewards/rejected": -11.487252235412598, | |
"eval_runtime": 14.6093, | |
"eval_samples_per_second": 28.201, | |
"eval_steps_per_second": 3.559, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.878948401020208, | |
"grad_norm": 1.7613197565078735, | |
"learning_rate": 3.4687889661302577e-06, | |
"logits/chosen": 13.689091682434082, | |
"logits/rejected": 12.911500930786133, | |
"logps/chosen": -3.8375942707061768, | |
"logps/rejected": -6.98319149017334, | |
"loss": 0.3008, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.7563910484313965, | |
"rewards/margins": 4.7183966636657715, | |
"rewards/rejected": -10.474787712097168, | |
"step": 560 | |
}, | |
{ | |
"epoch": 0.8946439081812831, | |
"grad_norm": 2.1285247802734375, | |
"learning_rate": 3.4203113817116955e-06, | |
"logits/chosen": 13.902356147766113, | |
"logits/rejected": 12.766775131225586, | |
"logps/chosen": -3.7727108001708984, | |
"logps/rejected": -7.6362409591674805, | |
"loss": 0.2816, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.6590657234191895, | |
"rewards/margins": 5.795297622680664, | |
"rewards/rejected": -11.454363822937012, | |
"step": 570 | |
}, | |
{ | |
"epoch": 0.9103394153423583, | |
"grad_norm": 1.6965669393539429, | |
"learning_rate": 3.3714301183045382e-06, | |
"logits/chosen": 13.374112129211426, | |
"logits/rejected": 12.414613723754883, | |
"logps/chosen": -4.747832298278809, | |
"logps/rejected": -8.099973678588867, | |
"loss": 0.2921, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -7.121748924255371, | |
"rewards/margins": 5.028212547302246, | |
"rewards/rejected": -12.1499605178833, | |
"step": 580 | |
}, | |
{ | |
"epoch": 0.9260349225034334, | |
"grad_norm": 2.5470242500305176, | |
"learning_rate": 3.3221666168464584e-06, | |
"logits/chosen": 13.287660598754883, | |
"logits/rejected": 12.554139137268066, | |
"logps/chosen": -4.818625450134277, | |
"logps/rejected": -8.005637168884277, | |
"loss": 0.3155, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -7.227938175201416, | |
"rewards/margins": 4.780518531799316, | |
"rewards/rejected": -12.008456230163574, | |
"step": 590 | |
}, | |
{ | |
"epoch": 0.9417304296645085, | |
"grad_norm": 2.374859571456909, | |
"learning_rate": 3.272542485937369e-06, | |
"logits/chosen": 13.152572631835938, | |
"logits/rejected": 12.557012557983398, | |
"logps/chosen": -3.7581982612609863, | |
"logps/rejected": -6.985205173492432, | |
"loss": 0.2565, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.6372971534729, | |
"rewards/margins": 4.840510368347168, | |
"rewards/rejected": -10.477807998657227, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.9417304296645085, | |
"eval_logits/chosen": 13.585193634033203, | |
"eval_logits/rejected": 12.657953262329102, | |
"eval_logps/chosen": -4.077207088470459, | |
"eval_logps/rejected": -7.660572528839111, | |
"eval_loss": 0.30504748225212097, | |
"eval_rewards/accuracies": 0.9615384340286255, | |
"eval_rewards/chosen": -6.115810871124268, | |
"eval_rewards/margins": 5.375046730041504, | |
"eval_rewards/rejected": -11.49085807800293, | |
"eval_runtime": 14.6046, | |
"eval_samples_per_second": 28.21, | |
"eval_steps_per_second": 3.561, | |
"step": 600 | |
} | |
], | |
"logging_steps": 10, | |
"max_steps": 1500, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 3, | |
"save_steps": 50, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 1.4354126705797366e+18, | |
"train_batch_size": 1, | |
"trial_name": null, | |
"trial_params": null | |
} | |