{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.998691442030882,
  "eval_steps": 50,
  "global_step": 477,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002093692750588851,
      "grad_norm": 4.405554687265435,
      "learning_rate": 1.0416666666666666e-08,
      "logits/chosen": -0.4866575300693512,
      "logits/rejected": -0.7110590934753418,
      "logps/chosen": -355.9316101074219,
      "logps/rejected": -328.53912353515625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.02093692750588851,
      "grad_norm": 4.356279351306001,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -0.570002019405365,
      "logits/rejected": -0.6484304070472717,
      "logps/chosen": -295.9329528808594,
      "logps/rejected": -294.4837951660156,
      "loss": 0.6932,
      "rewards/accuracies": 0.4409722089767456,
      "rewards/chosen": -8.168106433004141e-05,
      "rewards/margins": -0.0002471136685926467,
      "rewards/rejected": 0.00016543263336643577,
      "step": 10
    },
    {
      "epoch": 0.04187385501177702,
      "grad_norm": 4.946352322484767,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -0.6396545767784119,
      "logits/rejected": -0.7154265642166138,
      "logps/chosen": -303.1146545410156,
      "logps/rejected": -268.5458679199219,
      "loss": 0.6931,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": 0.0005873010377399623,
      "rewards/margins": -5.269009852781892e-05,
      "rewards/rejected": 0.0006399911362677813,
      "step": 20
    },
    {
      "epoch": 0.06281078251766553,
      "grad_norm": 3.7069226422915262,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -0.571534276008606,
      "logits/rejected": -0.7066272497177124,
      "logps/chosen": -318.60552978515625,
      "logps/rejected": -287.88037109375,
      "loss": 0.6919,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.007322841789573431,
      "rewards/margins": 0.0024591959081590176,
      "rewards/rejected": 0.004863646812736988,
      "step": 30
    },
    {
      "epoch": 0.08374771002355404,
      "grad_norm": 3.83088263918906,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -0.6221814155578613,
      "logits/rejected": -0.6772241592407227,
      "logps/chosen": -314.6795349121094,
      "logps/rejected": -282.56732177734375,
      "loss": 0.6887,
      "rewards/accuracies": 0.690625011920929,
      "rewards/chosen": 0.02986811473965645,
      "rewards/margins": 0.011089036241173744,
      "rewards/rejected": 0.018779078498482704,
      "step": 40
    },
    {
      "epoch": 0.10468463752944256,
      "grad_norm": 3.669105350122918,
      "learning_rate": 4.999731868769026e-07,
      "logits/chosen": -0.6346956491470337,
      "logits/rejected": -0.7239211797714233,
      "logps/chosen": -284.1347351074219,
      "logps/rejected": -266.820556640625,
      "loss": 0.6829,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.05462328717112541,
      "rewards/margins": 0.023288695141673088,
      "rewards/rejected": 0.031334586441516876,
      "step": 50
    },
    {
      "epoch": 0.10468463752944256,
      "eval_logits/chosen": -0.6508491635322571,
      "eval_logits/rejected": -0.7277823686599731,
      "eval_logps/chosen": -284.6937561035156,
      "eval_logps/rejected": -264.2071838378906,
      "eval_loss": 0.6801902055740356,
      "eval_rewards/accuracies": 0.6579999923706055,
      "eval_rewards/chosen": 0.061179425567388535,
      "eval_rewards/margins": 0.027551723644137383,
      "eval_rewards/rejected": 0.033627700060606,
      "eval_runtime": 363.9047,
      "eval_samples_per_second": 5.496,
      "eval_steps_per_second": 1.374,
      "step": 50
    },
    {
      "epoch": 0.12562156503533106,
      "grad_norm": 4.218127495844181,
      "learning_rate": 4.990353313429303e-07,
      "logits/chosen": -0.6323250532150269,
      "logits/rejected": -0.7270756959915161,
      "logps/chosen": -276.2745056152344,
      "logps/rejected": -254.50173950195312,
      "loss": 0.6752,
      "rewards/accuracies": 0.703125,
      "rewards/chosen": 0.05940670520067215,
      "rewards/margins": 0.04031256586313248,
      "rewards/rejected": 0.019094135612249374,
      "step": 60
    },
    {
      "epoch": 0.14655849254121958,
      "grad_norm": 6.381025648346635,
      "learning_rate": 4.967625656594781e-07,
      "logits/chosen": -0.6830436587333679,
      "logits/rejected": -0.7048647403717041,
      "logps/chosen": -299.7218017578125,
      "logps/rejected": -286.69842529296875,
      "loss": 0.6631,
      "rewards/accuracies": 0.6968749761581421,
      "rewards/chosen": -0.004782336764037609,
      "rewards/margins": 0.055135466158390045,
      "rewards/rejected": -0.05991780757904053,
      "step": 70
    },
    {
      "epoch": 0.16749542004710807,
      "grad_norm": 4.8225743752994505,
      "learning_rate": 4.93167072587771e-07,
      "logits/chosen": -0.72892826795578,
      "logits/rejected": -0.7647081613540649,
      "logps/chosen": -325.7091369628906,
      "logps/rejected": -299.4671630859375,
      "loss": 0.6529,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.01807677373290062,
      "rewards/margins": 0.09614584594964981,
      "rewards/rejected": -0.11422260105609894,
      "step": 80
    },
    {
      "epoch": 0.1884323475529966,
      "grad_norm": 6.458187740670673,
      "learning_rate": 4.882681251368548e-07,
      "logits/chosen": -0.72618168592453,
      "logits/rejected": -0.7866657972335815,
      "logps/chosen": -284.3786315917969,
      "logps/rejected": -288.81207275390625,
      "loss": 0.6357,
      "rewards/accuracies": 0.703125,
      "rewards/chosen": -0.056771814823150635,
      "rewards/margins": 0.13238851726055145,
      "rewards/rejected": -0.1891603320837021,
      "step": 90
    },
    {
      "epoch": 0.2093692750588851,
      "grad_norm": 9.681896471190186,
      "learning_rate": 4.820919832540181e-07,
      "logits/chosen": -0.7013887166976929,
      "logits/rejected": -0.7288186550140381,
      "logps/chosen": -296.72760009765625,
      "logps/rejected": -284.09246826171875,
      "loss": 0.6237,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.12606890499591827,
      "rewards/margins": 0.1648329347372055,
      "rewards/rejected": -0.2909018397331238,
      "step": 100
    },
    {
      "epoch": 0.2093692750588851,
      "eval_logits/chosen": -0.6815055012702942,
      "eval_logits/rejected": -0.7410086989402771,
      "eval_logps/chosen": -302.6812438964844,
      "eval_logps/rejected": -297.7958068847656,
      "eval_loss": 0.6211419701576233,
      "eval_rewards/accuracies": 0.7080000042915344,
      "eval_rewards/chosen": -0.1186954528093338,
      "eval_rewards/margins": 0.18356309831142426,
      "eval_rewards/rejected": -0.30225852131843567,
      "eval_runtime": 362.6984,
      "eval_samples_per_second": 5.514,
      "eval_steps_per_second": 1.379,
      "step": 100
    },
    {
      "epoch": 0.23030620256477363,
      "grad_norm": 9.835955943860144,
      "learning_rate": 4.7467175306295647e-07,
      "logits/chosen": -0.6474356651306152,
      "logits/rejected": -0.6836977005004883,
      "logps/chosen": -304.34320068359375,
      "logps/rejected": -310.12701416015625,
      "loss": 0.6217,
      "rewards/accuracies": 0.690625011920929,
      "rewards/chosen": -0.1749463528394699,
      "rewards/margins": 0.19409213960170746,
      "rewards/rejected": -0.36903852224349976,
      "step": 110
    },
    {
      "epoch": 0.2512431300706621,
      "grad_norm": 8.239660157761222,
      "learning_rate": 4.6604720940421207e-07,
      "logits/chosen": -0.607188880443573,
      "logits/rejected": -0.6938163042068481,
      "logps/chosen": -345.035400390625,
      "logps/rejected": -316.98529052734375,
      "loss": 0.6104,
      "rewards/accuracies": 0.6968749761581421,
      "rewards/chosen": -0.2799115777015686,
      "rewards/margins": 0.2499914914369583,
      "rewards/rejected": -0.5299030542373657,
      "step": 120
    },
    {
      "epoch": 0.2721800575765506,
      "grad_norm": 14.29803862258713,
      "learning_rate": 4.5626458262912735e-07,
      "logits/chosen": -0.6138719916343689,
      "logits/rejected": -0.6717087626457214,
      "logps/chosen": -312.33154296875,
      "logps/rejected": -326.3365478515625,
      "loss": 0.5966,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.141954243183136,
      "rewards/margins": 0.27666208148002625,
      "rewards/rejected": -0.4186163544654846,
      "step": 130
    },
    {
      "epoch": 0.29311698508243916,
      "grad_norm": 11.127586752555215,
      "learning_rate": 4.453763107901675e-07,
      "logits/chosen": -0.6942325830459595,
      "logits/rejected": -0.7251573801040649,
      "logps/chosen": -342.69415283203125,
      "logps/rejected": -336.53369140625,
      "loss": 0.5974,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.22019581496715546,
      "rewards/margins": 0.24864068627357483,
      "rewards/rejected": -0.46883654594421387,
      "step": 140
    },
    {
      "epoch": 0.31405391258832765,
      "grad_norm": 18.14233757409728,
      "learning_rate": 4.3344075855595097e-07,
      "logits/chosen": -0.6244213581085205,
      "logits/rejected": -0.7043098211288452,
      "logps/chosen": -350.4010925292969,
      "logps/rejected": -343.3636779785156,
      "loss": 0.5943,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.2886292040348053,
      "rewards/margins": 0.3421526551246643,
      "rewards/rejected": -0.6307818293571472,
      "step": 150
    },
    {
      "epoch": 0.31405391258832765,
      "eval_logits/chosen": -0.6515117287635803,
      "eval_logits/rejected": -0.7014611959457397,
      "eval_logps/chosen": -314.8688659667969,
      "eval_logps/rejected": -318.15289306640625,
      "eval_loss": 0.5983646512031555,
      "eval_rewards/accuracies": 0.6980000138282776,
      "eval_rewards/chosen": -0.24057185649871826,
      "eval_rewards/margins": 0.265257328748703,
      "eval_rewards/rejected": -0.5058292150497437,
      "eval_runtime": 362.535,
      "eval_samples_per_second": 5.517,
      "eval_steps_per_second": 1.379,
      "step": 150
    },
    {
      "epoch": 0.33499084009421615,
      "grad_norm": 38.32033611861921,
      "learning_rate": 4.2052190435769554e-07,
      "logits/chosen": -0.6601163148880005,
      "logits/rejected": -0.6881546378135681,
      "logps/chosen": -315.28863525390625,
      "logps/rejected": -332.63458251953125,
      "loss": 0.5868,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.2826778292655945,
      "rewards/margins": 0.2917526066303253,
      "rewards/rejected": -0.5744304656982422,
      "step": 160
    },
    {
      "epoch": 0.3559277676001047,
      "grad_norm": 28.729045640780658,
      "learning_rate": 4.0668899744407567e-07,
      "logits/chosen": -0.6568697690963745,
      "logits/rejected": -0.6840031147003174,
      "logps/chosen": -313.22998046875,
      "logps/rejected": -343.158203125,
      "loss": 0.5898,
      "rewards/accuracies": 0.7093750238418579,
      "rewards/chosen": -0.3508220911026001,
      "rewards/margins": 0.3158671259880066,
      "rewards/rejected": -0.6666892170906067,
      "step": 170
    },
    {
      "epoch": 0.3768646951059932,
      "grad_norm": 31.687184520759857,
      "learning_rate": 3.920161866827889e-07,
      "logits/chosen": -0.6633812785148621,
      "logits/rejected": -0.7255716919898987,
      "logps/chosen": -348.14739990234375,
      "logps/rejected": -364.039794921875,
      "loss": 0.5712,
      "rewards/accuracies": 0.7406250238418579,
      "rewards/chosen": -0.38741543889045715,
      "rewards/margins": 0.4109956622123718,
      "rewards/rejected": -0.7984111309051514,
      "step": 180
    },
    {
      "epoch": 0.39780162261188173,
      "grad_norm": 30.89197698830195,
      "learning_rate": 3.765821230985757e-07,
      "logits/chosen": -0.6792806386947632,
      "logits/rejected": -0.6605275273323059,
      "logps/chosen": -321.3431091308594,
      "logps/rejected": -347.12255859375,
      "loss": 0.5812,
      "rewards/accuracies": 0.703125,
      "rewards/chosen": -0.40531882643699646,
      "rewards/margins": 0.33206993341445923,
      "rewards/rejected": -0.7373887300491333,
      "step": 190
    },
    {
      "epoch": 0.4187385501177702,
      "grad_norm": 37.44350102005558,
      "learning_rate": 3.604695382782159e-07,
      "logits/chosen": -0.6688074469566345,
      "logits/rejected": -0.7198851108551025,
      "logps/chosen": -349.90423583984375,
      "logps/rejected": -371.4599304199219,
      "loss": 0.5788,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.5965304970741272,
      "rewards/margins": 0.3574155569076538,
      "rewards/rejected": -0.9539459943771362,
      "step": 200
    },
    {
      "epoch": 0.4187385501177702,
      "eval_logits/chosen": -0.656848669052124,
      "eval_logits/rejected": -0.7012197375297546,
      "eval_logps/chosen": -356.0472106933594,
      "eval_logps/rejected": -370.5501708984375,
      "eval_loss": 0.573124349117279,
      "eval_rewards/accuracies": 0.7099999785423279,
      "eval_rewards/chosen": -0.652355432510376,
      "eval_rewards/margins": 0.3774465024471283,
      "eval_rewards/rejected": -1.0298019647598267,
      "eval_runtime": 361.6699,
      "eval_samples_per_second": 5.53,
      "eval_steps_per_second": 1.382,
      "step": 200
    },
    {
      "epoch": 0.4396754776236587,
      "grad_norm": 20.28041078703544,
      "learning_rate": 3.4376480090239047e-07,
      "logits/chosen": -0.6503298878669739,
      "logits/rejected": -0.6961864233016968,
      "logps/chosen": -363.98443603515625,
      "logps/rejected": -377.7153015136719,
      "loss": 0.5838,
      "rewards/accuracies": 0.715624988079071,
      "rewards/chosen": -0.6623082160949707,
      "rewards/margins": 0.4378587305545807,
      "rewards/rejected": -1.100166916847229,
      "step": 210
    },
    {
      "epoch": 0.46061240512954726,
      "grad_norm": 35.49367620967288,
      "learning_rate": 3.265574537815398e-07,
      "logits/chosen": -0.6686447858810425,
      "logits/rejected": -0.6819853186607361,
      "logps/chosen": -350.9520263671875,
      "logps/rejected": -395.07806396484375,
      "loss": 0.5712,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": -0.6993459463119507,
      "rewards/margins": 0.39070096611976624,
      "rewards/rejected": -1.090047001838684,
      "step": 220
    },
    {
      "epoch": 0.48154933263543576,
      "grad_norm": 16.234101384109916,
      "learning_rate": 3.0893973387735683e-07,
      "logits/chosen": -0.7044863700866699,
      "logits/rejected": -0.7113361358642578,
      "logps/chosen": -372.82562255859375,
      "logps/rejected": -418.2840881347656,
      "loss": 0.5785,
      "rewards/accuracies": 0.659375011920929,
      "rewards/chosen": -0.960254967212677,
      "rewards/margins": 0.3817780911922455,
      "rewards/rejected": -1.3420331478118896,
      "step": 230
    },
    {
      "epoch": 0.5024862601413242,
      "grad_norm": 28.450587843543108,
      "learning_rate": 2.910060778827554e-07,
      "logits/chosen": -0.6691449880599976,
      "logits/rejected": -0.690998375415802,
      "logps/chosen": -392.90472412109375,
      "logps/rejected": -415.3272399902344,
      "loss": 0.5413,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": -0.8516443967819214,
      "rewards/margins": 0.5042856931686401,
      "rewards/rejected": -1.355930209159851,
      "step": 240
    },
    {
      "epoch": 0.5234231876472127,
      "grad_norm": 26.335941375612,
      "learning_rate": 2.7285261601056697e-07,
      "logits/chosen": -0.744831383228302,
      "logits/rejected": -0.7566218972206116,
      "logps/chosen": -391.26239013671875,
      "logps/rejected": -434.4244079589844,
      "loss": 0.5518,
      "rewards/accuracies": 0.7093750238418579,
      "rewards/chosen": -0.8992071151733398,
      "rewards/margins": 0.5220283269882202,
      "rewards/rejected": -1.4212353229522705,
      "step": 250
    },
    {
      "epoch": 0.5234231876472127,
      "eval_logits/chosen": -0.6885221600532532,
      "eval_logits/rejected": -0.7286450266838074,
      "eval_logps/chosen": -390.97772216796875,
      "eval_logps/rejected": -414.00164794921875,
      "eval_loss": 0.5652250051498413,
      "eval_rewards/accuracies": 0.7260000109672546,
      "eval_rewards/chosen": -1.0016601085662842,
      "eval_rewards/margins": 0.4626566171646118,
      "eval_rewards/rejected": -1.4643168449401855,
      "eval_runtime": 359.194,
      "eval_samples_per_second": 5.568,
      "eval_steps_per_second": 1.392,
      "step": 250
    },
    {
      "epoch": 0.5443601151531012,
      "grad_norm": 50.5833145233516,
      "learning_rate": 2.5457665670441937e-07,
      "logits/chosen": -0.7291234135627747,
      "logits/rejected": -0.7106319665908813,
      "logps/chosen": -386.3546142578125,
      "logps/rejected": -427.50469970703125,
      "loss": 0.5567,
      "rewards/accuracies": 0.7093750238418579,
      "rewards/chosen": -0.9915366172790527,
      "rewards/margins": 0.4434414803981781,
      "rewards/rejected": -1.4349782466888428,
      "step": 260
    },
    {
      "epoch": 0.5652970426589898,
      "grad_norm": 29.40437710442188,
      "learning_rate": 2.3627616503391812e-07,
      "logits/chosen": -0.6915990114212036,
      "logits/rejected": -0.7419033050537109,
      "logps/chosen": -384.0176696777344,
      "logps/rejected": -393.740234375,
      "loss": 0.556,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.9498584866523743,
      "rewards/margins": 0.4780716001987457,
      "rewards/rejected": -1.4279301166534424,
      "step": 270
    },
    {
      "epoch": 0.5862339701648783,
      "grad_norm": 555.5760315124888,
      "learning_rate": 2.1804923757009882e-07,
      "logits/chosen": -0.7014500498771667,
      "logits/rejected": -0.7159109115600586,
      "logps/chosen": -389.5604553222656,
      "logps/rejected": -420.940185546875,
      "loss": 0.571,
      "rewards/accuracies": 0.684374988079071,
      "rewards/chosen": -0.9026616811752319,
      "rewards/margins": 0.4052717089653015,
      "rewards/rejected": -1.3079332113265991,
      "step": 280
    },
    {
      "epoch": 0.6071708976707668,
      "grad_norm": 17.414681195430056,
      "learning_rate": 1.9999357655598891e-07,
      "logits/chosen": -0.6687137484550476,
      "logits/rejected": -0.7034614086151123,
      "logps/chosen": -383.6869201660156,
      "logps/rejected": -404.7913818359375,
      "loss": 0.5536,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.8191441297531128,
      "rewards/margins": 0.5369825959205627,
      "rewards/rejected": -1.3561267852783203,
      "step": 290
    },
    {
      "epoch": 0.6281078251766553,
      "grad_norm": 111.908211236837,
      "learning_rate": 1.8220596619089573e-07,
      "logits/chosen": -0.6898786425590515,
      "logits/rejected": -0.7226337790489197,
      "logps/chosen": -403.2303771972656,
      "logps/rejected": -441.4107971191406,
      "loss": 0.5472,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.030331015586853,
      "rewards/margins": 0.5372998118400574,
      "rewards/rejected": -1.5676310062408447,
      "step": 300
    },
    {
      "epoch": 0.6281078251766553,
      "eval_logits/chosen": -0.6861531734466553,
      "eval_logits/rejected": -0.7268748879432678,
      "eval_logps/chosen": -395.8287048339844,
      "eval_logps/rejected": -419.298583984375,
      "eval_loss": 0.5599412322044373,
      "eval_rewards/accuracies": 0.722000002861023,
      "eval_rewards/chosen": -1.0501700639724731,
      "eval_rewards/margins": 0.46711620688438416,
      "eval_rewards/rejected": -1.5172861814498901,
      "eval_runtime": 354.6129,
      "eval_samples_per_second": 5.64,
      "eval_steps_per_second": 1.41,
      "step": 300
    },
    {
      "epoch": 0.6490447526825438,
      "grad_norm": 37.29689892406944,
      "learning_rate": 1.647817538357072e-07,
      "logits/chosen": -0.7173858880996704,
      "logits/rejected": -0.7734094858169556,
      "logps/chosen": -422.9934997558594,
      "logps/rejected": -421.84832763671875,
      "loss": 0.5395,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.9894211888313293,
      "rewards/margins": 0.4697929918766022,
      "rewards/rejected": -1.459214210510254,
      "step": 310
    },
    {
      "epoch": 0.6699816801884323,
      "grad_norm": 42.01533855938247,
      "learning_rate": 1.478143389201113e-07,
      "logits/chosen": -0.6970559358596802,
      "logits/rejected": -0.6931095123291016,
      "logps/chosen": -377.6057434082031,
      "logps/rejected": -406.4757995605469,
      "loss": 0.5333,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.9958661794662476,
      "rewards/margins": 0.5368868112564087,
      "rewards/rejected": -1.5327531099319458,
      "step": 320
    },
    {
      "epoch": 0.6909186076943209,
      "grad_norm": 37.59789471432025,
      "learning_rate": 1.3139467229135998e-07,
      "logits/chosen": -0.7029486298561096,
      "logits/rejected": -0.7086675763130188,
      "logps/chosen": -405.01715087890625,
      "logps/rejected": -445.0240783691406,
      "loss": 0.5457,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": -1.0576202869415283,
      "rewards/margins": 0.5650633573532104,
      "rewards/rejected": -1.6226835250854492,
      "step": 330
    },
    {
      "epoch": 0.7118555352002094,
      "grad_norm": 36.37515401195654,
      "learning_rate": 1.1561076868822755e-07,
      "logits/chosen": -0.7101846933364868,
      "logits/rejected": -0.704253077507019,
      "logps/chosen": -403.80133056640625,
      "logps/rejected": -455.9847106933594,
      "loss": 0.5449,
      "rewards/accuracies": 0.7406250238418579,
      "rewards/chosen": -1.0073381662368774,
      "rewards/margins": 0.5620848536491394,
      "rewards/rejected": -1.569422960281372,
      "step": 340
    },
    {
      "epoch": 0.7327924627060979,
      "grad_norm": 24.48749996210416,
      "learning_rate": 1.0054723495346482e-07,
      "logits/chosen": -0.7318924069404602,
      "logits/rejected": -0.7332495450973511,
      "logps/chosen": -386.4432373046875,
      "logps/rejected": -431.122314453125,
      "loss": 0.5215,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.9518150091171265,
      "rewards/margins": 0.6185209155082703,
      "rewards/rejected": -1.5703357458114624,
      "step": 350
    },
    {
      "epoch": 0.7327924627060979,
      "eval_logits/chosen": -0.7031384110450745,
      "eval_logits/rejected": -0.7401583790779114,
      "eval_logps/chosen": -392.8218688964844,
      "eval_logps/rejected": -421.5936279296875,
      "eval_loss": 0.5506237745285034,
      "eval_rewards/accuracies": 0.7379999756813049,
      "eval_rewards/chosen": -1.0201021432876587,
      "eval_rewards/margins": 0.5201343894004822,
      "eval_rewards/rejected": -1.5402365922927856,
      "eval_runtime": 353.9993,
      "eval_samples_per_second": 5.65,
      "eval_steps_per_second": 1.412,
      "step": 350
    },
    {
      "epoch": 0.7537293902119864,
      "grad_norm": 33.943607014642296,
      "learning_rate": 8.628481651367875e-08,
      "logits/chosen": -0.6926234364509583,
      "logits/rejected": -0.708940327167511,
      "logps/chosen": -418.54705810546875,
      "logps/rejected": -442.49365234375,
      "loss": 0.557,
      "rewards/accuracies": 0.7281249761581421,
      "rewards/chosen": -1.08583664894104,
      "rewards/margins": 0.4989503026008606,
      "rewards/rejected": -1.5847870111465454,
      "step": 360
    },
    {
      "epoch": 0.7746663177178749,
      "grad_norm": 29.32569904282417,
      "learning_rate": 7.289996455765748e-08,
      "logits/chosen": -0.6894348859786987,
      "logits/rejected": -0.7344834804534912,
      "logps/chosen": -399.79083251953125,
      "logps/rejected": -426.5049743652344,
      "loss": 0.5393,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.078011155128479,
      "rewards/margins": 0.5504921674728394,
      "rewards/rejected": -1.6285032033920288,
      "step": 370
    },
    {
      "epoch": 0.7956032452237635,
      "grad_norm": 46.152539915607406,
      "learning_rate": 6.046442623320145e-08,
      "logits/chosen": -0.6863051056861877,
      "logits/rejected": -0.7106188535690308,
      "logps/chosen": -421.65606689453125,
      "logps/rejected": -461.34490966796875,
      "loss": 0.5398,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.169628620147705,
      "rewards/margins": 0.5851330757141113,
      "rewards/rejected": -1.7547616958618164,
      "step": 380
    },
    {
      "epoch": 0.816540172729652,
      "grad_norm": 32.74884413709988,
      "learning_rate": 4.904486005914027e-08,
      "logits/chosen": -0.6976770162582397,
      "logits/rejected": -0.7492203712463379,
      "logps/chosen": -417.27020263671875,
      "logps/rejected": -452.70343017578125,
      "loss": 0.5329,
      "rewards/accuracies": 0.715624988079071,
      "rewards/chosen": -1.0417944192886353,
      "rewards/margins": 0.5599567890167236,
      "rewards/rejected": -1.6017510890960693,
      "step": 390
    },
    {
      "epoch": 0.8374771002355405,
      "grad_norm": 43.939889505795925,
      "learning_rate": 3.8702478614051345e-08,
      "logits/chosen": -0.7140255570411682,
      "logits/rejected": -0.750179648399353,
      "logps/chosen": -409.387451171875,
      "logps/rejected": -452.68414306640625,
      "loss": 0.5415,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": -1.0880142450332642,
      "rewards/margins": 0.5527244806289673,
      "rewards/rejected": -1.6407387256622314,
      "step": 400
    },
    {
      "epoch": 0.8374771002355405,
      "eval_logits/chosen": -0.7055131793022156,
      "eval_logits/rejected": -0.7418683767318726,
      "eval_logps/chosen": -402.3448181152344,
      "eval_logps/rejected": -432.3640441894531,
      "eval_loss": 0.5493519306182861,
      "eval_rewards/accuracies": 0.7459999918937683,
      "eval_rewards/chosen": -1.1153309345245361,
      "eval_rewards/margins": 0.5326094031333923,
      "eval_rewards/rejected": -1.6479403972625732,
      "eval_runtime": 354.6496,
      "eval_samples_per_second": 5.639,
      "eval_steps_per_second": 1.41,
      "step": 400
    },
    {
      "epoch": 0.8584140277414289,
      "grad_norm": 34.6918001312517,
      "learning_rate": 2.9492720416985e-08,
      "logits/chosen": -0.7722108960151672,
      "logits/rejected": -0.7759251594543457,
      "logps/chosen": -425.9440002441406,
      "logps/rejected": -437.1736755371094,
      "loss": 0.5366,
      "rewards/accuracies": 0.7281249761581421,
      "rewards/chosen": -1.136293649673462,
      "rewards/margins": 0.545237123966217,
      "rewards/rejected": -1.6815307140350342,
      "step": 410
    },
    {
      "epoch": 0.8793509552473174,
      "grad_norm": 26.59254420115498,
      "learning_rate": 2.1464952759020856e-08,
      "logits/chosen": -0.7044004201889038,
      "logits/rejected": -0.7171922922134399,
      "logps/chosen": -400.56964111328125,
      "logps/rejected": -466.5582580566406,
      "loss": 0.5326,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.1527706384658813,
      "rewards/margins": 0.6072098016738892,
      "rewards/rejected": -1.7599804401397705,
      "step": 420
    },
    {
      "epoch": 0.9002878827532059,
      "grad_norm": 18.548901459218577,
      "learning_rate": 1.4662207078575684e-08,
      "logits/chosen": -0.7108010053634644,
      "logits/rejected": -0.7494346499443054,
      "logps/chosen": -422.11761474609375,
      "logps/rejected": -446.8285217285156,
      "loss": 0.5159,
      "rewards/accuracies": 0.753125011920929,
      "rewards/chosen": -1.1354727745056152,
      "rewards/margins": 0.583838701248169,
      "rewards/rejected": -1.7193113565444946,
      "step": 430
    },
    {
      "epoch": 0.9212248102590945,
      "grad_norm": 46.949560224254434,
      "learning_rate": 9.12094829893642e-09,
      "logits/chosen": -0.7222899198532104,
      "logits/rejected": -0.7426605820655823,
      "logps/chosen": -414.1346740722656,
      "logps/rejected": -462.6570739746094,
      "loss": 0.5371,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -1.1052252054214478,
      "rewards/margins": 0.6353217959403992,
      "rewards/rejected": -1.7405471801757812,
      "step": 440
    },
    {
      "epoch": 0.942161737764983,
      "grad_norm": 52.60402288327978,
      "learning_rate": 4.8708793644441086e-09,
      "logits/chosen": -0.709662914276123,
      "logits/rejected": -0.7304754853248596,
      "logps/chosen": -394.4976501464844,
      "logps/rejected": -445.88360595703125,
      "loss": 0.5368,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -1.1392234563827515,
      "rewards/margins": 0.5495996475219727,
      "rewards/rejected": -1.6888229846954346,
      "step": 450
    },
    {
      "epoch": 0.942161737764983,
      "eval_logits/chosen": -0.7091179490089417,
      "eval_logits/rejected": -0.7446374893188477,
      "eval_logps/chosen": -405.1720275878906,
      "eval_logps/rejected": -435.4569091796875,
      "eval_loss": 0.5487431287765503,
      "eval_rewards/accuracies": 0.7379999756813049,
      "eval_rewards/chosen": -1.1436034440994263,
      "eval_rewards/margins": 0.5352665185928345,
      "eval_rewards/rejected": -1.6788699626922607,
      "eval_runtime": 354.9509,
      "eval_samples_per_second": 5.635,
      "eval_steps_per_second": 1.409,
      "step": 450
    },
    {
      "epoch": 0.9630986652708715,
      "grad_norm": 25.01747933998231,
      "learning_rate": 1.9347820230782295e-09,
      "logits/chosen": -0.7313689589500427,
      "logits/rejected": -0.7425884008407593,
      "logps/chosen": -407.93182373046875,
      "logps/rejected": -438.13311767578125,
      "loss": 0.5418,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -1.1677922010421753,
      "rewards/margins": 0.5207014083862305,
      "rewards/rejected": -1.6884937286376953,
      "step": 460
    },
    {
      "epoch": 0.98403559277676,
      "grad_norm": 18.928838252315014,
      "learning_rate": 3.2839470889836627e-10,
      "logits/chosen": -0.6598347425460815,
      "logits/rejected": -0.7180498242378235,
      "logps/chosen": -415.50347900390625,
      "logps/rejected": -432.26556396484375,
      "loss": 0.5174,
      "rewards/accuracies": 0.7406250238418579,
      "rewards/chosen": -1.1242364645004272,
      "rewards/margins": 0.6142801642417908,
      "rewards/rejected": -1.7385165691375732,
      "step": 470
    },
    {
      "epoch": 0.998691442030882,
      "step": 477,
      "total_flos": 0.0,
      "train_loss": 0.5814160232274037,
      "train_runtime": 29745.7246,
      "train_samples_per_second": 2.055,
      "train_steps_per_second": 0.016
    }
  ],
  "logging_steps": 10,
  "max_steps": 477,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}