| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9982859101816935, | |
| "eval_steps": 0, | |
| "global_step": 182, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005485087418580734, | |
| "grad_norm": 5.038297822406707, | |
| "learning_rate": 2.6315789473684213e-07, | |
| "logits/chosen": -0.3854110836982727, | |
| "logits/rejected": -0.38843637704849243, | |
| "logps/chosen": -0.5867404937744141, | |
| "logps/rejected": -0.7349259853363037, | |
| "loss": 0.8549, | |
| "odds_ratio_loss": 8.495767593383789, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -0.07349259406328201, | |
| "rewards/margins": -0.014818555675446987, | |
| "rewards/rejected": -0.05867404490709305, | |
| "sft_loss": 0.00530597660690546, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.010970174837161468, | |
| "grad_norm": 3.8688701991936516, | |
| "learning_rate": 5.263157894736843e-07, | |
| "logits/chosen": -0.4200110137462616, | |
| "logits/rejected": -0.4337027370929718, | |
| "logps/chosen": -0.5888247489929199, | |
| "logps/rejected": -0.7141146659851074, | |
| "loss": 0.8261, | |
| "odds_ratio_loss": 8.218369483947754, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.07141146808862686, | |
| "rewards/margins": -0.01252899132668972, | |
| "rewards/rejected": -0.05888247489929199, | |
| "sft_loss": 0.004305172245949507, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0164552622557422, | |
| "grad_norm": 3.4925619509791903, | |
| "learning_rate": 7.894736842105263e-07, | |
| "logits/chosen": -0.38829293847084045, | |
| "logits/rejected": -0.36370420455932617, | |
| "logps/chosen": -0.6846970319747925, | |
| "logps/rejected": -0.6908957362174988, | |
| "loss": 0.8057, | |
| "odds_ratio_loss": 8.025218963623047, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.06908956915140152, | |
| "rewards/margins": -0.0006198745686560869, | |
| "rewards/rejected": -0.06846970319747925, | |
| "sft_loss": 0.003192172385752201, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.021940349674322936, | |
| "grad_norm": 5.442119869368371, | |
| "learning_rate": 1.0526315789473685e-06, | |
| "logits/chosen": -0.4157654047012329, | |
| "logits/rejected": -0.4449327886104584, | |
| "logps/chosen": -0.6106168031692505, | |
| "logps/rejected": -0.7167029976844788, | |
| "loss": 0.8331, | |
| "odds_ratio_loss": 8.275010108947754, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.0716702938079834, | |
| "rewards/margins": -0.010608619078993797, | |
| "rewards/rejected": -0.06106168404221535, | |
| "sft_loss": 0.005572030786424875, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.027425437092903668, | |
| "grad_norm": 3.9040280718139004, | |
| "learning_rate": 1.3157894736842106e-06, | |
| "logits/chosen": -0.4705536961555481, | |
| "logits/rejected": -0.448209285736084, | |
| "logps/chosen": -0.5760213136672974, | |
| "logps/rejected": -0.6794447898864746, | |
| "loss": 0.7994, | |
| "odds_ratio_loss": 7.961273193359375, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": -0.0679444819688797, | |
| "rewards/margins": -0.010342349298298359, | |
| "rewards/rejected": -0.057602137327194214, | |
| "sft_loss": 0.003320502582937479, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0329105245114844, | |
| "grad_norm": 3.9490673466980417, | |
| "learning_rate": 1.5789473684210526e-06, | |
| "logits/chosen": -0.3221694231033325, | |
| "logits/rejected": -0.405836820602417, | |
| "logps/chosen": -0.6044615507125854, | |
| "logps/rejected": -0.7186299562454224, | |
| "loss": 0.8315, | |
| "odds_ratio_loss": 8.270588874816895, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.07186298817396164, | |
| "rewards/margins": -0.011416830122470856, | |
| "rewards/rejected": -0.060446158051490784, | |
| "sft_loss": 0.004453588742762804, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03839561193006513, | |
| "grad_norm": 4.211003798628622, | |
| "learning_rate": 1.8421052631578948e-06, | |
| "logits/chosen": -0.4302704334259033, | |
| "logits/rejected": -0.48908236622810364, | |
| "logps/chosen": -0.6016876697540283, | |
| "logps/rejected": -0.6534877419471741, | |
| "loss": 0.7601, | |
| "odds_ratio_loss": 7.552478313446045, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.06534876674413681, | |
| "rewards/margins": -0.005180003587156534, | |
| "rewards/rejected": -0.06016876921057701, | |
| "sft_loss": 0.004846740514039993, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.04388069934864587, | |
| "grad_norm": 4.5970681547716055, | |
| "learning_rate": 2.105263157894737e-06, | |
| "logits/chosen": -0.4125446677207947, | |
| "logits/rejected": -0.48786473274230957, | |
| "logps/chosen": -0.5902884602546692, | |
| "logps/rejected": -0.6903232336044312, | |
| "loss": 0.8077, | |
| "odds_ratio_loss": 8.029266357421875, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": -0.06903232634067535, | |
| "rewards/margins": -0.010003475472331047, | |
| "rewards/rejected": -0.05902884528040886, | |
| "sft_loss": 0.004780109040439129, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.049365786767226603, | |
| "grad_norm": 3.9830322243241154, | |
| "learning_rate": 2.368421052631579e-06, | |
| "logits/chosen": -0.4158952832221985, | |
| "logits/rejected": -0.4516904354095459, | |
| "logps/chosen": -0.49427568912506104, | |
| "logps/rejected": -0.7636269927024841, | |
| "loss": 0.8995, | |
| "odds_ratio_loss": 8.943946838378906, | |
| "rewards/accuracies": 0.265625, | |
| "rewards/chosen": -0.07636269927024841, | |
| "rewards/margins": -0.02693513222038746, | |
| "rewards/rejected": -0.049427565187215805, | |
| "sft_loss": 0.005144394934177399, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.054850874185807336, | |
| "grad_norm": 3.963656319575701, | |
| "learning_rate": 2.631578947368421e-06, | |
| "logits/chosen": -0.4935796558856964, | |
| "logits/rejected": -0.4773009419441223, | |
| "logps/chosen": -0.5309734344482422, | |
| "logps/rejected": -0.7864019870758057, | |
| "loss": 0.914, | |
| "odds_ratio_loss": 9.082796096801758, | |
| "rewards/accuracies": 0.296875, | |
| "rewards/chosen": -0.07864020019769669, | |
| "rewards/margins": -0.025542862713336945, | |
| "rewards/rejected": -0.05309733748435974, | |
| "sft_loss": 0.005673164501786232, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.06033596160438807, | |
| "grad_norm": 6.086739375903808, | |
| "learning_rate": 2.8947368421052634e-06, | |
| "logits/chosen": -0.49898672103881836, | |
| "logits/rejected": -0.5067213773727417, | |
| "logps/chosen": -0.5432000756263733, | |
| "logps/rejected": -0.728182315826416, | |
| "loss": 0.8437, | |
| "odds_ratio_loss": 8.347268104553223, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": -0.07281822711229324, | |
| "rewards/margins": -0.018498217687010765, | |
| "rewards/rejected": -0.05432000756263733, | |
| "sft_loss": 0.00892576016485691, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0658210490229688, | |
| "grad_norm": 2.6128540943717016, | |
| "learning_rate": 3.157894736842105e-06, | |
| "logits/chosen": -0.5385469198226929, | |
| "logits/rejected": -0.5363799333572388, | |
| "logps/chosen": -0.6628677248954773, | |
| "logps/rejected": -0.7211645245552063, | |
| "loss": 0.8365, | |
| "odds_ratio_loss": 8.324950218200684, | |
| "rewards/accuracies": 0.296875, | |
| "rewards/chosen": -0.07211645692586899, | |
| "rewards/margins": -0.005829682573676109, | |
| "rewards/rejected": -0.06628676503896713, | |
| "sft_loss": 0.003960596397519112, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.07130613644154954, | |
| "grad_norm": 3.092946841589385, | |
| "learning_rate": 3.421052631578948e-06, | |
| "logits/chosen": -0.5176294445991516, | |
| "logits/rejected": -0.5164329409599304, | |
| "logps/chosen": -0.5450438857078552, | |
| "logps/rejected": -0.6731284856796265, | |
| "loss": 0.7842, | |
| "odds_ratio_loss": 7.793478012084961, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": -0.06731285154819489, | |
| "rewards/margins": -0.012808457016944885, | |
| "rewards/rejected": -0.054504383355379105, | |
| "sft_loss": 0.004814960993826389, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.07679122386013026, | |
| "grad_norm": 2.8558270742996137, | |
| "learning_rate": 3.6842105263157896e-06, | |
| "logits/chosen": -0.6181472539901733, | |
| "logits/rejected": -0.5952631831169128, | |
| "logps/chosen": -0.6915932893753052, | |
| "logps/rejected": -0.753544807434082, | |
| "loss": 0.8682, | |
| "odds_ratio_loss": 8.637476921081543, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -0.07535447925329208, | |
| "rewards/margins": -0.006195154972374439, | |
| "rewards/rejected": -0.06915932893753052, | |
| "sft_loss": 0.0044339620508253574, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.082276311278711, | |
| "grad_norm": 3.7864729296015134, | |
| "learning_rate": 3.947368421052632e-06, | |
| "logits/chosen": -0.6190563440322876, | |
| "logits/rejected": -0.5924232006072998, | |
| "logps/chosen": -0.5566739439964294, | |
| "logps/rejected": -0.7037681937217712, | |
| "loss": 0.8093, | |
| "odds_ratio_loss": 8.019332885742188, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -0.07037682831287384, | |
| "rewards/margins": -0.014709431678056717, | |
| "rewards/rejected": -0.05566739663481712, | |
| "sft_loss": 0.007347959093749523, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08776139869729174, | |
| "grad_norm": 3.0120281360552097, | |
| "learning_rate": 4.210526315789474e-06, | |
| "logits/chosen": -0.6849666833877563, | |
| "logits/rejected": -0.6525503396987915, | |
| "logps/chosen": -0.5696557760238647, | |
| "logps/rejected": -0.7036406993865967, | |
| "loss": 0.8093, | |
| "odds_ratio_loss": 8.039867401123047, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": -0.0703640729188919, | |
| "rewards/margins": -0.013398496434092522, | |
| "rewards/rejected": -0.05696558207273483, | |
| "sft_loss": 0.005266908556222916, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.09324648611587247, | |
| "grad_norm": 2.6504291324403315, | |
| "learning_rate": 4.473684210526316e-06, | |
| "logits/chosen": -0.641123354434967, | |
| "logits/rejected": -0.6362437605857849, | |
| "logps/chosen": -0.6884087324142456, | |
| "logps/rejected": -0.7263792157173157, | |
| "loss": 0.8353, | |
| "odds_ratio_loss": 8.31103515625, | |
| "rewards/accuracies": 0.2734375, | |
| "rewards/chosen": -0.07263792306184769, | |
| "rewards/margins": -0.0037970547564327717, | |
| "rewards/rejected": -0.0688408762216568, | |
| "sft_loss": 0.004170028492808342, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.09873157353445321, | |
| "grad_norm": 2.886807767840121, | |
| "learning_rate": 4.736842105263158e-06, | |
| "logits/chosen": -0.6371440291404724, | |
| "logits/rejected": -0.6085755825042725, | |
| "logps/chosen": -0.6927422881126404, | |
| "logps/rejected": -0.706704318523407, | |
| "loss": 0.8013, | |
| "odds_ratio_loss": 7.961912155151367, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.07067042589187622, | |
| "rewards/margins": -0.0013961929362267256, | |
| "rewards/rejected": -0.06927423179149628, | |
| "sft_loss": 0.005079690366983414, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.10421666095303393, | |
| "grad_norm": 5.880825411085245, | |
| "learning_rate": 5e-06, | |
| "logits/chosen": -0.6107761859893799, | |
| "logits/rejected": -0.6409167647361755, | |
| "logps/chosen": -0.628943681716919, | |
| "logps/rejected": -0.6733644008636475, | |
| "loss": 0.7634, | |
| "odds_ratio_loss": 7.441443920135498, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": -0.06733644008636475, | |
| "rewards/margins": -0.004442068748176098, | |
| "rewards/rejected": -0.06289438158273697, | |
| "sft_loss": 0.01928992196917534, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.10970174837161467, | |
| "grad_norm": 2.5047291630264965, | |
| "learning_rate": 4.999535676028338e-06, | |
| "logits/chosen": -0.53981614112854, | |
| "logits/rejected": -0.5691806674003601, | |
| "logps/chosen": -0.5706157088279724, | |
| "logps/rejected": -0.6909669637680054, | |
| "loss": 0.7906, | |
| "odds_ratio_loss": 7.871767997741699, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": -0.06909669935703278, | |
| "rewards/margins": -0.012035122141242027, | |
| "rewards/rejected": -0.0570615753531456, | |
| "sft_loss": 0.003470680210739374, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11518683579019541, | |
| "grad_norm": 2.731427921236176, | |
| "learning_rate": 4.998142876590749e-06, | |
| "logits/chosen": -0.6109368801116943, | |
| "logits/rejected": -0.5822383165359497, | |
| "logps/chosen": -0.5740761756896973, | |
| "logps/rejected": -0.7196378111839294, | |
| "loss": 0.8229, | |
| "odds_ratio_loss": 8.185101509094238, | |
| "rewards/accuracies": 0.3203125, | |
| "rewards/chosen": -0.07196377962827682, | |
| "rewards/margins": -0.014556167647242546, | |
| "rewards/rejected": -0.05740761756896973, | |
| "sft_loss": 0.004432335030287504, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.12067192320877614, | |
| "grad_norm": 2.6457518569851914, | |
| "learning_rate": 4.9958221190553705e-06, | |
| "logits/chosen": -0.6459155082702637, | |
| "logits/rejected": -0.6224872469902039, | |
| "logps/chosen": -0.9075093269348145, | |
| "logps/rejected": -0.6858919858932495, | |
| "loss": 0.7814, | |
| "odds_ratio_loss": 7.773059844970703, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": -0.06858920305967331, | |
| "rewards/margins": 0.02216172218322754, | |
| "rewards/rejected": -0.09075092524290085, | |
| "sft_loss": 0.004068214446306229, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.12615701062735687, | |
| "grad_norm": 2.3568903077737398, | |
| "learning_rate": 4.992574265488883e-06, | |
| "logits/chosen": -0.596120297908783, | |
| "logits/rejected": -0.5599789619445801, | |
| "logps/chosen": -0.6051114201545715, | |
| "logps/rejected": -0.6642252206802368, | |
| "loss": 0.7587, | |
| "odds_ratio_loss": 7.555657386779785, | |
| "rewards/accuracies": 0.3203125, | |
| "rewards/chosen": -0.06642251461744308, | |
| "rewards/margins": -0.0059113758616149426, | |
| "rewards/rejected": -0.06051114201545715, | |
| "sft_loss": 0.0031609206926077604, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.1316420980459376, | |
| "grad_norm": 2.4794879192215245, | |
| "learning_rate": 4.988400522336304e-06, | |
| "logits/chosen": -0.6102786064147949, | |
| "logits/rejected": -0.5751500129699707, | |
| "logps/chosen": -0.6058512926101685, | |
| "logps/rejected": -0.6483108997344971, | |
| "loss": 0.7446, | |
| "odds_ratio_loss": 7.4141364097595215, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.06483108550310135, | |
| "rewards/margins": -0.004245956428349018, | |
| "rewards/rejected": -0.060585130006074905, | |
| "sft_loss": 0.003168251132592559, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.13712718546451835, | |
| "grad_norm": 2.6638728856099076, | |
| "learning_rate": 4.9833024399728295e-06, | |
| "logits/chosen": -0.6613443493843079, | |
| "logits/rejected": -0.6328415870666504, | |
| "logps/chosen": -0.6213655471801758, | |
| "logps/rejected": -0.6468154191970825, | |
| "loss": 0.7378, | |
| "odds_ratio_loss": 7.336475372314453, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.06468154489994049, | |
| "rewards/margins": -0.0025449953973293304, | |
| "rewards/rejected": -0.06213654577732086, | |
| "sft_loss": 0.004186041187494993, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.14261227288309908, | |
| "grad_norm": 2.487669137174005, | |
| "learning_rate": 4.9772819121279395e-06, | |
| "logits/chosen": -0.6384425759315491, | |
| "logits/rejected": -0.6189238429069519, | |
| "logps/chosen": -0.5178334712982178, | |
| "logps/rejected": -0.6201947927474976, | |
| "loss": 0.7189, | |
| "odds_ratio_loss": 7.1527557373046875, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": -0.0620194748044014, | |
| "rewards/margins": -0.010236131027340889, | |
| "rewards/rejected": -0.05178334563970566, | |
| "sft_loss": 0.00362430140376091, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.1480973603016798, | |
| "grad_norm": 2.3790278282662314, | |
| "learning_rate": 4.970341175181957e-06, | |
| "logits/chosen": -0.6167687177658081, | |
| "logits/rejected": -0.6090523600578308, | |
| "logps/chosen": -0.5448204278945923, | |
| "logps/rejected": -0.660010576248169, | |
| "loss": 0.7598, | |
| "odds_ratio_loss": 7.5589399337768555, | |
| "rewards/accuracies": 0.3203125, | |
| "rewards/chosen": -0.06600106507539749, | |
| "rewards/margins": -0.011519018560647964, | |
| "rewards/rejected": -0.05448204651474953, | |
| "sft_loss": 0.00385806686244905, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.15358244772026053, | |
| "grad_norm": 2.6617242551359275, | |
| "learning_rate": 4.9624828073353144e-06, | |
| "logits/chosen": -0.6628867387771606, | |
| "logits/rejected": -0.642227053642273, | |
| "logps/chosen": -0.5881469249725342, | |
| "logps/rejected": -0.6607677340507507, | |
| "loss": 0.7543, | |
| "odds_ratio_loss": 7.503334045410156, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.06607677042484283, | |
| "rewards/margins": -0.007262084167450666, | |
| "rewards/rejected": -0.05881468579173088, | |
| "sft_loss": 0.003945589065551758, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.15906753513884128, | |
| "grad_norm": 5.959951004497131, | |
| "learning_rate": 4.95370972765087e-06, | |
| "logits/chosen": -0.6791335344314575, | |
| "logits/rejected": -0.6436203122138977, | |
| "logps/chosen": -0.6152352094650269, | |
| "logps/rejected": -0.7523278594017029, | |
| "loss": 0.853, | |
| "odds_ratio_loss": 8.355252265930176, | |
| "rewards/accuracies": 0.28125, | |
| "rewards/chosen": -0.07523278146982193, | |
| "rewards/margins": -0.013709258288145065, | |
| "rewards/rejected": -0.06152352690696716, | |
| "sft_loss": 0.017459843307733536, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.164552622557422, | |
| "grad_norm": 2.3211607809375328, | |
| "learning_rate": 4.944025194969586e-06, | |
| "logits/chosen": -0.6864458322525024, | |
| "logits/rejected": -0.6491591930389404, | |
| "logps/chosen": -0.591070294380188, | |
| "logps/rejected": -0.6974666118621826, | |
| "loss": 0.7936, | |
| "odds_ratio_loss": 7.89208984375, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.06974666565656662, | |
| "rewards/margins": -0.010639630258083344, | |
| "rewards/rejected": -0.05910703167319298, | |
| "sft_loss": 0.004418414086103439, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.17003770997600273, | |
| "grad_norm": 2.4315181303090307, | |
| "learning_rate": 4.933432806700004e-06, | |
| "logits/chosen": -0.6587315797805786, | |
| "logits/rejected": -0.6222696304321289, | |
| "logps/chosen": -0.6303814053535461, | |
| "logps/rejected": -0.7028146982192993, | |
| "loss": 0.8067, | |
| "odds_ratio_loss": 8.026537895202637, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.07028146833181381, | |
| "rewards/margins": -0.007243326865136623, | |
| "rewards/rejected": -0.06303813308477402, | |
| "sft_loss": 0.004071646369993687, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.17552279739458349, | |
| "grad_norm": 2.210136151504232, | |
| "learning_rate": 4.921936497481956e-06, | |
| "logits/chosen": -0.7069422006607056, | |
| "logits/rejected": -0.6679366230964661, | |
| "logps/chosen": -0.5178747177124023, | |
| "logps/rejected": -0.6546376347541809, | |
| "loss": 0.7551, | |
| "odds_ratio_loss": 7.520661354064941, | |
| "rewards/accuracies": 0.3515625, | |
| "rewards/chosen": -0.06546376645565033, | |
| "rewards/margins": -0.013676293194293976, | |
| "rewards/rejected": -0.05178747698664665, | |
| "sft_loss": 0.003003381658345461, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.1810078848131642, | |
| "grad_norm": 3.4053506226819574, | |
| "learning_rate": 4.909540537725007e-06, | |
| "logits/chosen": -0.6776018142700195, | |
| "logits/rejected": -0.6548346877098083, | |
| "logps/chosen": -0.6834210157394409, | |
| "logps/rejected": -0.6428653001785278, | |
| "loss": 0.7378, | |
| "odds_ratio_loss": 7.308858871459961, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": -0.06428653001785278, | |
| "rewards/margins": 0.004055576398968697, | |
| "rewards/rejected": -0.06834210455417633, | |
| "sft_loss": 0.006872784812003374, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.18649297223174494, | |
| "grad_norm": 2.539813335312958, | |
| "learning_rate": 4.8962495320221714e-06, | |
| "logits/chosen": -0.7110268473625183, | |
| "logits/rejected": -0.6500384211540222, | |
| "logps/chosen": -0.5525631308555603, | |
| "logps/rejected": -0.673240065574646, | |
| "loss": 0.7743, | |
| "odds_ratio_loss": 7.700582027435303, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": -0.06732401251792908, | |
| "rewards/margins": -0.012067697942256927, | |
| "rewards/rejected": -0.05525631457567215, | |
| "sft_loss": 0.0042497809045016766, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.1919780596503257, | |
| "grad_norm": 2.655970243707059, | |
| "learning_rate": 4.8820684174394935e-06, | |
| "logits/chosen": -0.7138683199882507, | |
| "logits/rejected": -0.6818023920059204, | |
| "logps/chosen": -0.5597530603408813, | |
| "logps/rejected": -0.6535578966140747, | |
| "loss": 0.7491, | |
| "odds_ratio_loss": 7.4469194412231445, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.06535579264163971, | |
| "rewards/margins": -0.009380483068525791, | |
| "rewards/rejected": -0.0559752993285656, | |
| "sft_loss": 0.004387051332741976, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.19746314706890641, | |
| "grad_norm": 2.5709022001566275, | |
| "learning_rate": 4.867002461682129e-06, | |
| "logits/chosen": -0.6977266073226929, | |
| "logits/rejected": -0.6760712265968323, | |
| "logps/chosen": -0.5894060134887695, | |
| "logps/rejected": -0.7010530829429626, | |
| "loss": 0.8056, | |
| "odds_ratio_loss": 8.00703239440918, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.07010531425476074, | |
| "rewards/margins": -0.011164708994328976, | |
| "rewards/rejected": -0.058940596878528595, | |
| "sft_loss": 0.004914172925055027, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.20294823448748714, | |
| "grad_norm": 2.3705551951599895, | |
| "learning_rate": 4.851057261137608e-06, | |
| "logits/chosen": -0.69024658203125, | |
| "logits/rejected": -0.6706061363220215, | |
| "logps/chosen": -0.5469837784767151, | |
| "logps/rejected": -0.6902478933334351, | |
| "loss": 0.7905, | |
| "odds_ratio_loss": 7.859821319580078, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -0.06902480125427246, | |
| "rewards/margins": -0.014326417818665504, | |
| "rewards/rejected": -0.05469837784767151, | |
| "sft_loss": 0.0044726720079779625, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.20843332190606786, | |
| "grad_norm": 5.234021982751859, | |
| "learning_rate": 4.8342387387970105e-06, | |
| "logits/chosen": -0.6684221625328064, | |
| "logits/rejected": -0.6224137544631958, | |
| "logps/chosen": -0.6020541191101074, | |
| "logps/rejected": -0.6657444834709167, | |
| "loss": 0.7536, | |
| "odds_ratio_loss": 7.442554473876953, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.06657445430755615, | |
| "rewards/margins": -0.0063690319657325745, | |
| "rewards/rejected": -0.06020541861653328, | |
| "sft_loss": 0.009305169805884361, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.21391840932464862, | |
| "grad_norm": 2.0334062957234833, | |
| "learning_rate": 4.816553142054806e-06, | |
| "logits/chosen": -0.6587538719177246, | |
| "logits/rejected": -0.6190416216850281, | |
| "logps/chosen": -0.5317018032073975, | |
| "logps/rejected": -0.6559545993804932, | |
| "loss": 0.7589, | |
| "odds_ratio_loss": 7.557468414306641, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -0.06559545546770096, | |
| "rewards/margins": -0.012425270862877369, | |
| "rewards/rejected": -0.053170185536146164, | |
| "sft_loss": 0.003142669564113021, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.21940349674322934, | |
| "grad_norm": 2.615502723756528, | |
| "learning_rate": 4.798007040388212e-06, | |
| "logits/chosen": -0.6537474393844604, | |
| "logits/rejected": -0.6558958888053894, | |
| "logps/chosen": -0.5594260692596436, | |
| "logps/rejected": -0.7137230634689331, | |
| "loss": 0.8176, | |
| "odds_ratio_loss": 8.131279945373535, | |
| "rewards/accuracies": 0.296875, | |
| "rewards/chosen": -0.07137230783700943, | |
| "rewards/margins": -0.015429697930812836, | |
| "rewards/rejected": -0.055942609906196594, | |
| "sft_loss": 0.004487923812121153, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.22488858416181007, | |
| "grad_norm": 3.147981874003909, | |
| "learning_rate": 4.778607322916896e-06, | |
| "logits/chosen": -0.7805840969085693, | |
| "logits/rejected": -0.7332777976989746, | |
| "logps/chosen": -0.5440269708633423, | |
| "logps/rejected": -0.6570194959640503, | |
| "loss": 0.755, | |
| "odds_ratio_loss": 7.461404800415039, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -0.06570195406675339, | |
| "rewards/margins": -0.011299250647425652, | |
| "rewards/rejected": -0.05440270155668259, | |
| "sft_loss": 0.008839858695864677, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.23037367158039082, | |
| "grad_norm": 2.4132096188646868, | |
| "learning_rate": 4.7583611958439514e-06, | |
| "logits/chosen": -0.6508989334106445, | |
| "logits/rejected": -0.6227612495422363, | |
| "logps/chosen": -0.6552981734275818, | |
| "logps/rejected": -0.7332005500793457, | |
| "loss": 0.8445, | |
| "odds_ratio_loss": 8.40545654296875, | |
| "rewards/accuracies": 0.2734375, | |
| "rewards/chosen": -0.07332006096839905, | |
| "rewards/margins": -0.007790243253111839, | |
| "rewards/rejected": -0.06552981585264206, | |
| "sft_loss": 0.0039908913895487785, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.23585875899897155, | |
| "grad_norm": 2.029276338522748, | |
| "learning_rate": 4.7372761797790836e-06, | |
| "logits/chosen": -0.6611707210540771, | |
| "logits/rejected": -0.6425488591194153, | |
| "logps/chosen": -0.5845383405685425, | |
| "logps/rejected": -0.6395372748374939, | |
| "loss": 0.7307, | |
| "odds_ratio_loss": 7.273993968963623, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.06395373493432999, | |
| "rewards/margins": -0.005499903112649918, | |
| "rewards/rejected": -0.05845382809638977, | |
| "sft_loss": 0.003296809270977974, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.24134384641755227, | |
| "grad_norm": 2.334850850202144, | |
| "learning_rate": 4.715360106945015e-06, | |
| "logits/chosen": -0.6538242101669312, | |
| "logits/rejected": -0.6651148796081543, | |
| "logps/chosen": -0.5922138690948486, | |
| "logps/rejected": -0.6789554357528687, | |
| "loss": 0.7687, | |
| "odds_ratio_loss": 7.646347999572754, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": -0.0678955465555191, | |
| "rewards/margins": -0.008674154058098793, | |
| "rewards/rejected": -0.059221383184194565, | |
| "sft_loss": 0.004072089679539204, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.24682893383613302, | |
| "grad_norm": 3.278246254509892, | |
| "learning_rate": 4.6926211182681295e-06, | |
| "logits/chosen": -0.7525214552879333, | |
| "logits/rejected": -0.7238048315048218, | |
| "logps/chosen": -0.6908121109008789, | |
| "logps/rejected": -0.6656503677368164, | |
| "loss": 0.7586, | |
| "odds_ratio_loss": 7.535221099853516, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.06656503677368164, | |
| "rewards/margins": 0.0025161777157336473, | |
| "rewards/rejected": -0.06908121705055237, | |
| "sft_loss": 0.005099683068692684, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.25231402125471375, | |
| "grad_norm": 2.1664816320305897, | |
| "learning_rate": 4.669067660354456e-06, | |
| "logits/chosen": -0.6813299059867859, | |
| "logits/rejected": -0.6507269144058228, | |
| "logps/chosen": -0.5852293372154236, | |
| "logps/rejected": -0.6381067633628845, | |
| "loss": 0.726, | |
| "odds_ratio_loss": 7.226728439331055, | |
| "rewards/accuracies": 0.3515625, | |
| "rewards/chosen": -0.06381067633628845, | |
| "rewards/margins": -0.005287742242217064, | |
| "rewards/rejected": -0.05852293595671654, | |
| "sft_loss": 0.003340219147503376, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.2577991086732945, | |
| "grad_norm": 2.3755468836145535, | |
| "learning_rate": 4.644708482352093e-06, | |
| "logits/chosen": -0.7196726202964783, | |
| "logits/rejected": -0.671117901802063, | |
| "logps/chosen": -0.496429979801178, | |
| "logps/rejected": -0.6285478472709656, | |
| "loss": 0.7368, | |
| "odds_ratio_loss": 7.332725524902344, | |
| "rewards/accuracies": 0.3203125, | |
| "rewards/chosen": -0.06285478919744492, | |
| "rewards/margins": -0.013211790472269058, | |
| "rewards/rejected": -0.04964299499988556, | |
| "sft_loss": 0.003541831858456135, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.2632841960918752, | |
| "grad_norm": 1.882676786275039, | |
| "learning_rate": 4.619552632701263e-06, | |
| "logits/chosen": -0.7029179334640503, | |
| "logits/rejected": -0.6741898059844971, | |
| "logps/chosen": -0.5508084297180176, | |
| "logps/rejected": -0.6012480854988098, | |
| "loss": 0.6905, | |
| "odds_ratio_loss": 6.879641532897949, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.06012480705976486, | |
| "rewards/margins": -0.005043962970376015, | |
| "rewards/rejected": -0.055080845952034, | |
| "sft_loss": 0.0025444331113249063, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.2687692835104559, | |
| "grad_norm": 2.3230672316134244, | |
| "learning_rate": 4.5936094557731815e-06, | |
| "logits/chosen": -0.6754356622695923, | |
| "logits/rejected": -0.6543477177619934, | |
| "logps/chosen": -0.5631517171859741, | |
| "logps/rejected": -0.6714473366737366, | |
| "loss": 0.7666, | |
| "odds_ratio_loss": 7.623527526855469, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -0.0671447217464447, | |
| "rewards/margins": -0.010829559527337551, | |
| "rewards/rejected": -0.05631516873836517, | |
| "sft_loss": 0.004294781945645809, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.2742543709290367, | |
| "grad_norm": 2.3609733896189353, | |
| "learning_rate": 4.566888588399007e-06, | |
| "logits/chosen": -0.6326004266738892, | |
| "logits/rejected": -0.6783662438392639, | |
| "logps/chosen": -0.6179240345954895, | |
| "logps/rejected": -0.64500892162323, | |
| "loss": 0.7366, | |
| "odds_ratio_loss": 7.319670677185059, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.06450089812278748, | |
| "rewards/margins": -0.002708489540964365, | |
| "rewards/rejected": -0.06179240718483925, | |
| "sft_loss": 0.0045843422412872314, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.27973945834761743, | |
| "grad_norm": 2.150795076150455, | |
| "learning_rate": 4.539399956290152e-06, | |
| "logits/chosen": -0.6697653532028198, | |
| "logits/rejected": -0.6334677934646606, | |
| "logps/chosen": -0.5712593197822571, | |
| "logps/rejected": -0.5926052927970886, | |
| "loss": 0.6852, | |
| "odds_ratio_loss": 6.818970680236816, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.0592605359852314, | |
| "rewards/margins": -0.002134602749720216, | |
| "rewards/rejected": -0.05712592974305153, | |
| "sft_loss": 0.003330723848193884, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.28522454576619816, | |
| "grad_norm": 2.2865734329168563, | |
| "learning_rate": 4.511153770351288e-06, | |
| "logits/chosen": -0.7038233280181885, | |
| "logits/rejected": -0.6797916889190674, | |
| "logps/chosen": -0.626400351524353, | |
| "logps/rejected": -0.6849571466445923, | |
| "loss": 0.7765, | |
| "odds_ratio_loss": 7.725215435028076, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -0.06849571317434311, | |
| "rewards/margins": -0.0058556715957820415, | |
| "rewards/rejected": -0.06264004111289978, | |
| "sft_loss": 0.00396262900903821, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2907096331847789, | |
| "grad_norm": 2.3920025552439967, | |
| "learning_rate": 4.482160522887404e-06, | |
| "logits/chosen": -0.6559309363365173, | |
| "logits/rejected": -0.6758289337158203, | |
| "logps/chosen": -0.6616750955581665, | |
| "logps/rejected": -0.6394928693771362, | |
| "loss": 0.7281, | |
| "odds_ratio_loss": 7.238113880157471, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": -0.06394927203655243, | |
| "rewards/margins": 0.0022182257380336523, | |
| "rewards/rejected": -0.06616750359535217, | |
| "sft_loss": 0.004315241239964962, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.2961947206033596, | |
| "grad_norm": 2.5117956914717032, | |
| "learning_rate": 4.452430983706351e-06, | |
| "logits/chosen": -0.7268598079681396, | |
| "logits/rejected": -0.7092704772949219, | |
| "logps/chosen": -0.549469530582428, | |
| "logps/rejected": -0.6699557304382324, | |
| "loss": 0.7657, | |
| "odds_ratio_loss": 7.608267784118652, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.06699557602405548, | |
| "rewards/margins": -0.012048620730638504, | |
| "rewards/rejected": -0.054946959018707275, | |
| "sft_loss": 0.004888926167041063, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.30167980802194033, | |
| "grad_norm": 6.1164968534949375, | |
| "learning_rate": 4.421976196118297e-06, | |
| "logits/chosen": -0.6996357440948486, | |
| "logits/rejected": -0.6125066876411438, | |
| "logps/chosen": -0.5666108131408691, | |
| "logps/rejected": -0.7606990933418274, | |
| "loss": 0.8647, | |
| "odds_ratio_loss": 7.164684295654297, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.0760699063539505, | |
| "rewards/margins": -0.019408833235502243, | |
| "rewards/rejected": -0.056661076843738556, | |
| "sft_loss": 0.14825321733951569, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.30716489544052106, | |
| "grad_norm": 2.3449325969971553, | |
| "learning_rate": 4.390807472833585e-06, | |
| "logits/chosen": -0.7196471095085144, | |
| "logits/rejected": -0.6497766375541687, | |
| "logps/chosen": -0.5705811977386475, | |
| "logps/rejected": -0.6152413487434387, | |
| "loss": 0.7056, | |
| "odds_ratio_loss": 7.016576766967773, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.061524130403995514, | |
| "rewards/margins": -0.004466014914214611, | |
| "rewards/rejected": -0.05705811828374863, | |
| "sft_loss": 0.003966608550399542, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.31264998285910184, | |
| "grad_norm": 1.9758244727864838, | |
| "learning_rate": 4.358936391760524e-06, | |
| "logits/chosen": -0.6982048749923706, | |
| "logits/rejected": -0.6489894986152649, | |
| "logps/chosen": -0.7246107459068298, | |
| "logps/rejected": -0.6382527947425842, | |
| "loss": 0.7298, | |
| "odds_ratio_loss": 7.263551235198975, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.06382527947425842, | |
| "rewards/margins": 0.008635802194476128, | |
| "rewards/rejected": -0.0724610835313797, | |
| "sft_loss": 0.0034022387117147446, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.31813507027768256, | |
| "grad_norm": 2.174135911231623, | |
| "learning_rate": 4.32637479170467e-06, | |
| "logits/chosen": -0.7073652148246765, | |
| "logits/rejected": -0.6690998077392578, | |
| "logps/chosen": -0.6008009314537048, | |
| "logps/rejected": -0.6260058879852295, | |
| "loss": 0.7136, | |
| "odds_ratio_loss": 7.0961079597473145, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.06260059028863907, | |
| "rewards/margins": -0.0025204988196492195, | |
| "rewards/rejected": -0.060080088675022125, | |
| "sft_loss": 0.00403111707419157, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.3236201576962633, | |
| "grad_norm": 2.168405994153748, | |
| "learning_rate": 4.293134767971193e-06, | |
| "logits/chosen": -0.7020463347434998, | |
| "logits/rejected": -0.6942731142044067, | |
| "logps/chosen": -0.5773230791091919, | |
| "logps/rejected": -0.6443250179290771, | |
| "loss": 0.7339, | |
| "odds_ratio_loss": 7.302745342254639, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.06443249434232712, | |
| "rewards/margins": -0.006700189784169197, | |
| "rewards/rejected": -0.05773231014609337, | |
| "sft_loss": 0.003663026262074709, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.329105245114844, | |
| "grad_norm": 2.185211880662804, | |
| "learning_rate": 4.259228667871963e-06, | |
| "logits/chosen": -0.6689984202384949, | |
| "logits/rejected": -0.675392746925354, | |
| "logps/chosen": -0.6228610277175903, | |
| "logps/rejected": -0.6264554262161255, | |
| "loss": 0.7157, | |
| "odds_ratio_loss": 7.120779991149902, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.06264554709196091, | |
| "rewards/margins": -0.0003594460431486368, | |
| "rewards/rejected": -0.062286097556352615, | |
| "sft_loss": 0.0035854382440447807, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.33459033253342474, | |
| "grad_norm": 2.57070026430243, | |
| "learning_rate": 4.22466908613903e-06, | |
| "logits/chosen": -0.702937126159668, | |
| "logits/rejected": -0.7012047171592712, | |
| "logps/chosen": -0.7283627986907959, | |
| "logps/rejected": -0.6880404949188232, | |
| "loss": 0.7811, | |
| "odds_ratio_loss": 7.746993064880371, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.0688040480017662, | |
| "rewards/margins": 0.0040322281420230865, | |
| "rewards/rejected": -0.07283627241849899, | |
| "sft_loss": 0.006447223015129566, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.34007541995200546, | |
| "grad_norm": 4.244463596912575, | |
| "learning_rate": 4.189468860246192e-06, | |
| "logits/chosen": -0.6791540384292603, | |
| "logits/rejected": -0.6635554432868958, | |
| "logps/chosen": -0.6107332706451416, | |
| "logps/rejected": -0.6247188448905945, | |
| "loss": 0.7142, | |
| "odds_ratio_loss": 7.000344753265381, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.06247188523411751, | |
| "rewards/margins": -0.001398557680658996, | |
| "rewards/rejected": -0.06107332557439804, | |
| "sft_loss": 0.014190858229994774, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.34556050737058625, | |
| "grad_norm": 2.025728542662067, | |
| "learning_rate": 4.153641065640402e-06, | |
| "logits/chosen": -0.693527340888977, | |
| "logits/rejected": -0.6523553133010864, | |
| "logps/chosen": -0.5719731450080872, | |
| "logps/rejected": -0.6185833811759949, | |
| "loss": 0.7056, | |
| "odds_ratio_loss": 7.020816802978516, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.06185833364725113, | |
| "rewards/margins": -0.004661021754145622, | |
| "rewards/rejected": -0.057197313755750656, | |
| "sft_loss": 0.003522280603647232, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.35104559478916697, | |
| "grad_norm": 2.295177803143149, | |
| "learning_rate": 4.1171990108847705e-06, | |
| "logits/chosen": -0.6868746280670166, | |
| "logits/rejected": -0.6699408888816833, | |
| "logps/chosen": -0.6467013359069824, | |
| "logps/rejected": -0.6266091465950012, | |
| "loss": 0.719, | |
| "odds_ratio_loss": 7.1493024826049805, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.06266091763973236, | |
| "rewards/margins": 0.002009219955652952, | |
| "rewards/rejected": -0.0646701380610466, | |
| "sft_loss": 0.004113649483770132, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.3565306822077477, | |
| "grad_norm": 2.2636310766420693, | |
| "learning_rate": 4.080156232714976e-06, | |
| "logits/chosen": -0.6988135576248169, | |
| "logits/rejected": -0.6200304627418518, | |
| "logps/chosen": -0.560631513595581, | |
| "logps/rejected": -0.6326756477355957, | |
| "loss": 0.7288, | |
| "odds_ratio_loss": 7.247946262359619, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.06326755881309509, | |
| "rewards/margins": -0.007204408757388592, | |
| "rewards/rejected": -0.056063152849674225, | |
| "sft_loss": 0.004002667032182217, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.3620157696263284, | |
| "grad_norm": 2.3726953806807543, | |
| "learning_rate": 4.0425264910109245e-06, | |
| "logits/chosen": -0.6342322826385498, | |
| "logits/rejected": -0.6143465042114258, | |
| "logps/chosen": -0.5673171877861023, | |
| "logps/rejected": -0.6150503158569336, | |
| "loss": 0.704, | |
| "odds_ratio_loss": 6.996849060058594, | |
| "rewards/accuracies": 0.3515625, | |
| "rewards/chosen": -0.0615050233900547, | |
| "rewards/margins": -0.004773305729031563, | |
| "rewards/rejected": -0.05673171579837799, | |
| "sft_loss": 0.004326392896473408, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.36750085704490915, | |
| "grad_norm": 2.3264926175994343, | |
| "learning_rate": 4.004323763685511e-06, | |
| "logits/chosen": -0.6961945295333862, | |
| "logits/rejected": -0.6595050096511841, | |
| "logps/chosen": -0.7507973909378052, | |
| "logps/rejected": -0.6752977967262268, | |
| "loss": 0.7631, | |
| "odds_ratio_loss": 7.585504531860352, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.06752977520227432, | |
| "rewards/margins": 0.0075499615631997585, | |
| "rewards/rejected": -0.07507973909378052, | |
| "sft_loss": 0.004573096055537462, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.37298594446348987, | |
| "grad_norm": 2.1356607945545245, | |
| "learning_rate": 3.965562241492401e-06, | |
| "logits/chosen": -0.6466645002365112, | |
| "logits/rejected": -0.5876697301864624, | |
| "logps/chosen": -0.6032683253288269, | |
| "logps/rejected": -0.6798368692398071, | |
| "loss": 0.7748, | |
| "odds_ratio_loss": 7.713741779327393, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": -0.06798368692398071, | |
| "rewards/margins": -0.007656855508685112, | |
| "rewards/rejected": -0.06032683700323105, | |
| "sft_loss": 0.0033814627677202225, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.3784710318820706, | |
| "grad_norm": 2.2504419735299632, | |
| "learning_rate": 3.92625632275474e-06, | |
| "logits/chosen": -0.6422359943389893, | |
| "logits/rejected": -0.635747492313385, | |
| "logps/chosen": -0.6667078137397766, | |
| "logps/rejected": -0.6623092889785767, | |
| "loss": 0.7479, | |
| "odds_ratio_loss": 7.437192916870117, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.06623092293739319, | |
| "rewards/margins": 0.00043986161472275853, | |
| "rewards/rejected": -0.06667079031467438, | |
| "sft_loss": 0.004172264598309994, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.3839561193006514, | |
| "grad_norm": 2.305721199557074, | |
| "learning_rate": 3.886420608016767e-06, | |
| "logits/chosen": -0.6914317607879639, | |
| "logits/rejected": -0.6497669219970703, | |
| "logps/chosen": -0.6576675176620483, | |
| "logps/rejected": -0.6327704191207886, | |
| "loss": 0.72, | |
| "odds_ratio_loss": 7.161937236785889, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": -0.06327703595161438, | |
| "rewards/margins": 0.002489713719114661, | |
| "rewards/rejected": -0.06576675176620483, | |
| "sft_loss": 0.003831625683233142, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3894412067192321, | |
| "grad_norm": 2.3551007707163962, | |
| "learning_rate": 3.846069894620306e-06, | |
| "logits/chosen": -0.6762869358062744, | |
| "logits/rejected": -0.689199686050415, | |
| "logps/chosen": -0.5313611030578613, | |
| "logps/rejected": -0.6086665391921997, | |
| "loss": 0.6972, | |
| "odds_ratio_loss": 6.926117420196533, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.06086665764451027, | |
| "rewards/margins": -0.007730549667030573, | |
| "rewards/rejected": -0.05313611030578613, | |
| "sft_loss": 0.0045737153850495815, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.39492629413781283, | |
| "grad_norm": 2.556752307102043, | |
| "learning_rate": 3.80521917120816e-06, | |
| "logits/chosen": -0.6555116772651672, | |
| "logits/rejected": -0.6547709107398987, | |
| "logps/chosen": -0.624289333820343, | |
| "logps/rejected": -0.6435787081718445, | |
| "loss": 0.7314, | |
| "odds_ratio_loss": 7.269766330718994, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.06435786932706833, | |
| "rewards/margins": -0.001928933197632432, | |
| "rewards/rejected": -0.06242894381284714, | |
| "sft_loss": 0.00443875789642334, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.40041138155639355, | |
| "grad_norm": 2.62244580300382, | |
| "learning_rate": 3.7638836121564414e-06, | |
| "logits/chosen": -0.633210301399231, | |
| "logits/rejected": -0.6068264245986938, | |
| "logps/chosen": -0.6534138917922974, | |
| "logps/rejected": -0.6456267833709717, | |
| "loss": 0.7342, | |
| "odds_ratio_loss": 7.288712501525879, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": -0.06456267833709717, | |
| "rewards/margins": 0.0007787136128172278, | |
| "rewards/rejected": -0.06534139811992645, | |
| "sft_loss": 0.005336450412869453, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.4058964689749743, | |
| "grad_norm": 2.375173958989858, | |
| "learning_rate": 3.72207857193791e-06, | |
| "logits/chosen": -0.6621152758598328, | |
| "logits/rejected": -0.6223613023757935, | |
| "logps/chosen": -0.5641170144081116, | |
| "logps/rejected": -0.6517760157585144, | |
| "loss": 0.7414, | |
| "odds_ratio_loss": 7.36290979385376, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.06517761200666428, | |
| "rewards/margins": -0.00876590609550476, | |
| "rewards/rejected": -0.056411705911159515, | |
| "sft_loss": 0.005075749009847641, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.411381556393555, | |
| "grad_norm": 2.178626918834999, | |
| "learning_rate": 3.679819579418414e-06, | |
| "logits/chosen": -0.6223607659339905, | |
| "logits/rejected": -0.6354334354400635, | |
| "logps/chosen": -0.6071732640266418, | |
| "logps/rejected": -0.664821207523346, | |
| "loss": 0.7537, | |
| "odds_ratio_loss": 7.4999518394470215, | |
| "rewards/accuracies": 0.3515625, | |
| "rewards/chosen": -0.06648211926221848, | |
| "rewards/margins": -0.005764788947999477, | |
| "rewards/rejected": -0.06071732938289642, | |
| "sft_loss": 0.003658156841993332, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.41686664381213573, | |
| "grad_norm": 2.029172062754319, | |
| "learning_rate": 3.6371223320885492e-06, | |
| "logits/chosen": -0.6456342339515686, | |
| "logits/rejected": -0.6101202368736267, | |
| "logps/chosen": -0.5387505888938904, | |
| "logps/rejected": -0.653649091720581, | |
| "loss": 0.748, | |
| "odds_ratio_loss": 7.446066379547119, | |
| "rewards/accuracies": 0.28125, | |
| "rewards/chosen": -0.06536491215229034, | |
| "rewards/margins": -0.011489855125546455, | |
| "rewards/rejected": -0.05387505888938904, | |
| "sft_loss": 0.0033676247112452984, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.4223517312307165, | |
| "grad_norm": 2.566739156357152, | |
| "learning_rate": 3.5940026902326825e-06, | |
| "logits/chosen": -0.6308640837669373, | |
| "logits/rejected": -0.5544230937957764, | |
| "logps/chosen": -0.6081095933914185, | |
| "logps/rejected": -0.6010054349899292, | |
| "loss": 0.686, | |
| "odds_ratio_loss": 6.817296028137207, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": -0.06010054424405098, | |
| "rewards/margins": 0.000710406806319952, | |
| "rewards/rejected": -0.06081094965338707, | |
| "sft_loss": 0.00426267646253109, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.42783681864929723, | |
| "grad_norm": 2.2168093785628424, | |
| "learning_rate": 3.550476671037505e-06, | |
| "logits/chosen": -0.6168922781944275, | |
| "logits/rejected": -0.6250343322753906, | |
| "logps/chosen": -0.7373520135879517, | |
| "logps/rejected": -0.6632636785507202, | |
| "loss": 0.7541, | |
| "odds_ratio_loss": 7.504645824432373, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.06632636487483978, | |
| "rewards/margins": 0.0074088433757424355, | |
| "rewards/rejected": -0.07373520731925964, | |
| "sft_loss": 0.003666748758405447, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.43332190606787796, | |
| "grad_norm": 2.0671981983555767, | |
| "learning_rate": 3.5065604426422995e-06, | |
| "logits/chosen": -0.6872790455818176, | |
| "logits/rejected": -0.6633861064910889, | |
| "logps/chosen": -0.6065261363983154, | |
| "logps/rejected": -0.6005845069885254, | |
| "loss": 0.6867, | |
| "odds_ratio_loss": 6.836608409881592, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.06005845218896866, | |
| "rewards/margins": 0.0005941606359556317, | |
| "rewards/rejected": -0.06065261363983154, | |
| "sft_loss": 0.003060600720345974, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.4388069934864587, | |
| "grad_norm": 2.2579388709063477, | |
| "learning_rate": 3.462270318133136e-06, | |
| "logits/chosen": -0.7127959728240967, | |
| "logits/rejected": -0.7076155543327332, | |
| "logps/chosen": -0.6201507449150085, | |
| "logps/rejected": -0.6548338532447815, | |
| "loss": 0.7463, | |
| "odds_ratio_loss": 7.421444892883301, | |
| "rewards/accuracies": 0.296875, | |
| "rewards/chosen": -0.06548339128494263, | |
| "rewards/margins": -0.003468305105343461, | |
| "rewards/rejected": -0.06201507896184921, | |
| "sft_loss": 0.0041392529383301735, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.4442920809050394, | |
| "grad_norm": 2.2666354495836587, | |
| "learning_rate": 3.4176227494832305e-06, | |
| "logits/chosen": -0.5993680953979492, | |
| "logits/rejected": -0.5843874216079712, | |
| "logps/chosen": -0.5722631216049194, | |
| "logps/rejected": -0.6499100923538208, | |
| "loss": 0.7394, | |
| "odds_ratio_loss": 7.35059118270874, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -0.06499100476503372, | |
| "rewards/margins": -0.007764694280922413, | |
| "rewards/rejected": -0.05722631514072418, | |
| "sft_loss": 0.004326997324824333, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.44977716832362014, | |
| "grad_norm": 2.283880288724398, | |
| "learning_rate": 3.3726343214417023e-06, | |
| "logits/chosen": -0.6333982348442078, | |
| "logits/rejected": -0.6226303577423096, | |
| "logps/chosen": -0.7987644672393799, | |
| "logps/rejected": -0.652344822883606, | |
| "loss": 0.734, | |
| "odds_ratio_loss": 7.291712284088135, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.0652344822883606, | |
| "rewards/margins": 0.014641974121332169, | |
| "rewards/rejected": -0.07987645268440247, | |
| "sft_loss": 0.0048220655880868435, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.4552622557422009, | |
| "grad_norm": 2.1155189965315406, | |
| "learning_rate": 3.327321745373021e-06, | |
| "logits/chosen": -0.6671404242515564, | |
| "logits/rejected": -0.6946146488189697, | |
| "logps/chosen": -0.5693039894104004, | |
| "logps/rejected": -0.6252968907356262, | |
| "loss": 0.7121, | |
| "odds_ratio_loss": 7.0859150886535645, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.06252970546483994, | |
| "rewards/margins": -0.005599300377070904, | |
| "rewards/rejected": -0.05693039298057556, | |
| "sft_loss": 0.003511702874675393, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.46074734316078164, | |
| "grad_norm": 1.9540588513738342, | |
| "learning_rate": 3.2817018530494164e-06, | |
| "logits/chosen": -0.6938011646270752, | |
| "logits/rejected": -0.6819745302200317, | |
| "logps/chosen": -0.6028576493263245, | |
| "logps/rejected": -0.6305505037307739, | |
| "loss": 0.7166, | |
| "odds_ratio_loss": 7.133669853210449, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.06305506080389023, | |
| "rewards/margins": -0.0027692890726029873, | |
| "rewards/rejected": -0.060285769402980804, | |
| "sft_loss": 0.003260136814787984, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.46623243057936237, | |
| "grad_norm": 2.3068483623141813, | |
| "learning_rate": 3.2357915903985605e-06, | |
| "logits/chosen": -0.669509768486023, | |
| "logits/rejected": -0.6838399171829224, | |
| "logps/chosen": -0.7373601198196411, | |
| "logps/rejected": -0.63920658826828, | |
| "loss": 0.7284, | |
| "odds_ratio_loss": 7.239907264709473, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": -0.06392066925764084, | |
| "rewards/margins": 0.009815343655645847, | |
| "rewards/rejected": -0.07373600453138351, | |
| "sft_loss": 0.004396573640406132, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.4717175179979431, | |
| "grad_norm": 2.400639181271588, | |
| "learning_rate": 3.1896080112088477e-06, | |
| "logits/chosen": -0.6363841891288757, | |
| "logits/rejected": -0.6474689245223999, | |
| "logps/chosen": -0.6146495938301086, | |
| "logps/rejected": -0.665170431137085, | |
| "loss": 0.7518, | |
| "odds_ratio_loss": 7.468950271606445, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.06651704013347626, | |
| "rewards/margins": -0.005052081309258938, | |
| "rewards/rejected": -0.061464957892894745, | |
| "sft_loss": 0.00492935162037611, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.4772026054165238, | |
| "grad_norm": 6.788892165244669, | |
| "learning_rate": 3.143168270794612e-06, | |
| "logits/chosen": -0.7378703355789185, | |
| "logits/rejected": -0.7363454699516296, | |
| "logps/chosen": -0.792299747467041, | |
| "logps/rejected": -0.6177616119384766, | |
| "loss": 0.6952, | |
| "odds_ratio_loss": 6.825002670288086, | |
| "rewards/accuracies": 0.4453125, | |
| "rewards/chosen": -0.061776161193847656, | |
| "rewards/margins": 0.017453810200095177, | |
| "rewards/rejected": -0.07922996580600739, | |
| "sft_loss": 0.012708396650850773, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.48268769283510454, | |
| "grad_norm": 4.194375970507543, | |
| "learning_rate": 3.0964896196236217e-06, | |
| "logits/chosen": -0.6821984052658081, | |
| "logits/rejected": -0.6771599054336548, | |
| "logps/chosen": -0.6080144643783569, | |
| "logps/rejected": -0.6563798189163208, | |
| "loss": 0.7478, | |
| "odds_ratio_loss": 7.3482136726379395, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": -0.0656379908323288, | |
| "rewards/margins": -0.004836536943912506, | |
| "rewards/rejected": -0.06080144643783569, | |
| "sft_loss": 0.012991908006370068, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.48817278025368527, | |
| "grad_norm": 3.2712874288459868, | |
| "learning_rate": 3.0495893969092395e-06, | |
| "logits/chosen": -0.7360261082649231, | |
| "logits/rejected": -0.648233950138092, | |
| "logps/chosen": -0.6474156975746155, | |
| "logps/rejected": -0.6273612976074219, | |
| "loss": 0.7148, | |
| "odds_ratio_loss": 7.092808723449707, | |
| "rewards/accuracies": 0.4296875, | |
| "rewards/chosen": -0.0627361312508583, | |
| "rewards/margins": 0.002005442278459668, | |
| "rewards/rejected": -0.0647415742278099, | |
| "sft_loss": 0.005477006547152996, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.49365786767226605, | |
| "grad_norm": 3.363992426087972, | |
| "learning_rate": 3.0024850241696128e-06, | |
| "logits/chosen": -0.6951555013656616, | |
| "logits/rejected": -0.6976528167724609, | |
| "logps/chosen": -0.6175632476806641, | |
| "logps/rejected": -0.6432512402534485, | |
| "loss": 0.7269, | |
| "odds_ratio_loss": 7.175661087036133, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": -0.06432512402534485, | |
| "rewards/margins": -0.0025687951128929853, | |
| "rewards/rejected": -0.061756327748298645, | |
| "sft_loss": 0.009336180053651333, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4991429550908468, | |
| "grad_norm": 2.4802416219258125, | |
| "learning_rate": 2.9551939987562866e-06, | |
| "logits/chosen": -0.6312170028686523, | |
| "logits/rejected": -0.6572614908218384, | |
| "logps/chosen": -0.590150773525238, | |
| "logps/rejected": -0.6796594858169556, | |
| "loss": 0.7688, | |
| "odds_ratio_loss": 7.620222568511963, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.06796594709157944, | |
| "rewards/margins": -0.008950873278081417, | |
| "rewards/rejected": -0.059015076607465744, | |
| "sft_loss": 0.006825140677392483, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.5046280425094275, | |
| "grad_norm": 2.0888605610235156, | |
| "learning_rate": 2.907733887354657e-06, | |
| "logits/chosen": -0.7382671236991882, | |
| "logits/rejected": -0.704073965549469, | |
| "logps/chosen": -0.60980224609375, | |
| "logps/rejected": -0.5962368249893188, | |
| "loss": 0.6756, | |
| "odds_ratio_loss": 6.72347354888916, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": -0.05962368845939636, | |
| "rewards/margins": 0.0013565376866608858, | |
| "rewards/rejected": -0.06098022684454918, | |
| "sft_loss": 0.0032284611370414495, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.5101131299280083, | |
| "grad_norm": 2.594959628383668, | |
| "learning_rate": 2.8601223194586613e-06, | |
| "logits/chosen": -0.6301463842391968, | |
| "logits/rejected": -0.6508597731590271, | |
| "logps/chosen": -0.6760177612304688, | |
| "logps/rejected": -0.6208183169364929, | |
| "loss": 0.6984, | |
| "odds_ratio_loss": 6.931189060211182, | |
| "rewards/accuracies": 0.4453125, | |
| "rewards/chosen": -0.06208183616399765, | |
| "rewards/margins": 0.005519941449165344, | |
| "rewards/rejected": -0.067601777613163, | |
| "sft_loss": 0.0052444045431911945, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.515598217346589, | |
| "grad_norm": 2.4177461424114957, | |
| "learning_rate": 2.8123769808221407e-06, | |
| "logits/chosen": -0.7098735570907593, | |
| "logits/rejected": -0.694360613822937, | |
| "logps/chosen": -0.6068992614746094, | |
| "logps/rejected": -0.672624945640564, | |
| "loss": 0.7656, | |
| "odds_ratio_loss": 7.604451656341553, | |
| "rewards/accuracies": 0.3203125, | |
| "rewards/chosen": -0.06726250052452087, | |
| "rewards/margins": -0.006572564598172903, | |
| "rewards/rejected": -0.060689933598041534, | |
| "sft_loss": 0.005188749171793461, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.5210833047651697, | |
| "grad_norm": 2.1914520317020125, | |
| "learning_rate": 2.7645156068893075e-06, | |
| "logits/chosen": -0.6409396529197693, | |
| "logits/rejected": -0.6979643702507019, | |
| "logps/chosen": -0.534928560256958, | |
| "logps/rejected": -0.6406592726707458, | |
| "loss": 0.7345, | |
| "odds_ratio_loss": 7.306841850280762, | |
| "rewards/accuracies": 0.3515625, | |
| "rewards/chosen": -0.06406592577695847, | |
| "rewards/margins": -0.010573070496320724, | |
| "rewards/rejected": -0.05349285155534744, | |
| "sft_loss": 0.0038562421686947346, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.5265683921837504, | |
| "grad_norm": 2.0840677499320335, | |
| "learning_rate": 2.716555976206748e-06, | |
| "logits/chosen": -0.7479691505432129, | |
| "logits/rejected": -0.723030149936676, | |
| "logps/chosen": -0.5895535349845886, | |
| "logps/rejected": -0.6176682114601135, | |
| "loss": 0.7002, | |
| "odds_ratio_loss": 6.960841655731201, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.06176682561635971, | |
| "rewards/margins": -0.0028114626184105873, | |
| "rewards/rejected": -0.0589553564786911, | |
| "sft_loss": 0.004080518614500761, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.5320534796023312, | |
| "grad_norm": 2.444265846509054, | |
| "learning_rate": 2.6685159038194202e-06, | |
| "logits/chosen": -0.6860970258712769, | |
| "logits/rejected": -0.6489076018333435, | |
| "logps/chosen": -0.61269611120224, | |
| "logps/rejected": -0.6757325530052185, | |
| "loss": 0.7617, | |
| "odds_ratio_loss": 7.5671162605285645, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": -0.06757325679063797, | |
| "rewards/margins": -0.006303644739091396, | |
| "rewards/rejected": -0.061269611120224, | |
| "sft_loss": 0.0049641747027635574, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.5375385670209119, | |
| "grad_norm": 2.128773042167142, | |
| "learning_rate": 2.6204132346530936e-06, | |
| "logits/chosen": -0.7502190470695496, | |
| "logits/rejected": -0.7040455937385559, | |
| "logps/chosen": -0.7278321981430054, | |
| "logps/rejected": -0.6399706602096558, | |
| "loss": 0.7272, | |
| "odds_ratio_loss": 7.2354841232299805, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": -0.0639970600605011, | |
| "rewards/margins": 0.008786162361502647, | |
| "rewards/rejected": -0.0727832242846489, | |
| "sft_loss": 0.00368284760043025, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.5430236544394926, | |
| "grad_norm": 2.5919828332695833, | |
| "learning_rate": 2.572265836885682e-06, | |
| "logits/chosen": -0.7643656134605408, | |
| "logits/rejected": -0.6870676279067993, | |
| "logps/chosen": -0.7556079030036926, | |
| "logps/rejected": -0.6754346489906311, | |
| "loss": 0.7635, | |
| "odds_ratio_loss": 7.5766706466674805, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.06754346191883087, | |
| "rewards/margins": 0.008017327636480331, | |
| "rewards/rejected": -0.0755607932806015, | |
| "sft_loss": 0.005802985280752182, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.5485087418580734, | |
| "grad_norm": 2.449701609589594, | |
| "learning_rate": 2.524091595309952e-06, | |
| "logits/chosen": -0.701339602470398, | |
| "logits/rejected": -0.6497517824172974, | |
| "logps/chosen": -0.6323676705360413, | |
| "logps/rejected": -0.633608341217041, | |
| "loss": 0.7136, | |
| "odds_ratio_loss": 7.087641716003418, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.06336082518100739, | |
| "rewards/margins": -0.00012405950110405684, | |
| "rewards/rejected": -0.0632367730140686, | |
| "sft_loss": 0.004797851666808128, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5539938292766541, | |
| "grad_norm": 1.8980955014875789, | |
| "learning_rate": 2.475908404690049e-06, | |
| "logits/chosen": -0.7044223546981812, | |
| "logits/rejected": -0.6829949021339417, | |
| "logps/chosen": -0.5901881456375122, | |
| "logps/rejected": -0.5841273665428162, | |
| "loss": 0.6646, | |
| "odds_ratio_loss": 6.614431381225586, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": -0.05841274932026863, | |
| "rewards/margins": 0.0006060737650841475, | |
| "rewards/rejected": -0.0590188167989254, | |
| "sft_loss": 0.0031673426274210215, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.5594789166952349, | |
| "grad_norm": 2.283805508955311, | |
| "learning_rate": 2.427734163114319e-06, | |
| "logits/chosen": -0.7973791360855103, | |
| "logits/rejected": -0.7453299164772034, | |
| "logps/chosen": -0.5993779301643372, | |
| "logps/rejected": -0.6565759778022766, | |
| "loss": 0.7469, | |
| "odds_ratio_loss": 7.423154354095459, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -0.0656576007604599, | |
| "rewards/margins": -0.0057198042050004005, | |
| "rewards/rejected": -0.059937797486782074, | |
| "sft_loss": 0.004618693143129349, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.5649640041138155, | |
| "grad_norm": 2.1097973408223165, | |
| "learning_rate": 2.3795867653469072e-06, | |
| "logits/chosen": -0.7150146961212158, | |
| "logits/rejected": -0.6962917447090149, | |
| "logps/chosen": -0.6036943793296814, | |
| "logps/rejected": -0.6037816405296326, | |
| "loss": 0.6859, | |
| "odds_ratio_loss": 6.8253865242004395, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": -0.06037816405296326, | |
| "rewards/margins": -8.72323289513588e-06, | |
| "rewards/rejected": -0.06036944314837456, | |
| "sft_loss": 0.0033163288608193398, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.5704490915323963, | |
| "grad_norm": 2.261348542854153, | |
| "learning_rate": 2.3314840961805806e-06, | |
| "logits/chosen": -0.7404282689094543, | |
| "logits/rejected": -0.7224586606025696, | |
| "logps/chosen": -0.6297097206115723, | |
| "logps/rejected": -0.6101536154747009, | |
| "loss": 0.6902, | |
| "odds_ratio_loss": 6.866119384765625, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.06101536750793457, | |
| "rewards/margins": 0.0019556120969355106, | |
| "rewards/rejected": -0.06297097355127335, | |
| "sft_loss": 0.0035803657956421375, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.575934178950977, | |
| "grad_norm": 2.3462527146244394, | |
| "learning_rate": 2.2834440237932537e-06, | |
| "logits/chosen": -0.7421097755432129, | |
| "logits/rejected": -0.7392350435256958, | |
| "logps/chosen": -0.5688321590423584, | |
| "logps/rejected": -0.5941362380981445, | |
| "loss": 0.676, | |
| "odds_ratio_loss": 6.721547603607178, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.05941362306475639, | |
| "rewards/margins": -0.0025304073933511972, | |
| "rewards/rejected": -0.05688321590423584, | |
| "sft_loss": 0.003852433292195201, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.5814192663695578, | |
| "grad_norm": 2.2368897755771844, | |
| "learning_rate": 2.2354843931106933e-06, | |
| "logits/chosen": -0.6827410459518433, | |
| "logits/rejected": -0.6356707811355591, | |
| "logps/chosen": -0.7240877747535706, | |
| "logps/rejected": -0.6512930393218994, | |
| "loss": 0.7339, | |
| "odds_ratio_loss": 7.29488468170166, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.06512930989265442, | |
| "rewards/margins": 0.007279472425580025, | |
| "rewards/rejected": -0.0724087804555893, | |
| "sft_loss": 0.004439079202711582, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.5869043537881385, | |
| "grad_norm": 3.1022172612718313, | |
| "learning_rate": 2.1876230191778598e-06, | |
| "logits/chosen": -0.749907374382019, | |
| "logits/rejected": -0.7236763834953308, | |
| "logps/chosen": -0.6079933047294617, | |
| "logps/rejected": -0.6530497074127197, | |
| "loss": 0.7391, | |
| "odds_ratio_loss": 7.280797481536865, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.06530497968196869, | |
| "rewards/margins": -0.004505641758441925, | |
| "rewards/rejected": -0.06079933047294617, | |
| "sft_loss": 0.01101373415440321, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.5923894412067192, | |
| "grad_norm": 2.197063400051544, | |
| "learning_rate": 2.13987768054134e-06, | |
| "logits/chosen": -0.6889519095420837, | |
| "logits/rejected": -0.6868714094161987, | |
| "logps/chosen": -0.6039581894874573, | |
| "logps/rejected": -0.5748084187507629, | |
| "loss": 0.6528, | |
| "odds_ratio_loss": 6.491232395172119, | |
| "rewards/accuracies": 0.4296875, | |
| "rewards/chosen": -0.057480841875076294, | |
| "rewards/margins": 0.00291498308070004, | |
| "rewards/rejected": -0.060395821928977966, | |
| "sft_loss": 0.00366477994248271, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.5978745286253, | |
| "grad_norm": 2.159407846285504, | |
| "learning_rate": 2.0922661126453436e-06, | |
| "logits/chosen": -0.6941989064216614, | |
| "logits/rejected": -0.6537268757820129, | |
| "logps/chosen": -0.7240587472915649, | |
| "logps/rejected": -0.6288081407546997, | |
| "loss": 0.7097, | |
| "odds_ratio_loss": 7.062224864959717, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": -0.06288080662488937, | |
| "rewards/margins": 0.00952506810426712, | |
| "rewards/rejected": -0.0724058747291565, | |
| "sft_loss": 0.0034658340737223625, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.6033596160438807, | |
| "grad_norm": 2.4398874181778822, | |
| "learning_rate": 2.044806001243714e-06, | |
| "logits/chosen": -0.7023935317993164, | |
| "logits/rejected": -0.6535657644271851, | |
| "logps/chosen": -0.6954001188278198, | |
| "logps/rejected": -0.6107276082038879, | |
| "loss": 0.6873, | |
| "odds_ratio_loss": 6.822115898132324, | |
| "rewards/accuracies": 0.4921875, | |
| "rewards/chosen": -0.061072759330272675, | |
| "rewards/margins": 0.008467256091535091, | |
| "rewards/rejected": -0.06954001635313034, | |
| "sft_loss": 0.00511753186583519, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.6088447034624614, | |
| "grad_norm": 3.1443922922473795, | |
| "learning_rate": 1.9975149758303885e-06, | |
| "logits/chosen": -0.6652725338935852, | |
| "logits/rejected": -0.6267548203468323, | |
| "logps/chosen": -0.7502924799919128, | |
| "logps/rejected": -0.6726891398429871, | |
| "loss": 0.7606, | |
| "odds_ratio_loss": 7.564757347106934, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -0.06726891547441483, | |
| "rewards/margins": 0.007760328706353903, | |
| "rewards/rejected": -0.07502924650907516, | |
| "sft_loss": 0.004097915254533291, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.6143297908810421, | |
| "grad_norm": 3.640891877033526, | |
| "learning_rate": 1.9504106030907605e-06, | |
| "logits/chosen": -0.6462224125862122, | |
| "logits/rejected": -0.661245584487915, | |
| "logps/chosen": -0.6592689752578735, | |
| "logps/rejected": -0.6112386584281921, | |
| "loss": 0.6912, | |
| "odds_ratio_loss": 6.873685836791992, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": -0.06112387031316757, | |
| "rewards/margins": 0.00480302982032299, | |
| "rewards/rejected": -0.06592689454555511, | |
| "sft_loss": 0.0038500106893479824, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.6198148782996229, | |
| "grad_norm": 2.7204380499722967, | |
| "learning_rate": 1.9035103803763793e-06, | |
| "logits/chosen": -0.6889287233352661, | |
| "logits/rejected": -0.7191394567489624, | |
| "logps/chosen": -0.6261990070343018, | |
| "logps/rejected": -0.6758837699890137, | |
| "loss": 0.7625, | |
| "odds_ratio_loss": 7.566086769104004, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": -0.06758838146924973, | |
| "rewards/margins": -0.004968480207026005, | |
| "rewards/rejected": -0.0626199021935463, | |
| "sft_loss": 0.005911496467888355, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.6252999657182037, | |
| "grad_norm": 2.3221228206896543, | |
| "learning_rate": 1.8568317292053894e-06, | |
| "logits/chosen": -0.6986726522445679, | |
| "logits/rejected": -0.7248036861419678, | |
| "logps/chosen": -0.7008225917816162, | |
| "logps/rejected": -0.6063806414604187, | |
| "loss": 0.6896, | |
| "odds_ratio_loss": 6.8465399742126465, | |
| "rewards/accuracies": 0.4296875, | |
| "rewards/chosen": -0.06063806638121605, | |
| "rewards/margins": 0.009444191120564938, | |
| "rewards/rejected": -0.07008224725723267, | |
| "sft_loss": 0.004963008686900139, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.6307850531367843, | |
| "grad_norm": 2.3323657507043585, | |
| "learning_rate": 1.8103919887911525e-06, | |
| "logits/chosen": -0.6923948526382446, | |
| "logits/rejected": -0.6713389158248901, | |
| "logps/chosen": -0.6527585983276367, | |
| "logps/rejected": -0.6079282164573669, | |
| "loss": 0.6887, | |
| "odds_ratio_loss": 6.84266996383667, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": -0.06079282611608505, | |
| "rewards/margins": 0.004483034834265709, | |
| "rewards/rejected": -0.06527585536241531, | |
| "sft_loss": 0.004444844089448452, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.6362701405553651, | |
| "grad_norm": 2.2756656865836824, | |
| "learning_rate": 1.7642084096014405e-06, | |
| "logits/chosen": -0.6769150495529175, | |
| "logits/rejected": -0.6519336104393005, | |
| "logps/chosen": -0.6810208559036255, | |
| "logps/rejected": -0.6118040680885315, | |
| "loss": 0.6967, | |
| "odds_ratio_loss": 6.925226211547852, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": -0.06118040531873703, | |
| "rewards/margins": 0.006921680178493261, | |
| "rewards/rejected": -0.06810208410024643, | |
| "sft_loss": 0.0041780658066272736, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.6417552279739458, | |
| "grad_norm": 4.533574316801059, | |
| "learning_rate": 1.718298146950585e-06, | |
| "logits/chosen": -0.6806620955467224, | |
| "logits/rejected": -0.6420994400978088, | |
| "logps/chosen": -0.6579238176345825, | |
| "logps/rejected": -0.6758315563201904, | |
| "loss": 0.7643, | |
| "odds_ratio_loss": 7.476547718048096, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": -0.06758316606283188, | |
| "rewards/margins": -0.0017907717265188694, | |
| "rewards/rejected": -0.06579238921403885, | |
| "sft_loss": 0.016644245013594627, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.6472403153925266, | |
| "grad_norm": 2.4121811138925016, | |
| "learning_rate": 1.6726782546269793e-06, | |
| "logits/chosen": -0.7141998410224915, | |
| "logits/rejected": -0.6570146083831787, | |
| "logps/chosen": -0.6056728959083557, | |
| "logps/rejected": -0.6163904666900635, | |
| "loss": 0.7006, | |
| "odds_ratio_loss": 6.9596381187438965, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.061639051884412766, | |
| "rewards/margins": -0.0010717544937506318, | |
| "rewards/rejected": -0.06056729331612587, | |
| "sft_loss": 0.004627158399671316, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.6527254028111072, | |
| "grad_norm": 2.116647341589779, | |
| "learning_rate": 1.6273656785582986e-06, | |
| "logits/chosen": -0.7080201506614685, | |
| "logits/rejected": -0.6555320024490356, | |
| "logps/chosen": -0.6334236860275269, | |
| "logps/rejected": -0.6177656650543213, | |
| "loss": 0.7001, | |
| "odds_ratio_loss": 6.9610371589660645, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": -0.06177656352519989, | |
| "rewards/margins": 0.0015658115735277534, | |
| "rewards/rejected": -0.0633423700928688, | |
| "sft_loss": 0.00395183265209198, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.658210490229688, | |
| "grad_norm": 2.10163675871464, | |
| "learning_rate": 1.58237725051677e-06, | |
| "logits/chosen": -0.6549379229545593, | |
| "logits/rejected": -0.6431285738945007, | |
| "logps/chosen": -0.5666630268096924, | |
| "logps/rejected": -0.534731388092041, | |
| "loss": 0.6123, | |
| "odds_ratio_loss": 6.093177795410156, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": -0.05347314849495888, | |
| "rewards/margins": 0.0031931637786328793, | |
| "rewards/rejected": -0.056666307151317596, | |
| "sft_loss": 0.003024790436029434, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6636955776482688, | |
| "grad_norm": 2.117301914309323, | |
| "learning_rate": 1.5377296818668638e-06, | |
| "logits/chosen": -0.6497895121574402, | |
| "logits/rejected": -0.6862410306930542, | |
| "logps/chosen": -0.5236161351203918, | |
| "logps/rejected": -0.6108285188674927, | |
| "loss": 0.7019, | |
| "odds_ratio_loss": 6.986342430114746, | |
| "rewards/accuracies": 0.3515625, | |
| "rewards/chosen": -0.06108284741640091, | |
| "rewards/margins": -0.008721234276890755, | |
| "rewards/rejected": -0.052361615002155304, | |
| "sft_loss": 0.0032780070323497057, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.6691806650668495, | |
| "grad_norm": 2.2927939430967093, | |
| "learning_rate": 1.4934395573577016e-06, | |
| "logits/chosen": -0.680001974105835, | |
| "logits/rejected": -0.6553654670715332, | |
| "logps/chosen": -0.5565463900566101, | |
| "logps/rejected": -0.5977818965911865, | |
| "loss": 0.681, | |
| "odds_ratio_loss": 6.767004489898682, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.05977818742394447, | |
| "rewards/margins": -0.004123550374060869, | |
| "rewards/rejected": -0.05565463379025459, | |
| "sft_loss": 0.004254105966538191, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.6746657524854303, | |
| "grad_norm": 1.9919891401234977, | |
| "learning_rate": 1.449523328962496e-06, | |
| "logits/chosen": -0.6618916988372803, | |
| "logits/rejected": -0.6230502724647522, | |
| "logps/chosen": -0.6067531108856201, | |
| "logps/rejected": -0.6063209772109985, | |
| "loss": 0.6881, | |
| "odds_ratio_loss": 6.851033687591553, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.06063209846615791, | |
| "rewards/margins": 4.3215928599238396e-05, | |
| "rewards/rejected": -0.06067531555891037, | |
| "sft_loss": 0.0030309129506349564, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.6801508399040109, | |
| "grad_norm": 2.4998193508161006, | |
| "learning_rate": 1.4059973097673187e-06, | |
| "logits/chosen": -0.7269992232322693, | |
| "logits/rejected": -0.6942679286003113, | |
| "logps/chosen": -0.6811712980270386, | |
| "logps/rejected": -0.6196709871292114, | |
| "loss": 0.6976, | |
| "odds_ratio_loss": 6.929691314697266, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": -0.061967093497514725, | |
| "rewards/margins": 0.006150041241198778, | |
| "rewards/rejected": -0.06811713427305222, | |
| "sft_loss": 0.004657902754843235, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.6856359273225917, | |
| "grad_norm": 2.1081004296697583, | |
| "learning_rate": 1.3628776679114516e-06, | |
| "logits/chosen": -0.7377051711082458, | |
| "logits/rejected": -0.7069607973098755, | |
| "logps/chosen": -0.8355605602264404, | |
| "logps/rejected": -0.6344387531280518, | |
| "loss": 0.7112, | |
| "odds_ratio_loss": 7.074479103088379, | |
| "rewards/accuracies": 0.4765625, | |
| "rewards/chosen": -0.0634438693523407, | |
| "rewards/margins": 0.020112188532948494, | |
| "rewards/rejected": -0.08355606347322464, | |
| "sft_loss": 0.003787390887737274, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6911210147411725, | |
| "grad_norm": 2.085036928586896, | |
| "learning_rate": 1.3201804205815872e-06, | |
| "logits/chosen": -0.6863076686859131, | |
| "logits/rejected": -0.6810216307640076, | |
| "logps/chosen": -0.7168741226196289, | |
| "logps/rejected": -0.6459383964538574, | |
| "loss": 0.7361, | |
| "odds_ratio_loss": 7.325974464416504, | |
| "rewards/accuracies": 0.3515625, | |
| "rewards/chosen": -0.0645938366651535, | |
| "rewards/margins": 0.0070935748517513275, | |
| "rewards/rejected": -0.07168740779161453, | |
| "sft_loss": 0.0035439669154584408, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.6966061021597532, | |
| "grad_norm": 2.351506031335298, | |
| "learning_rate": 1.277921428062091e-06, | |
| "logits/chosen": -0.6885042190551758, | |
| "logits/rejected": -0.6516430974006653, | |
| "logps/chosen": -0.6768959164619446, | |
| "logps/rejected": -0.6008927822113037, | |
| "loss": 0.6796, | |
| "odds_ratio_loss": 6.754604339599609, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.06008927896618843, | |
| "rewards/margins": 0.007600321434438229, | |
| "rewards/rejected": -0.06768959760665894, | |
| "sft_loss": 0.004147130064666271, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.7020911895783339, | |
| "grad_norm": 2.027558705672352, | |
| "learning_rate": 1.2361163878435594e-06, | |
| "logits/chosen": -0.6875728964805603, | |
| "logits/rejected": -0.6000896096229553, | |
| "logps/chosen": -0.6457569599151611, | |
| "logps/rejected": -0.6216841340065002, | |
| "loss": 0.7047, | |
| "odds_ratio_loss": 7.014739036560059, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.062168411910533905, | |
| "rewards/margins": 0.0024072853848338127, | |
| "rewards/rejected": -0.06457570195198059, | |
| "sft_loss": 0.0032098242081701756, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.7075762769969146, | |
| "grad_norm": 2.6891394023374913, | |
| "learning_rate": 1.1947808287918406e-06, | |
| "logits/chosen": -0.7073705792427063, | |
| "logits/rejected": -0.6391410231590271, | |
| "logps/chosen": -0.9505314826965332, | |
| "logps/rejected": -0.6497754454612732, | |
| "loss": 0.7381, | |
| "odds_ratio_loss": 7.3174543380737305, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": -0.06497755646705627, | |
| "rewards/margins": 0.030075591057538986, | |
| "rewards/rejected": -0.09505314379930496, | |
| "sft_loss": 0.006380516104400158, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.7130613644154954, | |
| "grad_norm": 2.7243982781242653, | |
| "learning_rate": 1.153930105379695e-06, | |
| "logits/chosen": -0.6403245329856873, | |
| "logits/rejected": -0.6941784620285034, | |
| "logps/chosen": -0.6206578016281128, | |
| "logps/rejected": -0.6329513788223267, | |
| "loss": 0.7166, | |
| "odds_ratio_loss": 7.099569797515869, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.0632951408624649, | |
| "rewards/margins": -0.0012293587205931544, | |
| "rewards/rejected": -0.06206577643752098, | |
| "sft_loss": 0.006686141714453697, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.7185464518340761, | |
| "grad_norm": 3.0501747152037226, | |
| "learning_rate": 1.1135793919832336e-06, | |
| "logits/chosen": -0.6880025863647461, | |
| "logits/rejected": -0.6472803950309753, | |
| "logps/chosen": -0.6972535252571106, | |
| "logps/rejected": -0.6315404176712036, | |
| "loss": 0.7149, | |
| "odds_ratio_loss": 7.093357086181641, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.06315404176712036, | |
| "rewards/margins": 0.006571306847035885, | |
| "rewards/rejected": -0.06972534954547882, | |
| "sft_loss": 0.005599521566182375, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.7240315392526568, | |
| "grad_norm": 4.097059851270684, | |
| "learning_rate": 1.0737436772452602e-06, | |
| "logits/chosen": -0.6801282167434692, | |
| "logits/rejected": -0.6225499510765076, | |
| "logps/chosen": -0.6466732025146484, | |
| "logps/rejected": -0.6455317139625549, | |
| "loss": 0.7287, | |
| "odds_ratio_loss": 7.189926624298096, | |
| "rewards/accuracies": 0.4296875, | |
| "rewards/chosen": -0.0645531713962555, | |
| "rewards/margins": 0.00011415383778512478, | |
| "rewards/rejected": -0.06466732174158096, | |
| "sft_loss": 0.00968949869275093, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.7295166266712376, | |
| "grad_norm": 2.1627454720493695, | |
| "learning_rate": 1.0344377585076e-06, | |
| "logits/chosen": -0.6447775363922119, | |
| "logits/rejected": -0.6088653206825256, | |
| "logps/chosen": -0.5911606550216675, | |
| "logps/rejected": -0.5996113419532776, | |
| "loss": 0.6819, | |
| "odds_ratio_loss": 6.785092830657959, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.05996113270521164, | |
| "rewards/margins": -0.000845066097099334, | |
| "rewards/rejected": -0.05911606550216675, | |
| "sft_loss": 0.00336868641898036, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.7350017140898183, | |
| "grad_norm": 2.3398426117994826, | |
| "learning_rate": 9.956762363144892e-07, | |
| "logits/chosen": -0.6249683499336243, | |
| "logits/rejected": -0.6248048543930054, | |
| "logps/chosen": -0.6350124478340149, | |
| "logps/rejected": -0.5843938589096069, | |
| "loss": 0.666, | |
| "odds_ratio_loss": 6.617175579071045, | |
| "rewards/accuracies": 0.4453125, | |
| "rewards/chosen": -0.05843937769532204, | |
| "rewards/margins": 0.005061861127614975, | |
| "rewards/rejected": -0.06350124627351761, | |
| "sft_loss": 0.004313306882977486, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.7404868015083991, | |
| "grad_norm": 2.241323764733524, | |
| "learning_rate": 9.574735089890765e-07, | |
| "logits/chosen": -0.6376866102218628, | |
| "logits/rejected": -0.5942208766937256, | |
| "logps/chosen": -0.6546105742454529, | |
| "logps/rejected": -0.6053332090377808, | |
| "loss": 0.6863, | |
| "odds_ratio_loss": 6.821003437042236, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.060533322393894196, | |
| "rewards/margins": 0.0049277422949671745, | |
| "rewards/rejected": -0.06546106189489365, | |
| "sft_loss": 0.004214235581457615, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.7459718889269797, | |
| "grad_norm": 2.2084551304631654, | |
| "learning_rate": 9.198437672850249e-07, | |
| "logits/chosen": -0.6654888391494751, | |
| "logits/rejected": -0.616665244102478, | |
| "logps/chosen": -0.6150586009025574, | |
| "logps/rejected": -0.6242785453796387, | |
| "loss": 0.7088, | |
| "odds_ratio_loss": 7.0509033203125, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.062427863478660583, | |
| "rewards/margins": -0.0009219995117746294, | |
| "rewards/rejected": -0.06150586158037186, | |
| "sft_loss": 0.0037482583429664373, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.7514569763455605, | |
| "grad_norm": 2.5791794615739603, | |
| "learning_rate": 8.828009891152301e-07, | |
| "logits/chosen": -0.6492202877998352, | |
| "logits/rejected": -0.6373860836029053, | |
| "logps/chosen": -0.6348594427108765, | |
| "logps/rejected": -0.5977117419242859, | |
| "loss": 0.6816, | |
| "odds_ratio_loss": 6.770447731018066, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": -0.05977117270231247, | |
| "rewards/margins": 0.003714768448844552, | |
| "rewards/rejected": -0.06348594278097153, | |
| "sft_loss": 0.004584567621350288, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.7569420637641412, | |
| "grad_norm": 2.10960114347503, | |
| "learning_rate": 8.463589343595976e-07, | |
| "logits/chosen": -0.6831432580947876, | |
| "logits/rejected": -0.6670156717300415, | |
| "logps/chosen": -0.6033125519752502, | |
| "logps/rejected": -0.589252769947052, | |
| "loss": 0.6723, | |
| "odds_ratio_loss": 6.688982009887695, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.05892528221011162, | |
| "rewards/margins": 0.0014059704262763262, | |
| "rewards/rejected": -0.06033124774694443, | |
| "sft_loss": 0.0033750347793102264, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.762427151182722, | |
| "grad_norm": 2.2233884047037513, | |
| "learning_rate": 8.105311397538085e-07, | |
| "logits/chosen": -0.6301755905151367, | |
| "logits/rejected": -0.6074115037918091, | |
| "logps/chosen": -0.642350435256958, | |
| "logps/rejected": -0.6004490852355957, | |
| "loss": 0.6787, | |
| "odds_ratio_loss": 6.747027397155762, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": -0.06004491448402405, | |
| "rewards/margins": 0.004190134350210428, | |
| "rewards/rejected": -0.06423504650592804, | |
| "sft_loss": 0.0040018935687839985, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.7679122386013028, | |
| "grad_norm": 2.1001393729716997, | |
| "learning_rate": 7.753309138609705e-07, | |
| "logits/chosen": -0.6419239044189453, | |
| "logits/rejected": -0.6177327632904053, | |
| "logps/chosen": -0.7455356121063232, | |
| "logps/rejected": -0.6029459834098816, | |
| "loss": 0.6764, | |
| "odds_ratio_loss": 6.730374336242676, | |
| "rewards/accuracies": 0.4609375, | |
| "rewards/chosen": -0.06029459461569786, | |
| "rewards/margins": 0.014258962124586105, | |
| "rewards/rejected": -0.07455356419086456, | |
| "sft_loss": 0.003380353096872568, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7733973260198834, | |
| "grad_norm": 1.955388109291546, | |
| "learning_rate": 7.407713321280377e-07, | |
| "logits/chosen": -0.6610326766967773, | |
| "logits/rejected": -0.5916581749916077, | |
| "logps/chosen": -0.6852571368217468, | |
| "logps/rejected": -0.5728943347930908, | |
| "loss": 0.6502, | |
| "odds_ratio_loss": 6.471943378448486, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": -0.057289429008960724, | |
| "rewards/margins": 0.011236282996833324, | |
| "rewards/rejected": -0.06852570921182632, | |
| "sft_loss": 0.003054150380194187, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.7788824134384642, | |
| "grad_norm": 1.9633538931591568, | |
| "learning_rate": 7.068652320288081e-07, | |
| "logits/chosen": -0.5979795455932617, | |
| "logits/rejected": -0.5797224640846252, | |
| "logps/chosen": -0.5880254507064819, | |
| "logps/rejected": -0.6247429847717285, | |
| "loss": 0.7109, | |
| "odds_ratio_loss": 7.07578706741333, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.06247429549694061, | |
| "rewards/margins": -0.003671749262139201, | |
| "rewards/rejected": -0.05880254879593849, | |
| "sft_loss": 0.003300985088571906, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.7843675008570449, | |
| "grad_norm": 1.9235465695795355, | |
| "learning_rate": 6.736252082953307e-07, | |
| "logits/chosen": -0.5846747756004333, | |
| "logits/rejected": -0.5656622052192688, | |
| "logps/chosen": -0.5795245170593262, | |
| "logps/rejected": -0.5894888639450073, | |
| "loss": 0.6764, | |
| "odds_ratio_loss": 6.733616352081299, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": -0.05894888564944267, | |
| "rewards/margins": -0.0009964264463633299, | |
| "rewards/rejected": -0.057952456176280975, | |
| "sft_loss": 0.0030053844675421715, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.7898525882756257, | |
| "grad_norm": 1.8461477291438253, | |
| "learning_rate": 6.410636082394772e-07, | |
| "logits/chosen": -0.6314021944999695, | |
| "logits/rejected": -0.6141982078552246, | |
| "logps/chosen": -0.7406495809555054, | |
| "logps/rejected": -0.6059472560882568, | |
| "loss": 0.6888, | |
| "odds_ratio_loss": 6.85874605178833, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.06059472635388374, | |
| "rewards/margins": 0.013470232486724854, | |
| "rewards/rejected": -0.0740649551153183, | |
| "sft_loss": 0.0029607657343149185, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.7953376756942063, | |
| "grad_norm": 2.4596952552330196, | |
| "learning_rate": 6.091925271664156e-07, | |
| "logits/chosen": -0.6452285051345825, | |
| "logits/rejected": -0.6411218047142029, | |
| "logps/chosen": -0.6096831560134888, | |
| "logps/rejected": -0.6008501648902893, | |
| "loss": 0.6837, | |
| "odds_ratio_loss": 6.7790327072143555, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.06008501723408699, | |
| "rewards/margins": 0.0008833012543618679, | |
| "rewards/rejected": -0.0609683133661747, | |
| "sft_loss": 0.005798771046102047, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.8008227631127871, | |
| "grad_norm": 2.395026025025573, | |
| "learning_rate": 5.780238038817035e-07, | |
| "logits/chosen": -0.6628804802894592, | |
| "logits/rejected": -0.6211153268814087, | |
| "logps/chosen": -0.6752290725708008, | |
| "logps/rejected": -0.6579370498657227, | |
| "loss": 0.746, | |
| "odds_ratio_loss": 7.417111873626709, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -0.0657937079668045, | |
| "rewards/margins": 0.0017291962867602706, | |
| "rewards/rejected": -0.06752290576696396, | |
| "sft_loss": 0.004298758693039417, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.8063078505313679, | |
| "grad_norm": 2.068579141907181, | |
| "learning_rate": 5.47569016293649e-07, | |
| "logits/chosen": -0.6299046874046326, | |
| "logits/rejected": -0.6121450662612915, | |
| "logps/chosen": -0.8063814640045166, | |
| "logps/rejected": -0.5979243516921997, | |
| "loss": 0.6784, | |
| "odds_ratio_loss": 6.7539262771606445, | |
| "rewards/accuracies": 0.4296875, | |
| "rewards/chosen": -0.05979243293404579, | |
| "rewards/margins": 0.020845718681812286, | |
| "rewards/rejected": -0.08063814043998718, | |
| "sft_loss": 0.0030515664257109165, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.8117929379499486, | |
| "grad_norm": 3.6637986589772233, | |
| "learning_rate": 5.178394771125969e-07, | |
| "logits/chosen": -0.6278913021087646, | |
| "logits/rejected": -0.5689703226089478, | |
| "logps/chosen": -0.7562225461006165, | |
| "logps/rejected": -0.6262752413749695, | |
| "loss": 0.7032, | |
| "odds_ratio_loss": 6.891101837158203, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.06262752413749695, | |
| "rewards/margins": 0.012994730845093727, | |
| "rewards/rejected": -0.07562224566936493, | |
| "sft_loss": 0.01412445493042469, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.8172780253685293, | |
| "grad_norm": 2.3203742871591144, | |
| "learning_rate": 4.888462296487129e-07, | |
| "logits/chosen": -0.6456272602081299, | |
| "logits/rejected": -0.6417357325553894, | |
| "logps/chosen": -0.6021786332130432, | |
| "logps/rejected": -0.6396222114562988, | |
| "loss": 0.7282, | |
| "odds_ratio_loss": 7.244186878204346, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": -0.06396222114562988, | |
| "rewards/margins": -0.0037443540059030056, | |
| "rewards/rejected": -0.06021786853671074, | |
| "sft_loss": 0.0037422627210617065, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.82276311278711, | |
| "grad_norm": 2.5512980917433157, | |
| "learning_rate": 4.6060004370984763e-07, | |
| "logits/chosen": -0.631007969379425, | |
| "logits/rejected": -0.5933969020843506, | |
| "logps/chosen": -0.8098146915435791, | |
| "logps/rejected": -0.6024014353752136, | |
| "loss": 0.6766, | |
| "odds_ratio_loss": 6.713679790496826, | |
| "rewards/accuracies": 0.4921875, | |
| "rewards/chosen": -0.06024014204740524, | |
| "rewards/margins": 0.02074132300913334, | |
| "rewards/rejected": -0.08098147064447403, | |
| "sft_loss": 0.005258024670183659, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.8282482002056908, | |
| "grad_norm": 2.198032076864518, | |
| "learning_rate": 4.331114116009938e-07, | |
| "logits/chosen": -0.6112239360809326, | |
| "logits/rejected": -0.55472332239151, | |
| "logps/chosen": -0.7825672030448914, | |
| "logps/rejected": -0.6584952473640442, | |
| "loss": 0.7438, | |
| "odds_ratio_loss": 7.401620864868164, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.06584953516721725, | |
| "rewards/margins": 0.012407200410962105, | |
| "rewards/rejected": -0.07825673371553421, | |
| "sft_loss": 0.0036755804903805256, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.8337332876242715, | |
| "grad_norm": 2.062470910623532, | |
| "learning_rate": 4.063905442268201e-07, | |
| "logits/chosen": -0.6306271553039551, | |
| "logits/rejected": -0.6064797043800354, | |
| "logps/chosen": -0.6712342500686646, | |
| "logps/rejected": -0.6343151926994324, | |
| "loss": 0.7212, | |
| "odds_ratio_loss": 7.176795959472656, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.063431516289711, | |
| "rewards/margins": 0.0036919128615409136, | |
| "rewards/rejected": -0.0671234279870987, | |
| "sft_loss": 0.0035129080060869455, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.8392183750428522, | |
| "grad_norm": 2.3428435069292166, | |
| "learning_rate": 3.80447367298738e-07, | |
| "logits/chosen": -0.6870932579040527, | |
| "logits/rejected": -0.6724913716316223, | |
| "logps/chosen": -0.6094024777412415, | |
| "logps/rejected": -0.6165374517440796, | |
| "loss": 0.7051, | |
| "odds_ratio_loss": 7.0048112869262695, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.0616537444293499, | |
| "rewards/margins": -0.0007134978659451008, | |
| "rewards/rejected": -0.06094024330377579, | |
| "sft_loss": 0.004614294972270727, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.844703462461433, | |
| "grad_norm": 2.162714758637227, | |
| "learning_rate": 3.5529151764790715e-07, | |
| "logits/chosen": -0.6563050746917725, | |
| "logits/rejected": -0.6312206983566284, | |
| "logps/chosen": -0.7330479025840759, | |
| "logps/rejected": -0.6632793545722961, | |
| "loss": 0.7478, | |
| "odds_ratio_loss": 7.4362664222717285, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": -0.06632794439792633, | |
| "rewards/margins": 0.006976846605539322, | |
| "rewards/rejected": -0.07330479472875595, | |
| "sft_loss": 0.004126606043428183, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.8501885498800137, | |
| "grad_norm": 2.061116429438832, | |
| "learning_rate": 3.3093233964554464e-07, | |
| "logits/chosen": -0.6102815866470337, | |
| "logits/rejected": -0.6046280860900879, | |
| "logps/chosen": -0.6141259670257568, | |
| "logps/rejected": -0.5823516845703125, | |
| "loss": 0.6607, | |
| "odds_ratio_loss": 6.578813076019287, | |
| "rewards/accuracies": 0.4609375, | |
| "rewards/chosen": -0.05823516845703125, | |
| "rewards/margins": 0.0031774300150573254, | |
| "rewards/rejected": -0.061412595212459564, | |
| "sft_loss": 0.002771932166069746, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.8556736372985945, | |
| "grad_norm": 2.432594869696479, | |
| "learning_rate": 3.0737888173187067e-07, | |
| "logits/chosen": -0.6550750136375427, | |
| "logits/rejected": -0.6305856108665466, | |
| "logps/chosen": -0.6801073551177979, | |
| "logps/rejected": -0.5993965268135071, | |
| "loss": 0.6803, | |
| "odds_ratio_loss": 6.759056568145752, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.05993964523077011, | |
| "rewards/margins": 0.008071082644164562, | |
| "rewards/rejected": -0.06801073253154755, | |
| "sft_loss": 0.004404113162308931, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.8611587247171751, | |
| "grad_norm": 2.0454259039650977, | |
| "learning_rate": 2.8463989305498596e-07, | |
| "logits/chosen": -0.5875197649002075, | |
| "logits/rejected": -0.6273292303085327, | |
| "logps/chosen": -0.5868790149688721, | |
| "logps/rejected": -0.5973340272903442, | |
| "loss": 0.6798, | |
| "odds_ratio_loss": 6.763144493103027, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.05973340570926666, | |
| "rewards/margins": -0.0010455029550939798, | |
| "rewards/rejected": -0.058687906712293625, | |
| "sft_loss": 0.0034761279821395874, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.8666438121357559, | |
| "grad_norm": 2.158571117053065, | |
| "learning_rate": 2.6272382022091704e-07, | |
| "logits/chosen": -0.6387627124786377, | |
| "logits/rejected": -0.6411675214767456, | |
| "logps/chosen": -0.6698893308639526, | |
| "logps/rejected": -0.5845687389373779, | |
| "loss": 0.6596, | |
| "odds_ratio_loss": 6.559039115905762, | |
| "rewards/accuracies": 0.4765625, | |
| "rewards/chosen": -0.05845687910914421, | |
| "rewards/margins": 0.008532052859663963, | |
| "rewards/rejected": -0.06698893010616302, | |
| "sft_loss": 0.0036818967200815678, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.8721288995543367, | |
| "grad_norm": 2.1296893473193985, | |
| "learning_rate": 2.4163880415604913e-07, | |
| "logits/chosen": -0.5978600978851318, | |
| "logits/rejected": -0.5640897154808044, | |
| "logps/chosen": -0.6199032664299011, | |
| "logps/rejected": -0.6052448749542236, | |
| "loss": 0.6879, | |
| "odds_ratio_loss": 6.839813232421875, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.06052448973059654, | |
| "rewards/margins": 0.0014658391010016203, | |
| "rewards/rejected": -0.06199032440781593, | |
| "sft_loss": 0.003927412908524275, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.8776139869729174, | |
| "grad_norm": 2.155495858759351, | |
| "learning_rate": 2.2139267708310457e-07, | |
| "logits/chosen": -0.676996111869812, | |
| "logits/rejected": -0.6428037285804749, | |
| "logps/chosen": -0.6014530658721924, | |
| "logps/rejected": -0.5981815457344055, | |
| "loss": 0.6794, | |
| "odds_ratio_loss": 6.754974365234375, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.05981815233826637, | |
| "rewards/margins": 0.00032715569250285625, | |
| "rewards/rejected": -0.060145311057567596, | |
| "sft_loss": 0.0038914589677006006, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8830990743914982, | |
| "grad_norm": 2.6138265531338565, | |
| "learning_rate": 2.0199295961178893e-07, | |
| "logits/chosen": -0.6286705136299133, | |
| "logits/rejected": -0.6172723770141602, | |
| "logps/chosen": -0.6388348937034607, | |
| "logps/rejected": -0.5947953462600708, | |
| "loss": 0.6768, | |
| "odds_ratio_loss": 6.710721492767334, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": -0.05947953462600708, | |
| "rewards/margins": 0.004403956700116396, | |
| "rewards/rejected": -0.06388349086046219, | |
| "sft_loss": 0.005766674876213074, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.8885841618100788, | |
| "grad_norm": 2.022101568731076, | |
| "learning_rate": 1.8344685794519507e-07, | |
| "logits/chosen": -0.6218512058258057, | |
| "logits/rejected": -0.5689311027526855, | |
| "logps/chosen": -0.6789796352386475, | |
| "logps/rejected": -0.6389855742454529, | |
| "loss": 0.7205, | |
| "odds_ratio_loss": 7.172222137451172, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": -0.06389855593442917, | |
| "rewards/margins": 0.003999411594122648, | |
| "rewards/rejected": -0.06789796054363251, | |
| "sft_loss": 0.0032456754706799984, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.8940692492286596, | |
| "grad_norm": 2.2581765266272185, | |
| "learning_rate": 1.6576126120299046e-07, | |
| "logits/chosen": -0.6435633301734924, | |
| "logits/rejected": -0.589432954788208, | |
| "logps/chosen": -0.7249476313591003, | |
| "logps/rejected": -0.6334548592567444, | |
| "loss": 0.7181, | |
| "odds_ratio_loss": 7.1389479637146, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.06334548443555832, | |
| "rewards/margins": 0.009149276651442051, | |
| "rewards/rejected": -0.0724947601556778, | |
| "sft_loss": 0.004166773054748774, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.8995543366472403, | |
| "grad_norm": 2.2259746913759004, | |
| "learning_rate": 1.4894273886239208e-07, | |
| "logits/chosen": -0.6249025464057922, | |
| "logits/rejected": -0.5969172716140747, | |
| "logps/chosen": -0.633272647857666, | |
| "logps/rejected": -0.6413824558258057, | |
| "loss": 0.726, | |
| "odds_ratio_loss": 7.205265045166016, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.06413824111223221, | |
| "rewards/margins": -0.0008109819609671831, | |
| "rewards/rejected": -0.06332726776599884, | |
| "sft_loss": 0.005517784971743822, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.905039424065821, | |
| "grad_norm": 2.2159816572748468, | |
| "learning_rate": 1.3299753831787193e-07, | |
| "logits/chosen": -0.601813018321991, | |
| "logits/rejected": -0.6476290822029114, | |
| "logps/chosen": -0.6103772521018982, | |
| "logps/rejected": -0.6375135183334351, | |
| "loss": 0.7213, | |
| "odds_ratio_loss": 7.173505783081055, | |
| "rewards/accuracies": 0.3828125, | |
| "rewards/chosen": -0.06375134736299515, | |
| "rewards/margins": -0.002713626716285944, | |
| "rewards/rejected": -0.06103772297501564, | |
| "sft_loss": 0.003954712767153978, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.9105245114844018, | |
| "grad_norm": 1.987492056492753, | |
| "learning_rate": 1.1793158256050708e-07, | |
| "logits/chosen": -0.628088116645813, | |
| "logits/rejected": -0.591625452041626, | |
| "logps/chosen": -0.6703177690505981, | |
| "logps/rejected": -0.5828872323036194, | |
| "loss": 0.6629, | |
| "odds_ratio_loss": 6.595256805419922, | |
| "rewards/accuracies": 0.4296875, | |
| "rewards/chosen": -0.05828872323036194, | |
| "rewards/margins": 0.008743051439523697, | |
| "rewards/rejected": -0.06703177839517593, | |
| "sft_loss": 0.003396927611902356, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.9160095989029825, | |
| "grad_norm": 2.059761656496877, | |
| "learning_rate": 1.0375046797782868e-07, | |
| "logits/chosen": -0.6672204732894897, | |
| "logits/rejected": -0.6035845875740051, | |
| "logps/chosen": -0.6069723963737488, | |
| "logps/rejected": -0.6074884533882141, | |
| "loss": 0.6911, | |
| "odds_ratio_loss": 6.8784990310668945, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.06074884906411171, | |
| "rewards/margins": -5.1606097258627415e-05, | |
| "rewards/rejected": -0.06069723516702652, | |
| "sft_loss": 0.0032916096970438957, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.9214946863215633, | |
| "grad_norm": 1.8571804970978707, | |
| "learning_rate": 9.045946227499298e-08, | |
| "logits/chosen": -0.670376718044281, | |
| "logits/rejected": -0.5822762846946716, | |
| "logps/chosen": -0.7293166518211365, | |
| "logps/rejected": -0.6415222883224487, | |
| "loss": 0.726, | |
| "odds_ratio_loss": 7.229472637176514, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -0.06415222585201263, | |
| "rewards/margins": 0.008779437281191349, | |
| "rewards/rejected": -0.072931669652462, | |
| "sft_loss": 0.0030599033925682306, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.926979773740144, | |
| "grad_norm": 2.279667665781299, | |
| "learning_rate": 7.806350251804484e-08, | |
| "logits/chosen": -0.6420009732246399, | |
| "logits/rejected": -0.5824066400527954, | |
| "logps/chosen": -0.875396728515625, | |
| "logps/rejected": -0.6270475387573242, | |
| "loss": 0.7041, | |
| "odds_ratio_loss": 7.000705242156982, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.06270475685596466, | |
| "rewards/margins": 0.02483491040766239, | |
| "rewards/rejected": -0.0875396728515625, | |
| "sft_loss": 0.004043279215693474, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.9324648611587247, | |
| "grad_norm": 1.9899455007963414, | |
| "learning_rate": 6.6567193299997e-08, | |
| "logits/chosen": -0.6606361269950867, | |
| "logits/rejected": -0.6265894174575806, | |
| "logps/chosen": -0.8179676532745361, | |
| "logps/rejected": -0.6608296036720276, | |
| "loss": 0.7442, | |
| "odds_ratio_loss": 7.407998085021973, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -0.06608295440673828, | |
| "rewards/margins": 0.015713810920715332, | |
| "rewards/rejected": -0.08179676532745361, | |
| "sft_loss": 0.0034413619432598352, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.9379499485773054, | |
| "grad_norm": 2.432140341651671, | |
| "learning_rate": 5.597480503041486e-08, | |
| "logits/chosen": -0.6267792582511902, | |
| "logits/rejected": -0.6068964004516602, | |
| "logps/chosen": -0.6993312835693359, | |
| "logps/rejected": -0.6519731283187866, | |
| "loss": 0.7372, | |
| "odds_ratio_loss": 7.303807258605957, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.06519731879234314, | |
| "rewards/margins": 0.004735812544822693, | |
| "rewards/rejected": -0.06993313133716583, | |
| "sft_loss": 0.006868740543723106, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.9434350359958862, | |
| "grad_norm": 2.1615902439918186, | |
| "learning_rate": 4.629027234912986e-08, | |
| "logits/chosen": -0.662708044052124, | |
| "logits/rejected": -0.6557428240776062, | |
| "logps/chosen": -0.608113169670105, | |
| "logps/rejected": -0.5925155878067017, | |
| "loss": 0.6778, | |
| "odds_ratio_loss": 6.741554260253906, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": -0.059251561760902405, | |
| "rewards/margins": 0.0015597562305629253, | |
| "rewards/rejected": -0.06081131845712662, | |
| "sft_loss": 0.003658043686300516, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.948920123414467, | |
| "grad_norm": 2.2861540937365947, | |
| "learning_rate": 3.7517192664685844e-08, | |
| "logits/chosen": -0.6392232179641724, | |
| "logits/rejected": -0.6070412993431091, | |
| "logps/chosen": -0.6354994177818298, | |
| "logps/rejected": -0.628402590751648, | |
| "loss": 0.7149, | |
| "odds_ratio_loss": 7.101507186889648, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": -0.06284026056528091, | |
| "rewards/margins": 0.0007096900371834636, | |
| "rewards/rejected": -0.0635499432682991, | |
| "sft_loss": 0.004706856328994036, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.9544052108330476, | |
| "grad_norm": 2.3390784115932726, | |
| "learning_rate": 2.9658824818044328e-08, | |
| "logits/chosen": -0.6274293065071106, | |
| "logits/rejected": -0.5854682326316833, | |
| "logps/chosen": -0.7155969142913818, | |
| "logps/rejected": -0.6342028975486755, | |
| "loss": 0.7122, | |
| "odds_ratio_loss": 7.073307514190674, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": -0.06342029571533203, | |
| "rewards/margins": 0.008139407262206078, | |
| "rewards/rejected": -0.07155969738960266, | |
| "sft_loss": 0.004918898921459913, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.9598902982516284, | |
| "grad_norm": 2.563514632765213, | |
| "learning_rate": 2.2718087872060925e-08, | |
| "logits/chosen": -0.6743865013122559, | |
| "logits/rejected": -0.6293442845344543, | |
| "logps/chosen": -0.7085027098655701, | |
| "logps/rejected": -0.6935465931892395, | |
| "loss": 0.7803, | |
| "odds_ratio_loss": 7.746453285217285, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.06935466080904007, | |
| "rewards/margins": 0.0014956118538975716, | |
| "rewards/rejected": -0.07085027545690536, | |
| "sft_loss": 0.0056967539712786674, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.9653753856702091, | |
| "grad_norm": 2.056659344905833, | |
| "learning_rate": 1.6697560027171543e-08, | |
| "logits/chosen": -0.620927631855011, | |
| "logits/rejected": -0.5821961760520935, | |
| "logps/chosen": -0.6993247866630554, | |
| "logps/rejected": -0.5995112657546997, | |
| "loss": 0.6786, | |
| "odds_ratio_loss": 6.743234634399414, | |
| "rewards/accuracies": 0.4453125, | |
| "rewards/chosen": -0.05995112285017967, | |
| "rewards/margins": 0.00998135469853878, | |
| "rewards/rejected": -0.0699324831366539, | |
| "sft_loss": 0.004281484521925449, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.9708604730887899, | |
| "grad_norm": 2.133552946595605, | |
| "learning_rate": 1.1599477663696845e-08, | |
| "logits/chosen": -0.6138472557067871, | |
| "logits/rejected": -0.6400952339172363, | |
| "logps/chosen": -0.6972988843917847, | |
| "logps/rejected": -0.6593939661979675, | |
| "loss": 0.7433, | |
| "odds_ratio_loss": 7.3952131271362305, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": -0.06593939661979675, | |
| "rewards/margins": 0.0037904919590801, | |
| "rewards/rejected": -0.06972989439964294, | |
| "sft_loss": 0.00373737677000463, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.9763455605073705, | |
| "grad_norm": 2.256606648624031, | |
| "learning_rate": 7.425734511117e-09, | |
| "logits/chosen": -0.6247913241386414, | |
| "logits/rejected": -0.6045972108840942, | |
| "logps/chosen": -0.5932058691978455, | |
| "logps/rejected": -0.6002853512763977, | |
| "loss": 0.685, | |
| "odds_ratio_loss": 6.808387756347656, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.06002853438258171, | |
| "rewards/margins": -0.00070795021019876, | |
| "rewards/rejected": -0.059320587664842606, | |
| "sft_loss": 0.004185628145933151, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.9818306479259513, | |
| "grad_norm": 2.370987455595276, | |
| "learning_rate": 4.17788094463023e-09, | |
| "logits/chosen": -0.6684907674789429, | |
| "logits/rejected": -0.613418698310852, | |
| "logps/chosen": -0.6800941824913025, | |
| "logps/rejected": -0.5861297845840454, | |
| "loss": 0.6704, | |
| "odds_ratio_loss": 6.643950462341309, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.05861297994852066, | |
| "rewards/margins": 0.009396445006132126, | |
| "rewards/rejected": -0.06800942122936249, | |
| "sft_loss": 0.006054120138287544, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.9873157353445321, | |
| "grad_norm": 2.134555240071597, | |
| "learning_rate": 1.857123409250705e-09, | |
| "logits/chosen": -0.6588638424873352, | |
| "logits/rejected": -0.6565414071083069, | |
| "logps/chosen": -0.6550459265708923, | |
| "logps/rejected": -0.6189512014389038, | |
| "loss": 0.7047, | |
| "odds_ratio_loss": 7.008230209350586, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": -0.06189511716365814, | |
| "rewards/margins": 0.0036094679962843657, | |
| "rewards/rejected": -0.06550458818674088, | |
| "sft_loss": 0.003911715466529131, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.9928008227631128, | |
| "grad_norm": 2.189370604534784, | |
| "learning_rate": 4.6432397166285e-10, | |
| "logits/chosen": -0.6217331886291504, | |
| "logits/rejected": -0.553974986076355, | |
| "logps/chosen": -0.6876395344734192, | |
| "logps/rejected": -0.5693202018737793, | |
| "loss": 0.6418, | |
| "odds_ratio_loss": 6.377911567687988, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.05693202465772629, | |
| "rewards/margins": 0.011831930838525295, | |
| "rewards/rejected": -0.06876395642757416, | |
| "sft_loss": 0.003973289392888546, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.9982859101816935, | |
| "grad_norm": 2.1620947430558077, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -0.7046372890472412, | |
| "logits/rejected": -0.6097927689552307, | |
| "logps/chosen": -0.9193150401115417, | |
| "logps/rejected": -0.5992479920387268, | |
| "loss": 0.6706, | |
| "odds_ratio_loss": 6.670595645904541, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.05992480367422104, | |
| "rewards/margins": 0.032006699591875076, | |
| "rewards/rejected": -0.09193150699138641, | |
| "sft_loss": 0.003579822601750493, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.9982859101816935, | |
| "step": 182, | |
| "total_flos": 58779245903872.0, | |
| "train_loss": 0.733595297559277, | |
| "train_runtime": 13688.6483, | |
| "train_samples_per_second": 1.704, | |
| "train_steps_per_second": 0.013 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 182, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 182, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 58779245903872.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |