|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998691442030882, |
|
"eval_steps": 500, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 85.02439880371094, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": -3.096651315689087, |
|
"logits/rejected": -3.0814244747161865, |
|
"logps/chosen": -295.3846130371094, |
|
"logps/rejected": -279.3940124511719, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.4281249940395355, |
|
"rewards/chosen": 0.002148410538211465, |
|
"rewards/margins": 0.004025185946375132, |
|
"rewards/rejected": -0.0018767757574096322, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 74.03569030761719, |
|
"learning_rate": 4.998555145953054e-07, |
|
"logits/chosen": -3.083890199661255, |
|
"logits/rejected": -3.068505048751831, |
|
"logps/chosen": -278.1134338378906, |
|
"logps/rejected": -266.706298828125, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": 0.011356602422893047, |
|
"rewards/margins": 0.07497048377990723, |
|
"rewards/rejected": -0.0636138841509819, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 67.47853088378906, |
|
"learning_rate": 4.98700633214251e-07, |
|
"logits/chosen": -3.0271506309509277, |
|
"logits/rejected": -3.0370867252349854, |
|
"logps/chosen": -246.0901336669922, |
|
"logps/rejected": -250.2740478515625, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": 0.018177634105086327, |
|
"rewards/margins": 0.28142982721328735, |
|
"rewards/rejected": -0.2632521986961365, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 75.60296630859375, |
|
"learning_rate": 4.963962085412632e-07, |
|
"logits/chosen": -3.030393123626709, |
|
"logits/rejected": -3.009413242340088, |
|
"logps/chosen": -298.85662841796875, |
|
"logps/rejected": -275.070068359375, |
|
"loss": 0.6267, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03324083238840103, |
|
"rewards/margins": 0.2483668327331543, |
|
"rewards/rejected": -0.28160765767097473, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 69.39188385009766, |
|
"learning_rate": 4.929528920808854e-07, |
|
"logits/chosen": -3.052746534347534, |
|
"logits/rejected": -3.066401720046997, |
|
"logps/chosen": -281.92706298828125, |
|
"logps/rejected": -246.51901245117188, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.020495222881436348, |
|
"rewards/margins": 0.40510186553001404, |
|
"rewards/rejected": -0.42559710144996643, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 83.05278015136719, |
|
"learning_rate": 4.883865995197318e-07, |
|
"logits/chosen": -3.035808563232422, |
|
"logits/rejected": -3.0392653942108154, |
|
"logps/chosen": -290.5362548828125, |
|
"logps/rejected": -272.5738830566406, |
|
"loss": 0.5792, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.049367621541023254, |
|
"rewards/margins": 0.44638770818710327, |
|
"rewards/rejected": -0.49575528502464294, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 68.99510955810547, |
|
"learning_rate": 4.82718437161051e-07, |
|
"logits/chosen": -3.0192034244537354, |
|
"logits/rejected": -3.006897449493408, |
|
"logps/chosen": -265.6653747558594, |
|
"logps/rejected": -260.2899169921875, |
|
"loss": 0.5846, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.11491725593805313, |
|
"rewards/margins": 0.38759148120880127, |
|
"rewards/rejected": -0.5025087594985962, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 63.006248474121094, |
|
"learning_rate": 4.7597460436723613e-07, |
|
"logits/chosen": -3.007894992828369, |
|
"logits/rejected": -2.984534740447998, |
|
"logps/chosen": -291.2572326660156, |
|
"logps/rejected": -261.5260009765625, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.1174750104546547, |
|
"rewards/margins": 0.4169933795928955, |
|
"rewards/rejected": -0.5344683527946472, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 69.54000854492188, |
|
"learning_rate": 4.68186272461214e-07, |
|
"logits/chosen": -3.0481808185577393, |
|
"logits/rejected": -3.036348819732666, |
|
"logps/chosen": -273.8735656738281, |
|
"logps/rejected": -258.81866455078125, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.09798178821802139, |
|
"rewards/margins": 0.40805816650390625, |
|
"rewards/rejected": -0.5060399770736694, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 75.06998443603516, |
|
"learning_rate": 4.593894406464536e-07, |
|
"logits/chosen": -3.038364887237549, |
|
"logits/rejected": -3.0354368686676025, |
|
"logps/chosen": -296.1470031738281, |
|
"logps/rejected": -286.38592529296875, |
|
"loss": 0.5834, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.08966656774282455, |
|
"rewards/margins": 0.5078560709953308, |
|
"rewards/rejected": -0.5975226759910583, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 137.9207305908203, |
|
"learning_rate": 4.496247696115597e-07, |
|
"logits/chosen": -3.039151191711426, |
|
"logits/rejected": -3.0391647815704346, |
|
"logps/chosen": -303.8061828613281, |
|
"logps/rejected": -295.7118225097656, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.07505225390195847, |
|
"rewards/margins": 0.6039966344833374, |
|
"rewards/rejected": -0.6790488958358765, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 77.84745788574219, |
|
"learning_rate": 4.3893739358856455e-07, |
|
"logits/chosen": -3.008737087249756, |
|
"logits/rejected": -2.9903557300567627, |
|
"logps/chosen": -305.4298095703125, |
|
"logps/rejected": -278.39947509765625, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.14889295399188995, |
|
"rewards/margins": 0.5994052886962891, |
|
"rewards/rejected": -0.7482982277870178, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 67.5359115600586, |
|
"learning_rate": 4.273767117336217e-07, |
|
"logits/chosen": -3.0301320552825928, |
|
"logits/rejected": -3.012173891067505, |
|
"logps/chosen": -308.94891357421875, |
|
"logps/rejected": -295.3975524902344, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.14121344685554504, |
|
"rewards/margins": 0.6831844449043274, |
|
"rewards/rejected": -0.82439786195755, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 70.47966766357422, |
|
"learning_rate": 4.1499615979437983e-07, |
|
"logits/chosen": -2.9864563941955566, |
|
"logits/rejected": -2.9899039268493652, |
|
"logps/chosen": -279.08477783203125, |
|
"logps/rejected": -257.7115173339844, |
|
"loss": 0.5548, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09717626124620438, |
|
"rewards/margins": 0.624592661857605, |
|
"rewards/rejected": -0.7217689752578735, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 90.4140396118164, |
|
"learning_rate": 4.018529631194369e-07, |
|
"logits/chosen": -2.9848761558532715, |
|
"logits/rejected": -2.9709620475769043, |
|
"logps/chosen": -281.3067932128906, |
|
"logps/rejected": -271.0277099609375, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.2502523362636566, |
|
"rewards/margins": 0.6211402416229248, |
|
"rewards/rejected": -0.871392548084259, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 68.7781753540039, |
|
"learning_rate": 3.8800787215151164e-07, |
|
"logits/chosen": -3.032036066055298, |
|
"logits/rejected": -3.009941339492798, |
|
"logps/chosen": -321.748779296875, |
|
"logps/rejected": -281.04107666015625, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.12777641415596008, |
|
"rewards/margins": 0.6283574104309082, |
|
"rewards/rejected": -0.7561337947845459, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 66.1634292602539, |
|
"learning_rate": 3.7352488162693715e-07, |
|
"logits/chosen": -3.0462286472320557, |
|
"logits/rejected": -3.030794620513916, |
|
"logps/chosen": -274.5036926269531, |
|
"logps/rejected": -251.90499877929688, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.12955203652381897, |
|
"rewards/margins": 0.6119082570075989, |
|
"rewards/rejected": -0.7414603233337402, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 75.37867736816406, |
|
"learning_rate": 3.584709347793895e-07, |
|
"logits/chosen": -3.058922052383423, |
|
"logits/rejected": -3.0691912174224854, |
|
"logps/chosen": -301.69635009765625, |
|
"logps/rejected": -248.55593872070312, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2145983725786209, |
|
"rewards/margins": 0.5311049222946167, |
|
"rewards/rejected": -0.7457033395767212, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 75.07308959960938, |
|
"learning_rate": 3.4291561391508185e-07, |
|
"logits/chosen": -3.0233283042907715, |
|
"logits/rejected": -3.0086400508880615, |
|
"logps/chosen": -278.5184326171875, |
|
"logps/rejected": -270.7456970214844, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.28689366579055786, |
|
"rewards/margins": 0.6087759733200073, |
|
"rewards/rejected": -0.8956696391105652, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 71.18640899658203, |
|
"learning_rate": 3.2693081878964544e-07, |
|
"logits/chosen": -3.0013060569763184, |
|
"logits/rejected": -3.005615472793579, |
|
"logps/chosen": -292.04852294921875, |
|
"logps/rejected": -276.50811767578125, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20541390776634216, |
|
"rewards/margins": 0.6916528940200806, |
|
"rewards/rejected": -0.8970667719841003, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 85.28279113769531, |
|
"learning_rate": 3.1059043427330314e-07, |
|
"logits/chosen": -2.9617443084716797, |
|
"logits/rejected": -2.9682388305664062, |
|
"logps/chosen": -261.1861572265625, |
|
"logps/rejected": -263.7696838378906, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.204990416765213, |
|
"rewards/margins": 0.7386445999145508, |
|
"rewards/rejected": -0.9436351656913757, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 70.95091247558594, |
|
"learning_rate": 2.9396998884045234e-07, |
|
"logits/chosen": -3.0342681407928467, |
|
"logits/rejected": -3.040320873260498, |
|
"logps/chosen": -300.98077392578125, |
|
"logps/rejected": -272.7954406738281, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.25737327337265015, |
|
"rewards/margins": 0.695563018321991, |
|
"rewards/rejected": -0.9529362916946411, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 64.26698303222656, |
|
"learning_rate": 2.7714630546218634e-07, |
|
"logits/chosen": -3.1135382652282715, |
|
"logits/rejected": -3.1126351356506348, |
|
"logps/chosen": -326.8101806640625, |
|
"logps/rejected": -296.044921875, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.2218112051486969, |
|
"rewards/margins": 0.7040417790412903, |
|
"rewards/rejected": -0.9258529543876648, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 85.34664154052734, |
|
"learning_rate": 2.6019714651539645e-07, |
|
"logits/chosen": -3.0325405597686768, |
|
"logits/rejected": -3.017796516418457, |
|
"logps/chosen": -297.9241638183594, |
|
"logps/rejected": -286.4637756347656, |
|
"loss": 0.5647, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.274208128452301, |
|
"rewards/margins": 0.7521761655807495, |
|
"rewards/rejected": -1.0263843536376953, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 70.70326232910156, |
|
"learning_rate": 2.4320085434975556e-07, |
|
"logits/chosen": -3.0199804306030273, |
|
"logits/rejected": -3.01350736618042, |
|
"logps/chosen": -284.5586853027344, |
|
"logps/rejected": -259.7466125488281, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.2730976641178131, |
|
"rewards/margins": 0.7632043957710266, |
|
"rewards/rejected": -1.036302089691162, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 63.27799606323242, |
|
"learning_rate": 2.2623598917395436e-07, |
|
"logits/chosen": -2.9862048625946045, |
|
"logits/rejected": -3.020139217376709, |
|
"logps/chosen": -296.0469665527344, |
|
"logps/rejected": -276.1849365234375, |
|
"loss": 0.5463, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.23287267982959747, |
|
"rewards/margins": 0.7090679407119751, |
|
"rewards/rejected": -0.9419406652450562, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 66.7594223022461, |
|
"learning_rate": 2.0938096593494853e-07, |
|
"logits/chosen": -3.041605234146118, |
|
"logits/rejected": -3.052452325820923, |
|
"logps/chosen": -286.18707275390625, |
|
"logps/rejected": -260.3746032714844, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.11122454702854156, |
|
"rewards/margins": 0.802563488483429, |
|
"rewards/rejected": -0.9137881398200989, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 88.30416107177734, |
|
"learning_rate": 1.9271369186863618e-07, |
|
"logits/chosen": -3.0525062084198, |
|
"logits/rejected": -3.0589468479156494, |
|
"logps/chosen": -284.6452941894531, |
|
"logps/rejected": -277.75067138671875, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.22388038039207458, |
|
"rewards/margins": 0.6198626756668091, |
|
"rewards/rejected": -0.8437430262565613, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 65.08110809326172, |
|
"learning_rate": 1.763112063972739e-07, |
|
"logits/chosen": -3.044279098510742, |
|
"logits/rejected": -3.0555179119110107, |
|
"logps/chosen": -285.0969543457031, |
|
"logps/rejected": -259.02142333984375, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.16408179700374603, |
|
"rewards/margins": 0.8104633092880249, |
|
"rewards/rejected": -0.9745450019836426, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 87.96784210205078, |
|
"learning_rate": 1.602493250381003e-07, |
|
"logits/chosen": -3.0667061805725098, |
|
"logits/rejected": -3.064436435699463, |
|
"logps/chosen": -287.88372802734375, |
|
"logps/rejected": -248.08615112304688, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.27062320709228516, |
|
"rewards/margins": 0.6274420022964478, |
|
"rewards/rejected": -0.8980652093887329, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 67.1192398071289, |
|
"learning_rate": 1.446022889690875e-07, |
|
"logits/chosen": -3.0603392124176025, |
|
"logits/rejected": -3.0506479740142822, |
|
"logps/chosen": -275.33941650390625, |
|
"logps/rejected": -292.2793884277344, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.27399036288261414, |
|
"rewards/margins": 0.7174574136734009, |
|
"rewards/rejected": -0.9914478063583374, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 68.73091125488281, |
|
"learning_rate": 1.2944242187160015e-07, |
|
"logits/chosen": -3.0304224491119385, |
|
"logits/rejected": -3.0630006790161133, |
|
"logps/chosen": -265.5944519042969, |
|
"logps/rejected": -270.86041259765625, |
|
"loss": 0.5819, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.20603282749652863, |
|
"rewards/margins": 0.8553716540336609, |
|
"rewards/rejected": -1.0614043474197388, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 78.73789978027344, |
|
"learning_rate": 1.1483979563610069e-07, |
|
"logits/chosen": -3.044661045074463, |
|
"logits/rejected": -3.035492181777954, |
|
"logps/chosen": -274.28204345703125, |
|
"logps/rejected": -274.99151611328125, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.1646738052368164, |
|
"rewards/margins": 0.8839667439460754, |
|
"rewards/rejected": -1.048640489578247, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 70.24629211425781, |
|
"learning_rate": 1.0086190647607529e-07, |
|
"logits/chosen": -3.0631115436553955, |
|
"logits/rejected": -3.089351177215576, |
|
"logps/chosen": -287.9900817871094, |
|
"logps/rejected": -272.482421875, |
|
"loss": 0.5607, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.11857350915670395, |
|
"rewards/margins": 0.8544532060623169, |
|
"rewards/rejected": -0.9730268716812134, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 96.91629791259766, |
|
"learning_rate": 8.757336294724687e-08, |
|
"logits/chosen": -3.068084239959717, |
|
"logits/rejected": -3.0875658988952637, |
|
"logps/chosen": -291.7541198730469, |
|
"logps/rejected": -258.79132080078125, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.15175102651119232, |
|
"rewards/margins": 0.8772052526473999, |
|
"rewards/rejected": -1.028956413269043, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 69.54812622070312, |
|
"learning_rate": 7.503558731410958e-08, |
|
"logits/chosen": -3.07660174369812, |
|
"logits/rejected": -3.0733513832092285, |
|
"logps/chosen": -252.8855438232422, |
|
"logps/rejected": -264.5438232421875, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.31728893518447876, |
|
"rewards/margins": 0.6826174259185791, |
|
"rewards/rejected": -0.9999063611030579, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 68.41463470458984, |
|
"learning_rate": 6.330653164412908e-08, |
|
"logits/chosen": -3.0837528705596924, |
|
"logits/rejected": -3.074859619140625, |
|
"logps/chosen": -292.6845703125, |
|
"logps/rejected": -274.19189453125, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.18258486688137054, |
|
"rewards/margins": 0.7360013723373413, |
|
"rewards/rejected": -0.9185863733291626, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 73.8513412475586, |
|
"learning_rate": 5.2440409941877456e-08, |
|
"logits/chosen": -3.080451250076294, |
|
"logits/rejected": -3.1014645099639893, |
|
"logps/chosen": -282.2720642089844, |
|
"logps/rejected": -274.5783996582031, |
|
"loss": 0.5627, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.17349520325660706, |
|
"rewards/margins": 0.7617751359939575, |
|
"rewards/rejected": -0.9352704286575317, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 62.425689697265625, |
|
"learning_rate": 4.248744756122985e-08, |
|
"logits/chosen": -3.1146225929260254, |
|
"logits/rejected": -3.1159985065460205, |
|
"logps/chosen": -284.4311828613281, |
|
"logps/rejected": -270.375244140625, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1737302988767624, |
|
"rewards/margins": 0.7495090365409851, |
|
"rewards/rejected": -0.9232394099235535, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 67.75579833984375, |
|
"learning_rate": 3.349364905389032e-08, |
|
"logits/chosen": -3.039133071899414, |
|
"logits/rejected": -3.0417704582214355, |
|
"logps/chosen": -289.43792724609375, |
|
"logps/rejected": -279.08123779296875, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.19276252388954163, |
|
"rewards/margins": 0.713485062122345, |
|
"rewards/rejected": -0.906247615814209, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 60.96617126464844, |
|
"learning_rate": 2.550058552729639e-08, |
|
"logits/chosen": -3.0589489936828613, |
|
"logits/rejected": -3.0491528511047363, |
|
"logps/chosen": -298.5786437988281, |
|
"logps/rejected": -275.2989807128906, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.22146447002887726, |
|
"rewards/margins": 0.7704640626907349, |
|
"rewards/rejected": -0.9919285774230957, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 57.156639099121094, |
|
"learning_rate": 1.854520249477551e-08, |
|
"logits/chosen": -3.0775399208068848, |
|
"logits/rejected": -3.0917420387268066, |
|
"logps/chosen": -281.49053955078125, |
|
"logps/rejected": -252.451416015625, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.14828899502754211, |
|
"rewards/margins": 0.7465869188308716, |
|
"rewards/rejected": -0.8948760032653809, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 80.24808502197266, |
|
"learning_rate": 1.265964910610884e-08, |
|
"logits/chosen": -3.1026782989501953, |
|
"logits/rejected": -3.111166477203369, |
|
"logps/chosen": -285.04193115234375, |
|
"logps/rejected": -284.14410400390625, |
|
"loss": 0.5455, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1942686289548874, |
|
"rewards/margins": 0.8707529306411743, |
|
"rewards/rejected": -1.0650215148925781, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 61.17852020263672, |
|
"learning_rate": 7.871129547831062e-09, |
|
"logits/chosen": -3.0820913314819336, |
|
"logits/rejected": -3.0653717517852783, |
|
"logps/chosen": -278.7796325683594, |
|
"logps/rejected": -235.0684814453125, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.23389343917369843, |
|
"rewards/margins": 0.6883670091629028, |
|
"rewards/rejected": -0.9222604632377625, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 85.3263168334961, |
|
"learning_rate": 4.201777300124249e-09, |
|
"logits/chosen": -3.0574049949645996, |
|
"logits/rejected": -3.0575528144836426, |
|
"logps/chosen": -273.01531982421875, |
|
"logps/rejected": -243.1544189453125, |
|
"loss": 0.5495, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.13399073481559753, |
|
"rewards/margins": 0.6954258680343628, |
|
"rewards/rejected": -0.8294164538383484, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 67.3755874633789, |
|
"learning_rate": 1.6685528315146802e-09, |
|
"logits/chosen": -3.0953588485717773, |
|
"logits/rejected": -3.0970802307128906, |
|
"logps/chosen": -282.9346618652344, |
|
"logps/rejected": -261.16497802734375, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.24288193881511688, |
|
"rewards/margins": 0.7198012471199036, |
|
"rewards/rejected": -0.9626832008361816, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 61.79122543334961, |
|
"learning_rate": 2.831652042480093e-10, |
|
"logits/chosen": -3.086475372314453, |
|
"logits/rejected": -3.0854830741882324, |
|
"logps/chosen": -301.7154235839844, |
|
"logps/rejected": -291.1816101074219, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.232115238904953, |
|
"rewards/margins": 0.7247028350830078, |
|
"rewards/rejected": -0.9568179845809937, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.998691442030882, |
|
"step": 477, |
|
"total_flos": 5.005717235969294e+18, |
|
"train_loss": 0.5631812908364542, |
|
"train_runtime": 18694.5367, |
|
"train_samples_per_second": 3.27, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 256, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.005717235969294e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|