diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,3390 +1,9274 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 2.9959514170040484, - "eval_steps": 500, - "global_step": 1110, + "epoch": 2.9998200683740177, + "eval_steps": 100, + "global_step": 6252, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.01349527665317139, - "grad_norm": 85.5, - "learning_rate": 2.2522522522522524e-07, - "logits/chosen": -1.500240683555603, - "logits/rejected": -1.5190627574920654, - "logps/chosen": -159.05484008789062, - "logps/rejected": -164.59542846679688, - "loss": 0.6946, - "rewards/accuracies": 0.3499999940395355, - "rewards/chosen": 0.006750366650521755, - "rewards/margins": -0.002313111675903201, - "rewards/rejected": 0.0090634785592556, + "epoch": 0.002399088346428357, + "grad_norm": 34.25, + "learning_rate": 2.6652452025586355e-08, + "loss": 1.3138, "step": 5 }, { - "epoch": 0.02699055330634278, - "grad_norm": 92.5, - "learning_rate": 4.504504504504505e-07, - "logits/chosen": -1.4508098363876343, - "logits/rejected": -1.4352288246154785, - "logps/chosen": -141.31773376464844, - "logps/rejected": -167.95175170898438, - "loss": 0.7035, - "rewards/accuracies": 0.30000001192092896, - "rewards/chosen": -0.00739473570138216, - "rewards/margins": -0.01960981823503971, - "rewards/rejected": 0.01221508253365755, + "epoch": 0.004798176692856714, + "grad_norm": 42.5, + "learning_rate": 5.330490405117271e-08, + "loss": 1.3611, "step": 10 }, { - "epoch": 0.04048582995951417, - "grad_norm": 74.0, - "learning_rate": 6.756756756756758e-07, - "logits/chosen": -1.3884494304656982, - "logits/rejected": -1.3975419998168945, - "logps/chosen": -192.84548950195312, - "logps/rejected": -180.82046508789062, - "loss": 0.6966, - "rewards/accuracies": 0.44999998807907104, - "rewards/chosen": 0.004980484023690224, - "rewards/margins": -0.006102551706135273, - "rewards/rejected": 0.011083034798502922, + "epoch": 0.007197265039285071, + "grad_norm": 103.5, + "learning_rate": 7.995735607675907e-08, + "loss": 1.2608, "step": 15 }, { - "epoch": 0.05398110661268556, - "grad_norm": 99.0, - "learning_rate": 9.00900900900901e-07, - "logits/chosen": -1.4855096340179443, - "logits/rejected": -1.4922425746917725, - "logps/chosen": -148.1718292236328, - "logps/rejected": -152.18133544921875, - "loss": 0.6843, - "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": 0.002431074623018503, - "rewards/margins": 0.018751021474599838, - "rewards/rejected": -0.016319945454597473, + "epoch": 0.009596353385713428, + "grad_norm": 9.6875, + "learning_rate": 1.0660980810234542e-07, + "loss": 1.2714, "step": 20 }, { - "epoch": 0.06747638326585695, - "grad_norm": 113.0, - "learning_rate": 1.1261261261261262e-06, - "logits/chosen": -1.4175087213516235, - "logits/rejected": -1.4836245775222778, - "logps/chosen": -264.17132568359375, - "logps/rejected": -193.3080596923828, - "loss": 0.6911, - "rewards/accuracies": 0.625, - "rewards/chosen": 0.002699580043554306, - "rewards/margins": 0.005426598247140646, - "rewards/rejected": -0.00272701820358634, + "epoch": 0.011995441732141785, + "grad_norm": 7.5, + "learning_rate": 1.3326226012793176e-07, + "loss": 1.4005, "step": 25 }, { - "epoch": 0.08097165991902834, - "grad_norm": 89.0, - "learning_rate": 1.3513513513513515e-06, - "logits/chosen": -1.3333433866500854, - "logits/rejected": -1.4199435710906982, - "logps/chosen": -220.9799041748047, - "logps/rejected": -186.35690307617188, - "loss": 0.688, - "rewards/accuracies": 0.4749999940395355, - "rewards/chosen": 0.009898080490529537, - "rewards/margins": 0.012090040370821953, - "rewards/rejected": -0.0021919584833085537, + "epoch": 0.014394530078570143, + "grad_norm": 11.0, + "learning_rate": 1.5991471215351813e-07, + "loss": 1.3235, "step": 30 }, { - "epoch": 0.09446693657219973, - "grad_norm": 66.5, - "learning_rate": 1.5765765765765766e-06, - "logits/chosen": -1.5576092004776, - "logits/rejected": -1.493931770324707, - "logps/chosen": -148.85377502441406, - "logps/rejected": -168.85574340820312, - "loss": 0.6811, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.014485938474535942, - "rewards/margins": 0.025426441803574562, - "rewards/rejected": -0.010940502397716045, + "epoch": 0.0167936184249985, + "grad_norm": 10.625, + "learning_rate": 1.8656716417910447e-07, + "loss": 1.2774, "step": 35 }, { - "epoch": 0.10796221322537113, - "grad_norm": 87.5, - "learning_rate": 1.801801801801802e-06, - "logits/chosen": -1.460998296737671, - "logits/rejected": -1.4714558124542236, - "logps/chosen": -165.34341430664062, - "logps/rejected": -167.67092895507812, - "loss": 0.6808, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": 0.018663501366972923, - "rewards/margins": 0.027817577123641968, - "rewards/rejected": -0.009154075756669044, + "epoch": 0.019192706771426857, + "grad_norm": 6.46875, + "learning_rate": 2.1321961620469084e-07, + "loss": 1.1795, "step": 40 }, { - "epoch": 0.1214574898785425, - "grad_norm": 93.0, - "learning_rate": 2.0270270270270273e-06, - "logits/chosen": -1.3859444856643677, - "logits/rejected": -1.4024606943130493, - "logps/chosen": -162.58734130859375, - "logps/rejected": -191.04025268554688, - "loss": 0.6846, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": 0.009018613025546074, - "rewards/margins": 0.019761864095926285, - "rewards/rejected": -0.010743250139057636, + "epoch": 0.021591795117855216, + "grad_norm": 8.0625, + "learning_rate": 2.398720682302772e-07, + "loss": 1.345, "step": 45 }, { - "epoch": 0.1349527665317139, - "grad_norm": 89.5, - "learning_rate": 2.2522522522522524e-06, - "logits/chosen": -1.4222023487091064, - "logits/rejected": -1.54598069190979, - "logps/chosen": -285.5871276855469, - "logps/rejected": -167.19281005859375, - "loss": 0.6684, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.02634511888027191, - "rewards/margins": 0.052618540823459625, - "rewards/rejected": -0.026273420080542564, + "epoch": 0.02399088346428357, + "grad_norm": 10.625, + "learning_rate": 2.665245202558635e-07, + "loss": 1.3542, "step": 50 }, { - "epoch": 0.1484480431848853, - "grad_norm": 69.5, - "learning_rate": 2.4774774774774775e-06, - "logits/chosen": -1.5841736793518066, - "logits/rejected": -1.516913890838623, - "logps/chosen": -170.33505249023438, - "logps/rejected": -188.19314575195312, - "loss": 0.6639, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.004526221659034491, - "rewards/margins": 0.06425820291042328, - "rewards/rejected": -0.06878442317247391, + "epoch": 0.02638997181071193, + "grad_norm": 10.75, + "learning_rate": 2.931769722814499e-07, + "loss": 1.2539, "step": 55 }, { - "epoch": 0.16194331983805668, - "grad_norm": 72.0, - "learning_rate": 2.702702702702703e-06, - "logits/chosen": -1.438759207725525, - "logits/rejected": -1.3985353708267212, - "logps/chosen": -198.15411376953125, - "logps/rejected": -208.3758544921875, - "loss": 0.6501, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.047606997191905975, - "rewards/margins": 0.09706764668226242, - "rewards/rejected": -0.049460653215646744, + "epoch": 0.028789060157140285, + "grad_norm": 9.1875, + "learning_rate": 3.1982942430703626e-07, + "loss": 1.4054, "step": 60 }, { - "epoch": 0.17543859649122806, - "grad_norm": 164.0, - "learning_rate": 2.927927927927928e-06, - "logits/chosen": -1.4191879034042358, - "logits/rejected": -1.5293009281158447, - "logps/chosen": -217.4423370361328, - "logps/rejected": -202.1327362060547, - "loss": 0.6846, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.014217356219887733, - "rewards/margins": 0.027354473248124123, - "rewards/rejected": -0.013137114234268665, + "epoch": 0.031188148503568644, + "grad_norm": 7.125, + "learning_rate": 3.4648187633262263e-07, + "loss": 1.4234, "step": 65 }, { - "epoch": 0.18893387314439947, - "grad_norm": 75.5, - "learning_rate": 3.1531531531531532e-06, - "logits/chosen": -1.510615587234497, - "logits/rejected": -1.5524317026138306, - "logps/chosen": -277.9597473144531, - "logps/rejected": -174.99221801757812, - "loss": 0.6538, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.01016303151845932, - "rewards/margins": 0.08965723216533661, - "rewards/rejected": -0.07949419319629669, + "epoch": 0.033587236849997, + "grad_norm": 8.6875, + "learning_rate": 3.7313432835820895e-07, + "loss": 1.1964, "step": 70 }, { - "epoch": 0.20242914979757085, - "grad_norm": 127.5, - "learning_rate": 3.3783783783783788e-06, - "logits/chosen": -1.5467108488082886, - "logits/rejected": -1.7057151794433594, - "logps/chosen": -236.87759399414062, - "logps/rejected": -171.19088745117188, - "loss": 0.6316, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.024651767686009407, - "rewards/margins": 0.13629736006259918, - "rewards/rejected": -0.11164556443691254, + "epoch": 0.03598632519642536, + "grad_norm": 11.75, + "learning_rate": 3.9978678038379537e-07, + "loss": 1.3206, "step": 75 }, { - "epoch": 0.21592442645074225, - "grad_norm": 67.0, - "learning_rate": 3.603603603603604e-06, - "logits/chosen": -1.3438420295715332, - "logits/rejected": -1.5014269351959229, - "logps/chosen": -211.7142791748047, - "logps/rejected": -149.79403686523438, - "loss": 0.6296, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": 0.016034509986639023, - "rewards/margins": 0.1428973227739334, - "rewards/rejected": -0.12686282396316528, + "epoch": 0.038385413542853714, + "grad_norm": 9.8125, + "learning_rate": 4.264392324093817e-07, + "loss": 1.1716, "step": 80 }, { - "epoch": 0.22941970310391363, - "grad_norm": 67.0, - "learning_rate": 3.828828828828829e-06, - "logits/chosen": -1.580759048461914, - "logits/rejected": -1.5942776203155518, - "logps/chosen": -186.5341339111328, - "logps/rejected": -198.06871032714844, - "loss": 0.6112, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.011369394138455391, - "rewards/margins": 0.18740348517894745, - "rewards/rejected": -0.1987728774547577, + "epoch": 0.04078450188928207, + "grad_norm": 20.0, + "learning_rate": 4.53091684434968e-07, + "loss": 1.3047, "step": 85 }, { - "epoch": 0.242914979757085, - "grad_norm": 104.5, - "learning_rate": 4.0540540540540545e-06, - "logits/chosen": -1.5142263174057007, - "logits/rejected": -1.526908040046692, - "logps/chosen": -172.0498504638672, - "logps/rejected": -204.1090545654297, - "loss": 0.5947, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.001767634996213019, - "rewards/margins": 0.23096399009227753, - "rewards/rejected": -0.2327316552400589, + "epoch": 0.04318359023571043, + "grad_norm": 8.3125, + "learning_rate": 4.797441364605544e-07, + "loss": 1.2356, "step": 90 }, { - "epoch": 0.2564102564102564, - "grad_norm": 67.0, - "learning_rate": 4.27927927927928e-06, - "logits/chosen": -1.2964483499526978, - "logits/rejected": -1.287847876548767, - "logps/chosen": -152.49652099609375, - "logps/rejected": -162.25242614746094, - "loss": 0.6261, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": -0.031346581876277924, - "rewards/margins": 0.17656004428863525, - "rewards/rejected": -0.20790663361549377, + "epoch": 0.04558267858213879, + "grad_norm": 17.875, + "learning_rate": 5.063965884861407e-07, + "loss": 1.2631, "step": 95 }, { - "epoch": 0.2699055330634278, - "grad_norm": 122.0, - "learning_rate": 4.504504504504505e-06, - "logits/chosen": -1.6146646738052368, - "logits/rejected": -1.6288648843765259, - "logps/chosen": -245.85440063476562, - "logps/rejected": -252.33163452148438, - "loss": 0.5388, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.008112089708447456, - "rewards/margins": 0.4617583155632019, - "rewards/rejected": -0.4536462426185608, + "epoch": 0.04798176692856714, + "grad_norm": 8.8125, + "learning_rate": 5.33049040511727e-07, + "loss": 1.3332, + "step": 100 + }, + { + "epoch": 0.04798176692856714, + "eval_loss": 1.3139781951904297, + "eval_runtime": 177.7725, + "eval_samples_per_second": 41.682, + "eval_steps_per_second": 10.423, "step": 100 }, { - "epoch": 0.2834008097165992, - "grad_norm": 54.75, - "learning_rate": 4.72972972972973e-06, - "logits/chosen": -1.7181060314178467, - "logits/rejected": -1.6348508596420288, - "logps/chosen": -181.34054565429688, - "logps/rejected": -187.49969482421875, - "loss": 0.5332, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.032877303659915924, - "rewards/margins": 0.5002557635307312, - "rewards/rejected": -0.5331330895423889, + "epoch": 0.0503808552749955, + "grad_norm": 13.75, + "learning_rate": 5.597014925373135e-07, + "loss": 1.2867, "step": 105 }, { - "epoch": 0.2968960863697706, - "grad_norm": 93.5, - "learning_rate": 4.954954954954955e-06, - "logits/chosen": -1.471880555152893, - "logits/rejected": -1.4882009029388428, - "logps/chosen": -239.46017456054688, - "logps/rejected": -203.43408203125, - "loss": 0.639, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": -0.20699986815452576, - "rewards/margins": 0.2872315049171448, - "rewards/rejected": -0.49423137307167053, + "epoch": 0.05277994362142386, + "grad_norm": 9.3125, + "learning_rate": 5.863539445628998e-07, + "loss": 1.2629, "step": 110 }, { - "epoch": 0.31039136302294196, - "grad_norm": 83.5, - "learning_rate": 4.999802215142814e-06, - "logits/chosen": -1.572249174118042, - "logits/rejected": -1.5214914083480835, - "logps/chosen": -181.75244140625, - "logps/rejected": -206.9883270263672, - "loss": 0.4953, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.2786533534526825, - "rewards/margins": 0.6539293527603149, - "rewards/rejected": -0.932582676410675, + "epoch": 0.05517903196785222, + "grad_norm": 11.625, + "learning_rate": 6.130063965884862e-07, + "loss": 1.3406, "step": 115 }, { - "epoch": 0.32388663967611336, - "grad_norm": 63.25, - "learning_rate": 4.998998767795805e-06, - "logits/chosen": -1.3965647220611572, - "logits/rejected": -1.5122724771499634, - "logps/chosen": -185.1367645263672, - "logps/rejected": -141.9375457763672, - "loss": 0.5188, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.12487339973449707, - "rewards/margins": 0.5116696357727051, - "rewards/rejected": -0.6365430951118469, + "epoch": 0.05757812031428057, + "grad_norm": 8.375, + "learning_rate": 6.396588486140725e-07, + "loss": 1.2352, "step": 120 }, { - "epoch": 0.33738191632928477, - "grad_norm": 94.5, - "learning_rate": 4.9975774948882615e-06, - "logits/chosen": -1.5592033863067627, - "logits/rejected": -1.5545122623443604, - "logps/chosen": -134.59095764160156, - "logps/rejected": -159.44424438476562, - "loss": 0.5878, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.218244269490242, - "rewards/margins": 0.560061514377594, - "rewards/rejected": -0.7783057689666748, + "epoch": 0.05997720866070893, + "grad_norm": 18.25, + "learning_rate": 6.663113006396589e-07, + "loss": 1.3535, "step": 125 }, { - "epoch": 0.3508771929824561, - "grad_norm": 159.0, - "learning_rate": 4.995538747800403e-06, - "logits/chosen": -1.5116926431655884, - "logits/rejected": -1.5991663932800293, - "logps/chosen": -196.37417602539062, - "logps/rejected": -162.26467895507812, - "loss": 0.555, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.6864209175109863, - "rewards/margins": 0.5580738186836243, - "rewards/rejected": -1.2444946765899658, + "epoch": 0.06237629700713729, + "grad_norm": 11.375, + "learning_rate": 6.929637526652453e-07, + "loss": 1.299, "step": 130 }, { - "epoch": 0.3643724696356275, - "grad_norm": 77.5, - "learning_rate": 4.9928830305701164e-06, - "logits/chosen": -1.4444091320037842, - "logits/rejected": -1.404262661933899, - "logps/chosen": -185.04042053222656, - "logps/rejected": -186.958740234375, - "loss": 0.4598, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.22133490443229675, - "rewards/margins": 0.7992109060287476, - "rewards/rejected": -1.0205457210540771, + "epoch": 0.06477538535356564, + "grad_norm": 7.15625, + "learning_rate": 7.196162046908316e-07, + "loss": 1.2223, "step": 135 }, { - "epoch": 0.37786774628879893, - "grad_norm": 50.25, - "learning_rate": 4.98961099976835e-06, - "logits/chosen": -1.5445549488067627, - "logits/rejected": -1.586544156074524, - "logps/chosen": -199.28408813476562, - "logps/rejected": -183.11032104492188, - "loss": 0.4536, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.06479507684707642, - "rewards/margins": 0.9296582341194153, - "rewards/rejected": -0.9944533109664917, + "epoch": 0.067174473699994, + "grad_norm": 9.0, + "learning_rate": 7.462686567164179e-07, + "loss": 1.2659, "step": 140 }, { - "epoch": 0.3913630229419703, - "grad_norm": 68.0, - "learning_rate": 4.985723464336783e-06, - "logits/chosen": -1.4274847507476807, - "logits/rejected": -1.4104160070419312, - "logps/chosen": -185.9368896484375, - "logps/rejected": -188.2207489013672, - "loss": 0.4902, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.17553560435771942, - "rewards/margins": 0.6832131743431091, - "rewards/rejected": -0.8587487936019897, + "epoch": 0.06957356204642236, + "grad_norm": 140.0, + "learning_rate": 7.729211087420044e-07, + "loss": 1.3394, "step": 145 }, { - "epoch": 0.4048582995951417, - "grad_norm": 65.0, - "learning_rate": 4.9812213853878376e-06, - "logits/chosen": -1.6410919427871704, - "logits/rejected": -1.6832342147827148, - "logps/chosen": -168.22726440429688, - "logps/rejected": -165.28591918945312, - "loss": 0.4942, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.19691412150859833, - "rewards/margins": 0.8052200078964233, - "rewards/rejected": -1.002134084701538, + "epoch": 0.07197265039285072, + "grad_norm": 11.0, + "learning_rate": 7.995735607675907e-07, + "loss": 1.3174, "step": 150 }, { - "epoch": 0.4183535762483131, - "grad_norm": 84.0, - "learning_rate": 4.9761058759670625e-06, - "logits/chosen": -1.4086945056915283, - "logits/rejected": -1.3933309316635132, - "logps/chosen": -200.54226684570312, - "logps/rejected": -191.30516052246094, - "loss": 0.5805, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.38961219787597656, - "rewards/margins": 0.6619648337364197, - "rewards/rejected": -1.051577091217041, + "epoch": 0.07437173873927908, + "grad_norm": 49.25, + "learning_rate": 8.26226012793177e-07, + "loss": 1.2811, "step": 155 }, { - "epoch": 0.4318488529014845, - "grad_norm": 48.75, - "learning_rate": 4.970378200777949e-06, - "logits/chosen": -1.4240281581878662, - "logits/rejected": -1.5275284051895142, - "logps/chosen": -149.6121826171875, - "logps/rejected": -153.7329864501953, - "loss": 0.3726, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.22904136776924133, - "rewards/margins": 1.2087788581848145, - "rewards/rejected": -1.4378201961517334, + "epoch": 0.07677082708570743, + "grad_norm": 8.625, + "learning_rate": 8.528784648187634e-07, + "loss": 1.2707, "step": 160 }, { - "epoch": 0.44534412955465585, - "grad_norm": 57.5, - "learning_rate": 4.964039775869271e-06, - "logits/chosen": -1.5353929996490479, - "logits/rejected": -1.5400171279907227, - "logps/chosen": -172.69320678710938, - "logps/rejected": -186.09596252441406, - "loss": 0.4821, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.14687059819698334, - "rewards/margins": 1.0381742715835571, - "rewards/rejected": -1.1850450038909912, + "epoch": 0.07916991543213579, + "grad_norm": 7.625, + "learning_rate": 8.795309168443497e-07, + "loss": 1.2574, "step": 165 }, { - "epoch": 0.45883940620782726, - "grad_norm": 68.5, - "learning_rate": 4.957092168284987e-06, - "logits/chosen": -1.5351091623306274, - "logits/rejected": -1.480067253112793, - "logps/chosen": -224.7134246826172, - "logps/rejected": -280.2825012207031, - "loss": 0.4522, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.15150094032287598, - "rewards/margins": 0.8322998881340027, - "rewards/rejected": -0.9838007092475891, + "epoch": 0.08156900377856414, + "grad_norm": 18.125, + "learning_rate": 9.06183368869936e-07, + "loss": 1.3314, "step": 170 }, { - "epoch": 0.47233468286099867, - "grad_norm": 47.25, - "learning_rate": 4.949537095676824e-06, - "logits/chosen": -1.5415345430374146, - "logits/rejected": -1.4604427814483643, - "logps/chosen": -173.94085693359375, - "logps/rejected": -215.93075561523438, - "loss": 0.45, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.3776322901248932, - "rewards/margins": 1.5937398672103882, - "rewards/rejected": -1.9713722467422485, + "epoch": 0.0839680921249925, + "grad_norm": 8.0625, + "learning_rate": 9.328358208955225e-07, + "loss": 1.3046, "step": 175 }, { - "epoch": 0.48582995951417, - "grad_norm": 95.5, - "learning_rate": 4.9413764258796236e-06, - "logits/chosen": -1.5088344812393188, - "logits/rejected": -1.6158044338226318, - "logps/chosen": -273.03594970703125, - "logps/rejected": -221.93997192382812, - "loss": 0.5881, - "rewards/accuracies": 0.625, - "rewards/chosen": -0.25630080699920654, - "rewards/margins": 0.5983410477638245, - "rewards/rejected": -0.8546417951583862, + "epoch": 0.08636718047142086, + "grad_norm": 6.875, + "learning_rate": 9.594882729211088e-07, + "loss": 1.3634, "step": 180 }, { - "epoch": 0.4993252361673414, - "grad_norm": 83.0, - "learning_rate": 4.93261217644956e-06, - "logits/chosen": -1.3866004943847656, - "logits/rejected": -1.363396406173706, - "logps/chosen": -211.2840576171875, - "logps/rejected": -256.87811279296875, - "loss": 0.4912, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.24753907322883606, - "rewards/margins": 0.9087351560592651, - "rewards/rejected": -1.1562741994857788, + "epoch": 0.08876626881784921, + "grad_norm": 85.0, + "learning_rate": 9.861407249466952e-07, + "loss": 1.2065, "step": 185 }, { - "epoch": 0.5128205128205128, - "grad_norm": 79.0, - "learning_rate": 4.923246514165339e-06, - "logits/chosen": -1.357788324356079, - "logits/rejected": -1.322389841079712, - "logps/chosen": -221.6494598388672, - "logps/rejected": -238.56637573242188, - "loss": 0.3841, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.21661829948425293, - "rewards/margins": 1.6020748615264893, - "rewards/rejected": -1.8186931610107422, + "epoch": 0.09116535716427758, + "grad_norm": 13.6875, + "learning_rate": 1.0127931769722815e-06, + "loss": 1.188, "step": 190 }, { - "epoch": 0.5263157894736842, - "grad_norm": 78.0, - "learning_rate": 4.913281754492509e-06, - "logits/chosen": -1.5164716243743896, - "logits/rejected": -1.5658130645751953, - "logps/chosen": -211.942138671875, - "logps/rejected": -251.4232177734375, - "loss": 0.439, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.2759682238101959, - "rewards/margins": 1.2201299667358398, - "rewards/rejected": -1.4960981607437134, + "epoch": 0.09356444551070593, + "grad_norm": 31.125, + "learning_rate": 1.0394456289978678e-06, + "loss": 1.3453, "step": 195 }, { - "epoch": 0.5398110661268556, - "grad_norm": 68.0, - "learning_rate": 4.902720361011007e-06, - "logits/chosen": -1.43938148021698, - "logits/rejected": -1.4012665748596191, - "logps/chosen": -198.0753936767578, - "logps/rejected": -230.1431121826172, - "loss": 0.436, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.4660988748073578, - "rewards/margins": 1.3129799365997314, - "rewards/rejected": -1.7790788412094116, + "epoch": 0.09596353385713428, + "grad_norm": 16.75, + "learning_rate": 1.066098081023454e-06, + "loss": 1.2185, + "step": 200 + }, + { + "epoch": 0.09596353385713428, + "eval_loss": 1.2878996133804321, + "eval_runtime": 176.065, + "eval_samples_per_second": 42.087, + "eval_steps_per_second": 10.525, "step": 200 }, { - "epoch": 0.553306342780027, - "grad_norm": 116.0, - "learning_rate": 4.891564944806095e-06, - "logits/chosen": -1.3829123973846436, - "logits/rejected": -1.4532912969589233, - "logps/chosen": -204.92056274414062, - "logps/rejected": -184.2178192138672, - "loss": 0.4408, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.4832437038421631, - "rewards/margins": 1.4000451564788818, - "rewards/rejected": -1.8832887411117554, + "epoch": 0.09836262220356265, + "grad_norm": 12.75, + "learning_rate": 1.0927505330490406e-06, + "loss": 1.2153, "step": 205 }, { - "epoch": 0.5668016194331984, - "grad_norm": 39.0, - "learning_rate": 4.879818263822816e-06, - "logits/chosen": -1.5301909446716309, - "logits/rejected": -1.4669263362884521, - "logps/chosen": -176.71139526367188, - "logps/rejected": -210.8941192626953, - "loss": 0.4359, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.7508934140205383, - "rewards/margins": 1.5884822607040405, - "rewards/rejected": -2.3393757343292236, + "epoch": 0.100761710549991, + "grad_norm": 56.0, + "learning_rate": 1.119402985074627e-06, + "loss": 1.3089, "step": 210 }, { - "epoch": 0.5802968960863698, - "grad_norm": 118.5, - "learning_rate": 4.867483222184158e-06, - "logits/chosen": -1.4969114065170288, - "logits/rejected": -1.4513076543807983, - "logps/chosen": -183.51742553710938, - "logps/rejected": -234.21078491210938, - "loss": 0.4083, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -2.1092641353607178, - "rewards/margins": 2.7672932147979736, - "rewards/rejected": -4.876556873321533, + "epoch": 0.10316079889641937, + "grad_norm": 17.5, + "learning_rate": 1.1460554371002133e-06, + "loss": 1.228, "step": 215 }, { - "epoch": 0.5937921727395412, - "grad_norm": 82.5, - "learning_rate": 4.854562869473063e-06, - "logits/chosen": -1.6114156246185303, - "logits/rejected": -1.6086403131484985, - "logps/chosen": -158.5917510986328, - "logps/rejected": -182.981689453125, - "loss": 0.5288, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -1.8133976459503174, - "rewards/margins": 2.3693175315856934, - "rewards/rejected": -4.182714939117432, + "epoch": 0.10555988724284772, + "grad_norm": 14.5, + "learning_rate": 1.1727078891257996e-06, + "loss": 1.293, "step": 220 }, { - "epoch": 0.6072874493927125, - "grad_norm": 64.5, - "learning_rate": 4.841060399978481e-06, - "logits/chosen": -1.4258265495300293, - "logits/rejected": -1.5041557550430298, - "logps/chosen": -203.29505920410156, - "logps/rejected": -173.55667114257812, - "loss": 0.467, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.451561838388443, - "rewards/margins": 0.9895628094673157, - "rewards/rejected": -1.4411247968673706, + "epoch": 0.10795897558927607, + "grad_norm": 9.1875, + "learning_rate": 1.199360341151386e-06, + "loss": 1.2792, "step": 225 }, { - "epoch": 0.6207827260458839, - "grad_norm": 53.75, - "learning_rate": 4.826979151905655e-06, - "logits/chosen": -1.3954380750656128, - "logits/rejected": -1.4369020462036133, - "logps/chosen": -133.7052764892578, - "logps/rejected": -152.63189697265625, - "loss": 0.3819, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.21247024834156036, - "rewards/margins": 1.1218936443328857, - "rewards/rejected": -1.3343639373779297, + "epoch": 0.11035806393570444, + "grad_norm": 8.1875, + "learning_rate": 1.2260127931769724e-06, + "loss": 1.3112, "step": 230 }, { - "epoch": 0.6342780026990553, - "grad_norm": 34.25, - "learning_rate": 4.812322606550813e-06, - "logits/chosen": -1.477416753768921, - "logits/rejected": -1.35099196434021, - "logps/chosen": -183.8603057861328, - "logps/rejected": -200.47122192382812, - "loss": 0.403, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.22856561839580536, - "rewards/margins": 1.1782000064849854, - "rewards/rejected": -1.4067654609680176, + "epoch": 0.11275715228213279, + "grad_norm": 11.125, + "learning_rate": 1.2526652452025587e-06, + "loss": 1.2376, "step": 235 }, { - "epoch": 0.6477732793522267, - "grad_norm": 142.0, - "learning_rate": 4.7970943874404904e-06, - "logits/chosen": -1.5746204853057861, - "logits/rejected": -1.5317301750183105, - "logps/chosen": -132.62966918945312, - "logps/rejected": -169.4604034423828, - "loss": 0.4905, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.2887588441371918, - "rewards/margins": 1.0178512334823608, - "rewards/rejected": -1.3066102266311646, + "epoch": 0.11515624062856114, + "grad_norm": 7.9375, + "learning_rate": 1.279317697228145e-06, + "loss": 1.2712, "step": 240 }, { - "epoch": 0.6612685560053981, - "grad_norm": 81.5, - "learning_rate": 4.781298259435691e-06, - "logits/chosen": -1.4620139598846436, - "logits/rejected": -1.5366100072860718, - "logps/chosen": -207.0232696533203, - "logps/rejected": -182.5987548828125, - "loss": 0.3498, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.38011789321899414, - "rewards/margins": 1.517073392868042, - "rewards/rejected": -1.8971912860870361, + "epoch": 0.1175553289749895, + "grad_norm": 8.5, + "learning_rate": 1.3059701492537314e-06, + "loss": 1.2524, "step": 245 }, { - "epoch": 0.6747638326585695, - "grad_norm": 59.0, - "learning_rate": 4.7649381278011e-06, - "logits/chosen": -1.525059700012207, - "logits/rejected": -1.4892899990081787, - "logps/chosen": -132.02548217773438, - "logps/rejected": -172.75595092773438, - "loss": 0.4596, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.47625675797462463, - "rewards/margins": 1.6200672388076782, - "rewards/rejected": -2.0963237285614014, + "epoch": 0.11995441732141786, + "grad_norm": 19.5, + "learning_rate": 1.3326226012793179e-06, + "loss": 1.3747, "step": 250 }, { - "epoch": 0.6882591093117408, - "grad_norm": 93.5, - "learning_rate": 4.748018037239592e-06, - "logits/chosen": -1.6185624599456787, - "logits/rejected": -1.6007747650146484, - "logps/chosen": -190.04196166992188, - "logps/rejected": -271.9373474121094, - "loss": 0.377, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.29186195135116577, - "rewards/margins": 1.4247747659683228, - "rewards/rejected": -1.7166366577148438, + "epoch": 0.12235350566784622, + "grad_norm": 13.8125, + "learning_rate": 1.3592750533049042e-06, + "loss": 1.1628, "step": 255 }, { - "epoch": 0.7017543859649122, - "grad_norm": 54.75, - "learning_rate": 4.7305421708922596e-06, - "logits/chosen": -1.5387685298919678, - "logits/rejected": -1.4462766647338867, - "logps/chosen": -199.54568481445312, - "logps/rejected": -219.14901733398438, - "loss": 0.5013, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.4976847767829895, - "rewards/margins": 1.649714708328247, - "rewards/rejected": -2.147399425506592, + "epoch": 0.12475259401427458, + "grad_norm": 7.96875, + "learning_rate": 1.3859275053304905e-06, + "loss": 1.2235, "step": 260 }, { - "epoch": 0.7152496626180836, - "grad_norm": 92.0, - "learning_rate": 4.712514849304219e-06, - "logits/chosen": -1.4592026472091675, - "logits/rejected": -1.5086675882339478, - "logps/chosen": -203.43939208984375, - "logps/rejected": -182.27008056640625, - "loss": 0.3704, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.30615222454071045, - "rewards/margins": 1.7558097839355469, - "rewards/rejected": -2.0619618892669678, + "epoch": 0.12715168236070293, + "grad_norm": 13.625, + "learning_rate": 1.412579957356077e-06, + "loss": 1.2642, "step": 265 }, { - "epoch": 0.728744939271255, - "grad_norm": 94.0, - "learning_rate": 4.693940529356444e-06, - "logits/chosen": -1.5462654829025269, - "logits/rejected": -1.5494886636734009, - "logps/chosen": -204.8282470703125, - "logps/rejected": -262.1166076660156, - "loss": 0.4081, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.18543025851249695, - "rewards/margins": 1.581555724143982, - "rewards/rejected": -1.7669861316680908, + "epoch": 0.12955077070713128, + "grad_norm": 5.21875, + "learning_rate": 1.4392324093816632e-06, + "loss": 1.2196, "step": 270 }, { - "epoch": 0.7422402159244265, - "grad_norm": 49.5, - "learning_rate": 4.674823803163899e-06, - "logits/chosen": -1.5121240615844727, - "logits/rejected": -1.378418207168579, - "logps/chosen": -176.5196533203125, - "logps/rejected": -259.83154296875, - "loss": 0.2792, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.2915424704551697, - "rewards/margins": 2.276181697845459, - "rewards/rejected": -2.5677244663238525, + "epoch": 0.13194985905355966, + "grad_norm": 12.875, + "learning_rate": 1.4658848614072497e-06, + "loss": 1.2465, "step": 275 }, { - "epoch": 0.7557354925775979, - "grad_norm": 63.5, - "learning_rate": 4.655169396940229e-06, - "logits/chosen": -1.488743782043457, - "logits/rejected": -1.4984915256500244, - "logps/chosen": -227.04574584960938, - "logps/rejected": -223.5692596435547, - "loss": 0.3756, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.3987753987312317, - "rewards/margins": 1.6648337841033936, - "rewards/rejected": -2.0636088848114014, + "epoch": 0.134348947399988, + "grad_norm": 14.1875, + "learning_rate": 1.4925373134328358e-06, + "loss": 1.2708, "step": 280 }, { - "epoch": 0.7692307692307693, - "grad_norm": 62.75, - "learning_rate": 4.6349821698293025e-06, - "logits/chosen": -1.4782928228378296, - "logits/rejected": -1.480554223060608, - "logps/chosen": -168.77146911621094, - "logps/rejected": -283.3312683105469, - "loss": 0.3639, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.32490164041519165, - "rewards/margins": 1.6070611476898193, - "rewards/rejected": -1.9319626092910767, + "epoch": 0.13674803574641636, + "grad_norm": 10.625, + "learning_rate": 1.5191897654584223e-06, + "loss": 1.1976, "step": 285 }, { - "epoch": 0.7827260458839406, - "grad_norm": 85.0, - "learning_rate": 4.6142671127038905e-06, - "logits/chosen": -1.5204181671142578, - "logits/rejected": -1.4846007823944092, - "logps/chosen": -122.49859619140625, - "logps/rejected": -159.67666625976562, - "loss": 0.3855, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.5428152680397034, - "rewards/margins": 1.4056587219238281, - "rewards/rejected": -1.9484741687774658, + "epoch": 0.13914712409284472, + "grad_norm": 19.875, + "learning_rate": 1.5458422174840088e-06, + "loss": 1.2336, "step": 290 }, { - "epoch": 0.796221322537112, - "grad_norm": 124.5, - "learning_rate": 4.593029346931777e-06, - "logits/chosen": -1.5233218669891357, - "logits/rejected": -1.4880311489105225, - "logps/chosen": -190.8978271484375, - "logps/rejected": -212.50808715820312, - "loss": 0.4094, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.5791584253311157, - "rewards/margins": 1.7821210622787476, - "rewards/rejected": -2.3612794876098633, + "epoch": 0.14154621243927307, + "grad_norm": 13.9375, + "learning_rate": 1.572494669509595e-06, + "loss": 1.2791, "step": 295 }, { - "epoch": 0.8097165991902834, - "grad_norm": 121.0, - "learning_rate": 4.571274123109606e-06, - "logits/chosen": -1.5600152015686035, - "logits/rejected": -1.5772325992584229, - "logps/chosen": -211.6980438232422, - "logps/rejected": -159.11520385742188, - "loss": 0.5103, - "rewards/accuracies": 0.75, - "rewards/chosen": -0.5033570528030396, - "rewards/margins": 1.3233957290649414, - "rewards/rejected": -1.8267529010772705, + "epoch": 0.14394530078570145, + "grad_norm": 32.75, + "learning_rate": 1.5991471215351815e-06, + "loss": 1.1976, + "step": 300 + }, + { + "epoch": 0.14394530078570145, + "eval_loss": 1.2532644271850586, + "eval_runtime": 176.1055, + "eval_samples_per_second": 42.077, + "eval_steps_per_second": 10.522, "step": 300 }, { - "epoch": 0.8232118758434548, - "grad_norm": 87.0, - "learning_rate": 4.549006819764779e-06, - "logits/chosen": -1.3667839765548706, - "logits/rejected": -1.408111333847046, - "logps/chosen": -252.8665008544922, - "logps/rejected": -246.56600952148438, - "loss": 0.6645, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": -0.4156951904296875, - "rewards/margins": 0.9969050288200378, - "rewards/rejected": -1.4126002788543701, + "epoch": 0.1463443891321298, + "grad_norm": 5.25, + "learning_rate": 1.6257995735607676e-06, + "loss": 1.2925, "step": 305 }, { - "epoch": 0.8367071524966262, - "grad_norm": 65.0, - "learning_rate": 4.52623294202573e-06, - "logits/chosen": -1.5357733964920044, - "logits/rejected": -1.6000627279281616, - "logps/chosen": -203.2954864501953, - "logps/rejected": -178.47378540039062, - "loss": 0.3625, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.1179068312048912, - "rewards/margins": 1.5459201335906982, - "rewards/rejected": -1.6638271808624268, + "epoch": 0.14874347747855815, + "grad_norm": 6.78125, + "learning_rate": 1.652452025586354e-06, + "loss": 1.2051, "step": 310 }, { - "epoch": 0.8502024291497976, - "grad_norm": 38.75, - "learning_rate": 4.502958120260894e-06, - "logits/chosen": -1.4177687168121338, - "logits/rejected": -1.466953992843628, - "logps/chosen": -208.93142700195312, - "logps/rejected": -204.0532989501953, - "loss": 0.3943, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.10137398540973663, - "rewards/margins": 1.5527517795562744, - "rewards/rejected": -1.6541255712509155, + "epoch": 0.1511425658249865, + "grad_norm": 8.9375, + "learning_rate": 1.6791044776119406e-06, + "loss": 1.3741, "step": 315 }, { - "epoch": 0.863697705802969, - "grad_norm": 94.5, - "learning_rate": 4.479188108686714e-06, - "logits/chosen": -1.543738603591919, - "logits/rejected": -1.5562658309936523, - "logps/chosen": -195.75601196289062, - "logps/rejected": -243.9476776123047, - "loss": 0.393, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.09615819901227951, - "rewards/margins": 1.808638334274292, - "rewards/rejected": -1.9047966003417969, + "epoch": 0.15354165417141485, + "grad_norm": 83.5, + "learning_rate": 1.7057569296375267e-06, + "loss": 1.219, "step": 320 }, { - "epoch": 0.8771929824561403, - "grad_norm": 53.25, - "learning_rate": 4.454928783945033e-06, - "logits/chosen": -1.4368815422058105, - "logits/rejected": -1.465288519859314, - "logps/chosen": -182.02488708496094, - "logps/rejected": -166.5155487060547, - "loss": 0.3673, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.09929310530424118, - "rewards/margins": 1.477452039718628, - "rewards/rejected": -1.5767452716827393, + "epoch": 0.1559407425178432, + "grad_norm": 9.75, + "learning_rate": 1.7324093816631133e-06, + "loss": 1.233, "step": 325 }, { - "epoch": 0.8906882591093117, - "grad_norm": 94.5, - "learning_rate": 4.430186143650216e-06, - "logits/chosen": -1.3891671895980835, - "logits/rejected": -1.3638372421264648, - "logps/chosen": -167.63204956054688, - "logps/rejected": -166.39913940429688, - "loss": 0.4332, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.18427793681621552, - "rewards/margins": 1.2914403676986694, - "rewards/rejected": -1.4757182598114014, + "epoch": 0.15833983086427159, + "grad_norm": 12.1875, + "learning_rate": 1.7590618336886994e-06, + "loss": 1.179, "step": 330 }, { - "epoch": 0.9041835357624831, - "grad_norm": 68.5, - "learning_rate": 4.404966304906363e-06, - "logits/chosen": -1.5300304889678955, - "logits/rejected": -1.541245698928833, - "logps/chosen": -237.1887969970703, - "logps/rejected": -258.4833984375, - "loss": 0.2851, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.2509092092514038, - "rewards/margins": 2.2454378604888916, - "rewards/rejected": -2.496346950531006, + "epoch": 0.16073891921069994, + "grad_norm": 14.4375, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.2121, "step": 335 }, { - "epoch": 0.9176788124156545, - "grad_norm": 91.5, - "learning_rate": 4.379275502794984e-06, - "logits/chosen": -1.4159671068191528, - "logits/rejected": -1.3942148685455322, - "logps/chosen": -204.76268005371094, - "logps/rejected": -194.83755493164062, - "loss": 0.3974, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.590856671333313, - "rewards/margins": 1.8947960138320923, - "rewards/rejected": -2.4856529235839844, + "epoch": 0.1631380075571283, + "grad_norm": 7.9375, + "learning_rate": 1.812366737739872e-06, + "loss": 1.1753, "step": 340 }, { - "epoch": 0.9311740890688259, - "grad_norm": 24.875, - "learning_rate": 4.3531200888335015e-06, - "logits/chosen": -1.499260663986206, - "logits/rejected": -1.5041369199752808, - "logps/chosen": -158.403076171875, - "logps/rejected": -188.42300415039062, - "loss": 0.3399, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.4716406464576721, - "rewards/margins": 2.255904197692871, - "rewards/rejected": -2.7275447845458984, + "epoch": 0.16553709590355664, + "grad_norm": 4.625, + "learning_rate": 1.8390191897654585e-06, + "loss": 1.2174, "step": 345 }, { - "epoch": 0.9446693657219973, - "grad_norm": 49.0, - "learning_rate": 4.326506529404973e-06, - "logits/chosen": -1.4987239837646484, - "logits/rejected": -1.5489791631698608, - "logps/chosen": -228.030517578125, - "logps/rejected": -199.24453735351562, - "loss": 0.4954, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.5366212129592896, - "rewards/margins": 1.576836347579956, - "rewards/rejected": -2.113457441329956, + "epoch": 0.167936184249985, + "grad_norm": 4.28125, + "learning_rate": 1.865671641791045e-06, + "loss": 1.219, "step": 350 }, { - "epoch": 0.9581646423751687, - "grad_norm": 50.5, - "learning_rate": 4.299441404159409e-06, - "logits/chosen": -1.4427543878555298, - "logits/rejected": -1.4410443305969238, - "logps/chosen": -142.67196655273438, - "logps/rejected": -182.15530395507812, - "loss": 0.3882, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.45489731431007385, - "rewards/margins": 1.885206937789917, - "rewards/rejected": -2.340104341506958, + "epoch": 0.17033527259641337, + "grad_norm": 9.6875, + "learning_rate": 1.8923240938166312e-06, + "loss": 1.1994, "step": 355 }, { - "epoch": 0.97165991902834, - "grad_norm": 71.0, - "learning_rate": 4.271931404387096e-06, - "logits/chosen": -1.4958666563034058, - "logits/rejected": -1.4852968454360962, - "logps/chosen": -203.7172088623047, - "logps/rejected": -223.72958374023438, - "loss": 0.3129, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.4084866940975189, - "rewards/margins": 2.0505545139312744, - "rewards/rejected": -2.459041118621826, + "epoch": 0.17273436094284172, + "grad_norm": 25.5, + "learning_rate": 1.9189765458422177e-06, + "loss": 1.2914, "step": 360 }, { - "epoch": 0.9851551956815114, - "grad_norm": 72.0, - "learning_rate": 4.243983331364307e-06, - "logits/chosen": -1.6051279306411743, - "logits/rejected": -1.5763704776763916, - "logps/chosen": -156.02700805664062, - "logps/rejected": -212.16317749023438, - "loss": 0.4821, - "rewards/accuracies": 0.7749999761581421, - "rewards/chosen": -0.6169974207878113, - "rewards/margins": 1.195291519165039, - "rewards/rejected": -1.8122888803482056, + "epoch": 0.17513344928927008, + "grad_norm": 15.5625, + "learning_rate": 1.945628997867804e-06, + "loss": 1.2052, "step": 365 }, { - "epoch": 0.9986504723346828, - "grad_norm": 91.0, - "learning_rate": 4.215604094671835e-06, - "logits/chosen": -1.5946276187896729, - "logits/rejected": -1.525407075881958, - "logps/chosen": -190.231689453125, - "logps/rejected": -210.0182342529297, - "loss": 0.4743, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": -0.5886441469192505, - "rewards/margins": 1.6572681665420532, - "rewards/rejected": -2.245912551879883, + "epoch": 0.17753253763569843, + "grad_norm": 96.0, + "learning_rate": 1.9722814498933903e-06, + "loss": 1.2929, "step": 370 }, { - "epoch": 1.0121457489878543, - "grad_norm": 71.5, - "learning_rate": 4.186800710486732e-06, - "logits/chosen": -1.503097414970398, - "logits/rejected": -1.4615429639816284, - "logps/chosen": -177.4516143798828, - "logps/rejected": -223.7339324951172, - "loss": 0.2691, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.2226639688014984, - "rewards/margins": 2.2762439250946045, - "rewards/rejected": -2.4989078044891357, + "epoch": 0.17993162598212678, + "grad_norm": 5.09375, + "learning_rate": 1.9989339019189766e-06, + "loss": 1.2518, "step": 375 }, { - "epoch": 1.0256410256410255, - "grad_norm": 16.75, - "learning_rate": 4.157580299847717e-06, - "logits/chosen": -1.4365036487579346, - "logits/rejected": -1.4489128589630127, - "logps/chosen": -185.9925994873047, - "logps/rejected": -210.19802856445312, - "loss": 0.126, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.1753823310136795, - "rewards/margins": 3.160768508911133, - "rewards/rejected": -3.336151123046875, + "epoch": 0.18233071432855516, + "grad_norm": 6.9375, + "learning_rate": 2.025586353944563e-06, + "loss": 1.1894, "step": 380 }, { - "epoch": 1.039136302294197, - "grad_norm": 27.125, - "learning_rate": 4.12795008689464e-06, - "logits/chosen": -1.4434540271759033, - "logits/rejected": -1.5021578073501587, - "logps/chosen": -210.2549591064453, - "logps/rejected": -247.6964569091797, - "loss": 0.2329, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": 0.21405327320098877, - "rewards/margins": 2.4333832263946533, - "rewards/rejected": -2.219329833984375, + "epoch": 0.1847298026749835, + "grad_norm": 8.375, + "learning_rate": 2.0522388059701497e-06, + "loss": 1.2302, "step": 385 }, { - "epoch": 1.0526315789473684, - "grad_norm": 29.5, - "learning_rate": 4.0979173970824626e-06, - "logits/chosen": -1.5133657455444336, - "logits/rejected": -1.5038350820541382, - "logps/chosen": -187.3416290283203, - "logps/rejected": -197.63766479492188, - "loss": 0.1885, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.0815470814704895, - "rewards/margins": 2.5452542304992676, - "rewards/rejected": -2.463707447052002, + "epoch": 0.18712889102141186, + "grad_norm": 5.875, + "learning_rate": 2.0788912579957356e-06, + "loss": 1.2647, "step": 390 }, { - "epoch": 1.0661268556005399, - "grad_norm": 11.3125, - "learning_rate": 4.067489655370197e-06, - "logits/chosen": -1.486011028289795, - "logits/rejected": -1.5427876710891724, - "logps/chosen": -248.8966064453125, - "logps/rejected": -205.6848602294922, - "loss": 0.1103, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.543197751045227, - "rewards/margins": 3.468106746673584, - "rewards/rejected": -2.9249091148376465, + "epoch": 0.18952797936784022, + "grad_norm": 4.6875, + "learning_rate": 2.1055437100213223e-06, + "loss": 1.1564, "step": 395 }, { - "epoch": 1.0796221322537112, - "grad_norm": 21.625, - "learning_rate": 4.0366743843852315e-06, - "logits/chosen": -1.4536128044128418, - "logits/rejected": -1.39426851272583, - "logps/chosen": -157.4046173095703, - "logps/rejected": -206.4637451171875, - "loss": 0.1189, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.14299368858337402, - "rewards/margins": 3.642580032348633, - "rewards/rejected": -3.7855734825134277, + "epoch": 0.19192706771426857, + "grad_norm": 5.03125, + "learning_rate": 2.132196162046908e-06, + "loss": 1.1627, + "step": 400 + }, + { + "epoch": 0.19192706771426857, + "eval_loss": 1.2169007062911987, + "eval_runtime": 176.1168, + "eval_samples_per_second": 42.074, + "eval_steps_per_second": 10.521, "step": 400 }, { - "epoch": 1.0931174089068827, - "grad_norm": 73.0, - "learning_rate": 4.005479202563524e-06, - "logits/chosen": -1.4207379817962646, - "logits/rejected": -1.4653427600860596, - "logps/chosen": -175.64657592773438, - "logps/rejected": -188.96347045898438, - "loss": 0.113, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.22152826189994812, - "rewards/margins": 3.9064407348632812, - "rewards/rejected": -4.127968788146973, + "epoch": 0.19432615606069695, + "grad_norm": 40.5, + "learning_rate": 2.158848614072495e-06, + "loss": 1.25, "step": 405 }, { - "epoch": 1.106612685560054, - "grad_norm": 22.5, - "learning_rate": 3.973911822266099e-06, - "logits/chosen": -1.3683284521102905, - "logits/rejected": -1.4073810577392578, - "logps/chosen": -200.2495880126953, - "logps/rejected": -196.02499389648438, - "loss": 0.1506, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.4190312922000885, - "rewards/margins": 3.017284393310547, - "rewards/rejected": -3.4363160133361816, + "epoch": 0.1967252444071253, + "grad_norm": 7.53125, + "learning_rate": 2.1855010660980813e-06, + "loss": 1.1966, "step": 410 }, { - "epoch": 1.1201079622132253, - "grad_norm": 61.0, - "learning_rate": 3.941980047872324e-06, - "logits/chosen": -1.3142037391662598, - "logits/rejected": -1.3677208423614502, - "logps/chosen": -200.49827575683594, - "logps/rejected": -213.0048828125, - "loss": 0.2229, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.29499849677085876, - "rewards/margins": 2.430476188659668, - "rewards/rejected": -2.7254748344421387, + "epoch": 0.19912433275355365, + "grad_norm": 10.0625, + "learning_rate": 2.2121535181236676e-06, + "loss": 1.1397, "step": 415 }, { - "epoch": 1.1336032388663968, - "grad_norm": 33.5, - "learning_rate": 3.9096917738504445e-06, - "logits/chosen": -1.5029326677322388, - "logits/rejected": -1.522037386894226, - "logps/chosen": -211.3799285888672, - "logps/rejected": -195.49777221679688, - "loss": 0.2023, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.20138970017433167, - "rewards/margins": 3.0471653938293457, - "rewards/rejected": -3.2485554218292236, + "epoch": 0.201523421099982, + "grad_norm": 7.25, + "learning_rate": 2.238805970149254e-06, + "loss": 1.2399, "step": 420 }, { - "epoch": 1.147098515519568, - "grad_norm": 67.5, - "learning_rate": 3.877054982805835e-06, - "logits/chosen": -1.503327488899231, - "logits/rejected": -1.5182857513427734, - "logps/chosen": -206.69345092773438, - "logps/rejected": -220.8511505126953, - "loss": 0.2, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.075591079890728, - "rewards/margins": 3.3199775218963623, - "rewards/rejected": -3.39556884765625, + "epoch": 0.20392250944641035, + "grad_norm": 6.4375, + "learning_rate": 2.26545842217484e-06, + "loss": 1.1008, "step": 425 }, { - "epoch": 1.1605937921727396, - "grad_norm": 41.25, - "learning_rate": 3.844077743507468e-06, - "logits/chosen": -1.4972890615463257, - "logits/rejected": -1.4547359943389893, - "logps/chosen": -190.38272094726562, - "logps/rejected": -237.7483367919922, - "loss": 0.1763, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.00510750338435173, - "rewards/margins": 3.4418201446533203, - "rewards/rejected": -3.446927309036255, + "epoch": 0.20632159779283873, + "grad_norm": 6.09375, + "learning_rate": 2.2921108742004265e-06, + "loss": 1.2229, "step": 430 }, { - "epoch": 1.174089068825911, - "grad_norm": 43.0, - "learning_rate": 3.8107682088930797e-06, - "logits/chosen": -1.5898491144180298, - "logits/rejected": -1.628394365310669, - "logps/chosen": -209.7681884765625, - "logps/rejected": -223.9811248779297, - "loss": 0.2875, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": -0.15720273554325104, - "rewards/margins": 2.540311336517334, - "rewards/rejected": -2.697514057159424, + "epoch": 0.2087206861392671, + "grad_norm": 12.75, + "learning_rate": 2.318763326226013e-06, + "loss": 1.1546, "step": 435 }, { - "epoch": 1.1875843454790824, - "grad_norm": 19.875, - "learning_rate": 3.777134614053522e-06, - "logits/chosen": -1.3833550214767456, - "logits/rejected": -1.3048458099365234, - "logps/chosen": -153.44886779785156, - "logps/rejected": -187.23211669921875, - "loss": 0.2094, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.15450401604175568, - "rewards/margins": 2.7406177520751953, - "rewards/rejected": -2.8951218128204346, + "epoch": 0.21111977448569544, + "grad_norm": 5.875, + "learning_rate": 2.345415778251599e-06, + "loss": 1.1509, "step": 440 }, { - "epoch": 1.2010796221322537, - "grad_norm": 25.25, - "learning_rate": 3.7431852741968104e-06, - "logits/chosen": -1.5894601345062256, - "logits/rejected": -1.4398654699325562, - "logps/chosen": -161.95870971679688, - "logps/rejected": -259.89544677734375, - "loss": 0.2674, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.3261250853538513, - "rewards/margins": 2.652719497680664, - "rewards/rejected": -2.9788451194763184, + "epoch": 0.2135188628321238, + "grad_norm": 3.859375, + "learning_rate": 2.372068230277186e-06, + "loss": 1.1714, "step": 445 }, { - "epoch": 1.214574898785425, - "grad_norm": 25.625, - "learning_rate": 3.7089285825923614e-06, - "logits/chosen": -1.481194257736206, - "logits/rejected": -1.4744828939437866, - "logps/chosen": -136.75341796875, - "logps/rejected": -182.98255920410156, - "loss": 0.216, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.15366610884666443, - "rewards/margins": 2.4346675872802734, - "rewards/rejected": -2.5883336067199707, + "epoch": 0.21591795117855214, + "grad_norm": 4.5, + "learning_rate": 2.398720682302772e-06, + "loss": 1.2313, "step": 450 }, { - "epoch": 1.2280701754385965, - "grad_norm": 59.0, - "learning_rate": 3.6743730084959275e-06, - "logits/chosen": -1.4641847610473633, - "logits/rejected": -1.4495608806610107, - "logps/chosen": -226.5570068359375, - "logps/rejected": -231.99484252929688, - "loss": 0.1606, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": 0.0351928249001503, - "rewards/margins": 2.678950071334839, - "rewards/rejected": -2.6437573432922363, + "epoch": 0.2183170395249805, + "grad_norm": 7.875, + "learning_rate": 2.4253731343283585e-06, + "loss": 1.1727, "step": 455 }, { - "epoch": 1.2415654520917678, - "grad_norm": 29.125, - "learning_rate": 3.639527095055753e-06, - "logits/chosen": -1.4890583753585815, - "logits/rejected": -1.4146323204040527, - "logps/chosen": -211.8848419189453, - "logps/rejected": -223.7265167236328, - "loss": 0.1515, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.1912011355161667, - "rewards/margins": 3.216825008392334, - "rewards/rejected": -3.4080262184143066, + "epoch": 0.22071612787140887, + "grad_norm": 4.25, + "learning_rate": 2.452025586353945e-06, + "loss": 1.2308, "step": 460 }, { - "epoch": 1.2550607287449393, - "grad_norm": 28.5, - "learning_rate": 3.604399457200458e-06, - "logits/chosen": -1.5582194328308105, - "logits/rejected": -1.530056357383728, - "logps/chosen": -174.59786987304688, - "logps/rejected": -235.314697265625, - "loss": 0.1586, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.02259807661175728, - "rewards/margins": 3.3205082416534424, - "rewards/rejected": -3.343106508255005, + "epoch": 0.22311521621783723, + "grad_norm": 5.78125, + "learning_rate": 2.478678038379531e-06, + "loss": 1.1464, "step": 465 }, { - "epoch": 1.2685560053981106, - "grad_norm": 47.0, - "learning_rate": 3.5689987795091735e-06, - "logits/chosen": -1.5336169004440308, - "logits/rejected": -1.5555146932601929, - "logps/chosen": -192.9527587890625, - "logps/rejected": -217.05029296875, - "loss": 0.1666, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.11079835891723633, - "rewards/margins": 2.9511632919311523, - "rewards/rejected": -3.0619616508483887, + "epoch": 0.22551430456426558, + "grad_norm": 9.0625, + "learning_rate": 2.5053304904051175e-06, + "loss": 1.0961, "step": 470 }, { - "epoch": 1.282051282051282, - "grad_norm": 31.5, - "learning_rate": 3.5333338140644602e-06, - "logits/chosen": -1.567378044128418, - "logits/rejected": -1.5020748376846313, - "logps/chosen": -151.2008819580078, - "logps/rejected": -193.5251007080078, - "loss": 0.1562, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": 0.06739845871925354, - "rewards/margins": 2.88545560836792, - "rewards/rejected": -2.81805682182312, + "epoch": 0.22791339291069393, + "grad_norm": 9.3125, + "learning_rate": 2.531982942430704e-06, + "loss": 1.2813, "step": 475 }, { - "epoch": 1.2955465587044535, - "grad_norm": 27.625, - "learning_rate": 3.497413378288541e-06, - "logits/chosen": -1.558091402053833, - "logits/rejected": -1.5880284309387207, - "logps/chosen": -208.2618408203125, - "logps/rejected": -215.33065795898438, - "loss": 0.1537, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.08185993134975433, - "rewards/margins": 2.7448792457580566, - "rewards/rejected": -2.8267390727996826, + "epoch": 0.23031248125712228, + "grad_norm": 17.375, + "learning_rate": 2.55863539445629e-06, + "loss": 1.1666, "step": 480 }, { - "epoch": 1.3090418353576248, - "grad_norm": 21.0, - "learning_rate": 3.4612463527633728e-06, - "logits/chosen": -1.517230749130249, - "logits/rejected": -1.5125114917755127, - "logps/chosen": -165.6942138671875, - "logps/rejected": -177.20965576171875, - "loss": 0.1097, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.21497321128845215, - "rewards/margins": 3.283679485321045, - "rewards/rejected": -3.498652935028076, + "epoch": 0.23271156960355066, + "grad_norm": 4.875, + "learning_rate": 2.5852878464818764e-06, + "loss": 1.1806, "step": 485 }, { - "epoch": 1.3225371120107963, - "grad_norm": 58.25, - "learning_rate": 3.4248416790351086e-06, - "logits/chosen": -1.4563219547271729, - "logits/rejected": -1.4463237524032593, - "logps/chosen": -222.70803833007812, - "logps/rejected": -276.1205139160156, - "loss": 0.1741, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.18195849657058716, - "rewards/margins": 3.079150438308716, - "rewards/rejected": -3.2611091136932373, + "epoch": 0.235110657949979, + "grad_norm": 6.125, + "learning_rate": 2.6119402985074627e-06, + "loss": 1.2986, "step": 490 }, { - "epoch": 1.3360323886639676, - "grad_norm": 26.5, - "learning_rate": 3.3882083574034847e-06, - "logits/chosen": -1.495981216430664, - "logits/rejected": -1.510833501815796, - "logps/chosen": -217.92416381835938, - "logps/rejected": -232.9659881591797, - "loss": 0.1075, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.07798905670642853, - "rewards/margins": 3.7004494667053223, - "rewards/rejected": -3.6224606037139893, + "epoch": 0.23750974629640736, + "grad_norm": 8.5, + "learning_rate": 2.6385927505330495e-06, + "loss": 1.1637, "step": 495 }, { - "epoch": 1.349527665317139, - "grad_norm": 11.625, - "learning_rate": 3.3513554446966846e-06, - "logits/chosen": -1.607877492904663, - "logits/rejected": -1.5209126472473145, - "logps/chosen": -145.24710083007812, - "logps/rejected": -269.81951904296875, - "loss": 0.0835, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.1404910534620285, - "rewards/margins": 3.947847843170166, - "rewards/rejected": -4.088338375091553, + "epoch": 0.23990883464283572, + "grad_norm": 12.625, + "learning_rate": 2.6652452025586358e-06, + "loss": 1.178, "step": 500 }, { - "epoch": 1.349527665317139, - "eval_logits/chosen": -1.5215187072753906, - "eval_logits/rejected": -1.5562808513641357, - "eval_logps/chosen": -190.62527465820312, - "eval_logps/rejected": -222.86770629882812, - "eval_loss": 0.3281523883342743, - "eval_rewards/accuracies": 0.849397599697113, - "eval_rewards/chosen": -0.6181024312973022, - "eval_rewards/margins": 2.1862471103668213, - "eval_rewards/rejected": -2.804349660873413, - "eval_runtime": 23.4839, - "eval_samples_per_second": 14.052, - "eval_steps_per_second": 3.534, + "epoch": 0.23990883464283572, + "eval_loss": 1.1765925884246826, + "eval_runtime": 176.0206, + "eval_samples_per_second": 42.097, + "eval_steps_per_second": 10.527, "step": 500 }, { - "epoch": 1.3630229419703104, - "grad_norm": 25.625, - "learning_rate": 3.314292052032227e-06, - "logits/chosen": -1.4269988536834717, - "logits/rejected": -1.5553017854690552, - "logps/chosen": -245.88330078125, - "logps/rejected": -144.62518310546875, - "loss": 0.2057, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.03630426153540611, - "rewards/margins": 2.8089230060577393, - "rewards/rejected": -2.8452274799346924, + "epoch": 0.24230792298926407, + "grad_norm": 6.5625, + "learning_rate": 2.6918976545842217e-06, + "loss": 1.0748, "step": 505 }, { - "epoch": 1.376518218623482, - "grad_norm": 42.75, - "learning_rate": 3.2770273425644285e-06, - "logits/chosen": -1.3818541765213013, - "logits/rejected": -1.31718909740448, - "logps/chosen": -194.84194946289062, - "logps/rejected": -197.08505249023438, - "loss": 0.1862, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.11487498134374619, - "rewards/margins": 3.057730197906494, - "rewards/rejected": -3.172605276107788, + "epoch": 0.24470701133569245, + "grad_norm": 5.34375, + "learning_rate": 2.7185501066098084e-06, + "loss": 1.0757, "step": 510 }, { - "epoch": 1.3900134952766532, - "grad_norm": 29.0, - "learning_rate": 3.2395705292190067e-06, - "logits/chosen": -1.467614769935608, - "logits/rejected": -1.438024640083313, - "logps/chosen": -180.7233428955078, - "logps/rejected": -217.57559204101562, - "loss": 0.1711, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.10827420651912689, - "rewards/margins": 3.1877129077911377, - "rewards/rejected": -3.295987367630005, + "epoch": 0.2471060996821208, + "grad_norm": 15.9375, + "learning_rate": 2.7452025586353947e-06, + "loss": 1.2197, "step": 515 }, { - "epoch": 1.4035087719298245, - "grad_norm": 12.125, - "learning_rate": 3.2019308724153743e-06, - "logits/chosen": -1.4175347089767456, - "logits/rejected": -1.5785712003707886, - "logps/chosen": -196.76730346679688, - "logps/rejected": -179.5243377685547, - "loss": 0.114, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.12531700730323792, - "rewards/margins": 3.2615838050842285, - "rewards/rejected": -3.1362667083740234, + "epoch": 0.24950518802854915, + "grad_norm": 6.1875, + "learning_rate": 2.771855010660981e-06, + "loss": 1.2274, "step": 520 }, { - "epoch": 1.417004048582996, - "grad_norm": 27.375, - "learning_rate": 3.164117677777191e-06, - "logits/chosen": -1.5264801979064941, - "logits/rejected": -1.6040115356445312, - "logps/chosen": -150.361328125, - "logps/rejected": -164.02816772460938, - "loss": 0.1757, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.4109339118003845, - "rewards/margins": 3.098153591156006, - "rewards/rejected": -3.509087324142456, + "epoch": 0.25190427637497753, + "grad_norm": 6.34375, + "learning_rate": 2.798507462686567e-06, + "loss": 1.1705, "step": 525 }, { - "epoch": 1.4304993252361673, - "grad_norm": 38.25, - "learning_rate": 3.1261402938317465e-06, - "logits/chosen": -1.5730303525924683, - "logits/rejected": -1.6026499271392822, - "logps/chosen": -164.3070831298828, - "logps/rejected": -246.06338500976562, - "loss": 0.1532, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": 0.005527207162231207, - "rewards/margins": 3.9187304973602295, - "rewards/rejected": -3.913203477859497, + "epoch": 0.25430336472140586, + "grad_norm": 3.9375, + "learning_rate": 2.825159914712154e-06, + "loss": 1.179, "step": 530 }, { - "epoch": 1.4439946018893388, - "grad_norm": 20.375, - "learning_rate": 3.088008109698726e-06, - "logits/chosen": -1.444838285446167, - "logits/rejected": -1.5232534408569336, - "logps/chosen": -194.70555114746094, - "logps/rejected": -218.77590942382812, - "loss": 0.1892, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": 0.10898719727993011, - "rewards/margins": 3.2815093994140625, - "rewards/rejected": -3.1725223064422607, + "epoch": 0.25670245306783424, + "grad_norm": 7.46875, + "learning_rate": 2.85181236673774e-06, + "loss": 1.1046, "step": 535 }, { - "epoch": 1.45748987854251, - "grad_norm": 43.0, - "learning_rate": 3.0497305527689446e-06, - "logits/chosen": -1.4176692962646484, - "logits/rejected": -1.4581646919250488, - "logps/chosen": -190.53550720214844, - "logps/rejected": -202.92530822753906, - "loss": 0.1852, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.12854930758476257, - "rewards/margins": 3.113678216934204, - "rewards/rejected": -3.242227554321289, + "epoch": 0.25910154141426256, + "grad_norm": 5.625, + "learning_rate": 2.8784648187633263e-06, + "loss": 1.1889, "step": 540 }, { - "epoch": 1.4709851551956814, - "grad_norm": 42.0, - "learning_rate": 3.011317086373628e-06, - "logits/chosen": -1.4024337530136108, - "logits/rejected": -1.4260265827178955, - "logps/chosen": -222.62124633789062, - "logps/rejected": -228.56295776367188, - "loss": 0.1847, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.02214776910841465, - "rewards/margins": 3.127570629119873, - "rewards/rejected": -3.1497180461883545, + "epoch": 0.26150062976069094, + "grad_norm": 6.25, + "learning_rate": 2.905117270788913e-06, + "loss": 1.136, "step": 545 }, { - "epoch": 1.484480431848853, - "grad_norm": 38.5, - "learning_rate": 2.9727772074447916e-06, - "logits/chosen": -1.4362146854400635, - "logits/rejected": -1.4737937450408936, - "logps/chosen": -190.13218688964844, - "logps/rejected": -182.9353790283203, - "loss": 0.1473, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.015029204078018665, - "rewards/margins": 3.5559723377227783, - "rewards/rejected": -3.5710015296936035, + "epoch": 0.2638997181071193, + "grad_norm": 20.75, + "learning_rate": 2.9317697228144994e-06, + "loss": 1.1492, "step": 550 }, { - "epoch": 1.4979757085020242, - "grad_norm": 105.0, - "learning_rate": 2.9341204441673267e-06, - "logits/chosen": -1.5892771482467651, - "logits/rejected": -1.5846550464630127, - "logps/chosen": -128.55142211914062, - "logps/rejected": -169.93356323242188, - "loss": 0.2029, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.536339282989502, - "rewards/margins": 2.9536054134368896, - "rewards/rejected": -3.4899444580078125, + "epoch": 0.26629880645354764, + "grad_norm": 7.84375, + "learning_rate": 2.9584221748400853e-06, + "loss": 1.1257, "step": 555 }, { - "epoch": 1.5114709851551957, - "grad_norm": 49.75, - "learning_rate": 2.8953563536233525e-06, - "logits/chosen": -1.650007963180542, - "logits/rejected": -1.6943776607513428, - "logps/chosen": -168.49082946777344, - "logps/rejected": -202.83924865722656, - "loss": 0.186, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.5661638379096985, - "rewards/margins": 3.2901504039764404, - "rewards/rejected": -3.856314182281494, + "epoch": 0.268697894799976, + "grad_norm": 4.78125, + "learning_rate": 2.9850746268656716e-06, + "loss": 1.0821, "step": 560 }, { - "epoch": 1.524966261808367, - "grad_norm": 21.0, - "learning_rate": 2.8564945194294273e-06, - "logits/chosen": -1.5658307075500488, - "logits/rejected": -1.46593177318573, - "logps/chosen": -162.1931915283203, - "logps/rejected": -254.7098388671875, - "loss": 0.168, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.5101041793823242, - "rewards/margins": 3.1685078144073486, - "rewards/rejected": -3.6786117553710938, + "epoch": 0.27109698314640435, + "grad_norm": 4.875, + "learning_rate": 3.0117270788912583e-06, + "loss": 1.1404, "step": 565 }, { - "epoch": 1.5384615384615383, - "grad_norm": 13.125, - "learning_rate": 2.817544549367197e-06, - "logits/chosen": -1.4567762613296509, - "logits/rejected": -1.4438632726669312, - "logps/chosen": -173.05821228027344, - "logps/rejected": -226.567626953125, - "loss": 0.1935, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.4246034622192383, - "rewards/margins": 3.5222201347351074, - "rewards/rejected": -3.946824312210083, + "epoch": 0.2734960714928327, + "grad_norm": 7.8125, + "learning_rate": 3.0383795309168446e-06, + "loss": 1.0904, "step": 570 }, { - "epoch": 1.5519568151147098, - "grad_norm": 18.875, - "learning_rate": 2.778516073008071e-06, - "logits/chosen": -1.3770719766616821, - "logits/rejected": -1.4858124256134033, - "logps/chosen": -178.8583221435547, - "logps/rejected": -180.4306640625, - "loss": 0.2049, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.34754911065101624, - "rewards/margins": 2.8492796421051025, - "rewards/rejected": -3.196829080581665, + "epoch": 0.2758951598392611, + "grad_norm": 4.59375, + "learning_rate": 3.065031982942431e-06, + "loss": 1.0591, "step": 575 }, { - "epoch": 1.5654520917678814, - "grad_norm": 51.0, - "learning_rate": 2.7394187393325107e-06, - "logits/chosen": -1.4935017824172974, - "logits/rejected": -1.482154130935669, - "logps/chosen": -183.38815307617188, - "logps/rejected": -203.4325714111328, - "loss": 0.2601, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.4926990866661072, - "rewards/margins": 2.8543787002563477, - "rewards/rejected": -3.347078323364258, + "epoch": 0.27829424818568943, + "grad_norm": 8.875, + "learning_rate": 3.0916844349680177e-06, + "loss": 1.0304, "step": 580 }, { - "epoch": 1.5789473684210527, - "grad_norm": 11.875, - "learning_rate": 2.7002622143445177e-06, - "logits/chosen": -1.5763792991638184, - "logits/rejected": -1.581122875213623, - "logps/chosen": -230.5819854736328, - "logps/rejected": -290.4792175292969, - "loss": 0.1305, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": 0.07175219804048538, - "rewards/margins": 4.184114933013916, - "rewards/rejected": -4.112362861633301, + "epoch": 0.2806933365321178, + "grad_norm": 5.0625, + "learning_rate": 3.1183368869936036e-06, + "loss": 1.1397, "step": 585 }, { - "epoch": 1.592442645074224, - "grad_norm": 46.75, - "learning_rate": 2.6610561786819207e-06, - "logits/chosen": -1.6340926885604858, - "logits/rejected": -1.5590074062347412, - "logps/chosen": -145.62442016601562, - "logps/rejected": -248.79403686523438, - "loss": 0.1715, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.3683429956436157, - "rewards/margins": 3.4242138862609863, - "rewards/rejected": -3.7925562858581543, + "epoch": 0.28309242487854613, + "grad_norm": 12.875, + "learning_rate": 3.14498933901919e-06, + "loss": 1.1217, "step": 590 }, { - "epoch": 1.6059379217273952, - "grad_norm": 8.5625, - "learning_rate": 2.6218103252230302e-06, - "logits/chosen": -1.5815064907073975, - "logits/rejected": -1.558189868927002, - "logps/chosen": -145.986572265625, - "logps/rejected": -209.48776245117188, - "loss": 0.1382, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.15136297047138214, - "rewards/margins": 3.156534194946289, - "rewards/rejected": -3.3078970909118652, + "epoch": 0.2854915132249745, + "grad_norm": 4.9375, + "learning_rate": 3.1716417910447766e-06, + "loss": 1.2383, "step": 595 }, { - "epoch": 1.6194331983805668, - "grad_norm": 33.5, - "learning_rate": 2.582534356690284e-06, - "logits/chosen": -1.4829189777374268, - "logits/rejected": -1.5618332624435425, - "logps/chosen": -280.50482177734375, - "logps/rejected": -227.37191772460938, - "loss": 0.111, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.014963224530220032, - "rewards/margins": 3.7380282878875732, - "rewards/rejected": -3.723065137863159, + "epoch": 0.2878906015714029, + "grad_norm": 6.9375, + "learning_rate": 3.198294243070363e-06, + "loss": 1.133, + "step": 600 + }, + { + "epoch": 0.2878906015714029, + "eval_loss": 1.1296346187591553, + "eval_runtime": 175.841, + "eval_samples_per_second": 42.14, + "eval_steps_per_second": 10.538, "step": 600 }, { - "epoch": 1.6329284750337383, - "grad_norm": 19.25, - "learning_rate": 2.5432379832514437e-06, - "logits/chosen": -1.5892632007598877, - "logits/rejected": -1.6352291107177734, - "logps/chosen": -158.56002807617188, - "logps/rejected": -202.90060424804688, - "loss": 0.2301, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.49658140540122986, - "rewards/margins": 3.0457987785339355, - "rewards/rejected": -3.5423800945281982, + "epoch": 0.2902896899178312, + "grad_norm": 4.9375, + "learning_rate": 3.224946695095949e-06, + "loss": 1.0715, "step": 605 }, { - "epoch": 1.6464237516869096, - "grad_norm": 18.5, - "learning_rate": 2.5039309201189618e-06, - "logits/chosen": -1.6018474102020264, - "logits/rejected": -1.6965217590332031, - "logps/chosen": -161.53518676757812, - "logps/rejected": -185.10025024414062, - "loss": 0.1597, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.14256028831005096, - "rewards/margins": 3.0957140922546387, - "rewards/rejected": -3.238274335861206, + "epoch": 0.2926887782642596, + "grad_norm": 5.09375, + "learning_rate": 3.251599147121535e-06, + "loss": 1.0086, "step": 610 }, { - "epoch": 1.6599190283400809, - "grad_norm": 22.375, - "learning_rate": 2.4646228851480957e-06, - "logits/chosen": -1.391078233718872, - "logits/rejected": -1.3691911697387695, - "logps/chosen": -206.93734741210938, - "logps/rejected": -213.29428100585938, - "loss": 0.2172, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": 0.01679610088467598, - "rewards/margins": 2.988704204559326, - "rewards/rejected": -2.9719078540802, + "epoch": 0.2950878666106879, + "grad_norm": 7.3125, + "learning_rate": 3.278251599147122e-06, + "loss": 1.0881, "step": 615 }, { - "epoch": 1.6734143049932524, - "grad_norm": 13.25, - "learning_rate": 2.4253235964343677e-06, - "logits/chosen": -1.590201497077942, - "logits/rejected": -1.4947328567504883, - "logps/chosen": -162.37301635742188, - "logps/rejected": -259.95294189453125, - "loss": 0.1116, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.33501359820365906, - "rewards/margins": 4.118770599365234, - "rewards/rejected": -4.453783988952637, + "epoch": 0.2974869549571163, + "grad_norm": 10.125, + "learning_rate": 3.304904051172708e-06, + "loss": 1.0767, "step": 620 }, { - "epoch": 1.686909581646424, - "grad_norm": 73.0, - "learning_rate": 2.3860427699109726e-06, - "logits/chosen": -1.6217790842056274, - "logits/rejected": -1.6454839706420898, - "logps/chosen": -172.94483947753906, - "logps/rejected": -205.34475708007812, - "loss": 0.2869, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.9174480438232422, - "rewards/margins": 3.128140449523926, - "rewards/rejected": -4.045588493347168, + "epoch": 0.2998860433035447, + "grad_norm": 6.90625, + "learning_rate": 3.3315565031982945e-06, + "loss": 1.0721, "step": 625 }, { - "epoch": 1.7004048582995952, - "grad_norm": 32.5, - "learning_rate": 2.34679011694671e-06, - "logits/chosen": -1.5026500225067139, - "logits/rejected": -1.6494897603988647, - "logps/chosen": -268.9452209472656, - "logps/rejected": -212.0578155517578, - "loss": 0.1194, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.23328566551208496, - "rewards/margins": 4.139514446258545, - "rewards/rejected": -4.372800350189209, + "epoch": 0.302285131649973, + "grad_norm": 6.875, + "learning_rate": 3.3582089552238813e-06, + "loss": 1.1838, "step": 630 }, { - "epoch": 1.7139001349527665, - "grad_norm": 70.5, - "learning_rate": 2.3075753419450524e-06, - "logits/chosen": -1.5526963472366333, - "logits/rejected": -1.6195096969604492, - "logps/chosen": -205.20431518554688, - "logps/rejected": -197.59744262695312, - "loss": 0.2026, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.3273767828941345, - "rewards/margins": 2.9169745445251465, - "rewards/rejected": -3.2443511486053467, + "epoch": 0.3046842199964014, + "grad_norm": 7.75, + "learning_rate": 3.384861407249467e-06, + "loss": 1.0761, "step": 635 }, { - "epoch": 1.7273954116059378, - "grad_norm": 38.5, - "learning_rate": 2.2684081399449327e-06, - "logits/chosen": -1.4865336418151855, - "logits/rejected": -1.479229211807251, - "logps/chosen": -188.85787963867188, - "logps/rejected": -203.17514038085938, - "loss": 0.269, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.778891921043396, - "rewards/margins": 3.5117366313934326, - "rewards/rejected": -4.290627956390381, + "epoch": 0.3070833083428297, + "grad_norm": 7.59375, + "learning_rate": 3.4115138592750535e-06, + "loss": 1.0424, "step": 640 }, { - "epoch": 1.7408906882591093, - "grad_norm": 116.0, - "learning_rate": 2.2292981942238454e-06, - "logits/chosen": -1.598434329032898, - "logits/rejected": -1.6193567514419556, - "logps/chosen": -170.999267578125, - "logps/rejected": -234.42391967773438, - "loss": 0.3528, - "rewards/accuracies": 0.875, - "rewards/chosen": -0.586583137512207, - "rewards/margins": 3.1737523078918457, - "rewards/rejected": -3.7603354454040527, + "epoch": 0.3094823966892581, + "grad_norm": 37.0, + "learning_rate": 3.4381663113006398e-06, + "loss": 1.0834, "step": 645 }, { - "epoch": 1.7543859649122808, - "grad_norm": 36.0, - "learning_rate": 2.1902551739038624e-06, - "logits/chosen": -1.5177044868469238, - "logits/rejected": -1.4585306644439697, - "logps/chosen": -171.92758178710938, - "logps/rejected": -219.8982696533203, - "loss": 0.2386, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.5028108954429626, - "rewards/margins": 3.118129253387451, - "rewards/rejected": -3.6209399700164795, + "epoch": 0.3118814850356864, + "grad_norm": 5.21875, + "learning_rate": 3.4648187633262265e-06, + "loss": 1.1291, "step": 650 }, { - "epoch": 1.7678812415654521, - "grad_norm": 11.0625, - "learning_rate": 2.151288731561136e-06, - "logits/chosen": -1.532063364982605, - "logits/rejected": -1.4071648120880127, - "logps/chosen": -211.4221649169922, - "logps/rejected": -240.8402099609375, - "loss": 0.1651, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.42295771837234497, - "rewards/margins": 3.7488913536071777, - "rewards/rejected": -4.171849250793457, + "epoch": 0.3142805733821148, + "grad_norm": 4.5625, + "learning_rate": 3.491471215351813e-06, + "loss": 1.1013, "step": 655 }, { - "epoch": 1.7813765182186234, - "grad_norm": 23.625, - "learning_rate": 2.1124085008395056e-06, - "logits/chosen": -1.4962142705917358, - "logits/rejected": -1.4677404165267944, - "logps/chosen": -197.39447021484375, - "logps/rejected": -263.4613342285156, - "loss": 0.1999, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.16278569400310516, - "rewards/margins": 3.5397281646728516, - "rewards/rejected": -3.7025134563446045, + "epoch": 0.31667966172854317, + "grad_norm": 6.1875, + "learning_rate": 3.5181236673773987e-06, + "loss": 1.0895, "step": 660 }, { - "epoch": 1.7948717948717947, - "grad_norm": 20.625, - "learning_rate": 2.073624094068776e-06, - "logits/chosen": -1.5467997789382935, - "logits/rejected": -1.540650725364685, - "logps/chosen": -186.6321563720703, - "logps/rejected": -259.65045166015625, - "loss": 0.2781, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.22982105612754822, - "rewards/margins": 3.7238681316375732, - "rewards/rejected": -3.9536895751953125, + "epoch": 0.3190787500749715, + "grad_norm": 7.4375, + "learning_rate": 3.5447761194029855e-06, + "loss": 1.1223, "step": 665 }, { - "epoch": 1.8083670715249662, - "grad_norm": 12.875, - "learning_rate": 2.03494509988827e-06, - "logits/chosen": -1.6044431924819946, - "logits/rejected": -1.627730131149292, - "logps/chosen": -184.64320373535156, - "logps/rejected": -204.9185791015625, - "loss": 0.1212, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07731951773166656, - "rewards/margins": 3.6050572395324707, - "rewards/rejected": -3.6823768615722656, + "epoch": 0.3214778384213999, + "grad_norm": 4.3125, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.0967, "step": 670 }, { - "epoch": 1.8218623481781377, - "grad_norm": 22.375, - "learning_rate": 1.996381080876237e-06, - "logits/chosen": -1.6212413311004639, - "logits/rejected": -1.5563671588897705, - "logps/chosen": -219.73171997070312, - "logps/rejected": -281.0826721191406, - "loss": 0.1177, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": 0.023561427369713783, - "rewards/margins": 3.5450756549835205, - "rewards/rejected": -3.521514415740967, + "epoch": 0.3238769267678282, + "grad_norm": 4.40625, + "learning_rate": 3.598081023454158e-06, + "loss": 1.1102, "step": 675 }, { - "epoch": 1.835357624831309, - "grad_norm": 42.5, - "learning_rate": 1.957941571185702e-06, - "logits/chosen": -1.4472072124481201, - "logits/rejected": -1.5231066942214966, - "logps/chosen": -256.3811950683594, - "logps/rejected": -225.1781768798828, - "loss": 0.2672, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.01850978098809719, - "rewards/margins": 3.1582770347595215, - "rewards/rejected": -3.1767868995666504, + "epoch": 0.3262760151142566, + "grad_norm": 6.40625, + "learning_rate": 3.624733475479744e-06, + "loss": 1.1283, "step": 680 }, { - "epoch": 1.8488529014844803, - "grad_norm": 30.625, - "learning_rate": 1.919636074187346e-06, - "logits/chosen": -1.388319730758667, - "logits/rejected": -1.4473168849945068, - "logps/chosen": -253.48312377929688, - "logps/rejected": -212.169189453125, - "loss": 0.1468, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": 0.18813356757164001, - "rewards/margins": 3.097019672393799, - "rewards/rejected": -2.908886194229126, + "epoch": 0.32867510346068496, + "grad_norm": 4.5, + "learning_rate": 3.651385927505331e-06, + "loss": 1.0848, "step": 685 }, { - "epoch": 1.8623481781376519, - "grad_norm": 90.0, - "learning_rate": 1.8814740601199943e-06, - "logits/chosen": -1.4006351232528687, - "logits/rejected": -1.4068963527679443, - "logps/chosen": -164.6719970703125, - "logps/rejected": -193.83538818359375, - "loss": 0.2666, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.42647585272789, - "rewards/margins": 2.7546064853668213, - "rewards/rejected": -3.181082248687744, + "epoch": 0.3310741918071133, + "grad_norm": 4.5625, + "learning_rate": 3.678038379530917e-06, + "loss": 1.0805, "step": 690 }, { - "epoch": 1.8758434547908234, - "grad_norm": 25.75, - "learning_rate": 1.8434649637492952e-06, - "logits/chosen": -1.341395616531372, - "logits/rejected": -1.3592100143432617, - "logps/chosen": -181.58978271484375, - "logps/rejected": -235.27456665039062, - "loss": 0.1718, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.15408626198768616, - "rewards/margins": 3.203856945037842, - "rewards/rejected": -3.357943296432495, + "epoch": 0.33347328015354166, + "grad_norm": 4.28125, + "learning_rate": 3.7046908315565034e-06, + "loss": 1.0819, "step": 695 }, { - "epoch": 1.8893387314439947, - "grad_norm": 18.625, - "learning_rate": 1.8056181820351737e-06, - "logits/chosen": -1.565199613571167, - "logits/rejected": -1.5012518167495728, - "logps/chosen": -241.5365753173828, - "logps/rejected": -229.1800079345703, - "loss": 0.1734, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": 0.34816664457321167, - "rewards/margins": 4.178171634674072, - "rewards/rejected": -3.830005168914795, + "epoch": 0.33587236849997, + "grad_norm": 7.84375, + "learning_rate": 3.73134328358209e-06, + "loss": 1.0466, "step": 700 }, { - "epoch": 1.902834008097166, - "grad_norm": 8.875, - "learning_rate": 1.7679430718086244e-06, - "logits/chosen": -1.5023219585418701, - "logits/rejected": -1.4059240818023682, - "logps/chosen": -240.8516082763672, - "logps/rejected": -287.47955322265625, - "loss": 0.0641, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.22036854922771454, - "rewards/margins": 4.166906833648682, - "rewards/rejected": -3.946538209915161, + "epoch": 0.33587236849997, + "eval_loss": 1.0983320474624634, + "eval_runtime": 233.7274, + "eval_samples_per_second": 31.704, + "eval_steps_per_second": 7.928, + "step": 700 + }, + { + "epoch": 0.33827145684639837, + "grad_norm": 15.3125, + "learning_rate": 3.7579957356076764e-06, + "loss": 1.1632, "step": 705 }, { - "epoch": 1.9163292847503373, - "grad_norm": 35.5, - "learning_rate": 1.7304489474584307e-06, - "logits/chosen": -1.565582036972046, - "logits/rejected": -1.4994531869888306, - "logps/chosen": -148.25338745117188, - "logps/rejected": -231.37741088867188, - "loss": 0.123, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": 0.1026371493935585, - "rewards/margins": 3.7467575073242188, - "rewards/rejected": -3.6441197395324707, + "epoch": 0.34067054519282675, + "grad_norm": 9.125, + "learning_rate": 3.7846481876332623e-06, + "loss": 1.0408, "step": 710 }, { - "epoch": 1.9298245614035088, - "grad_norm": 28.125, - "learning_rate": 1.693145078628377e-06, - "logits/chosen": -1.6054456233978271, - "logits/rejected": -1.6087411642074585, - "logps/chosen": -159.12234497070312, - "logps/rejected": -214.5330352783203, - "loss": 0.1255, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.13751724362373352, - "rewards/margins": 3.8032824993133545, - "rewards/rejected": -3.940800428390503, + "epoch": 0.34306963353925507, + "grad_norm": 6.34375, + "learning_rate": 3.8113006396588486e-06, + "loss": 1.0748, "step": 715 }, { - "epoch": 1.9433198380566803, - "grad_norm": 18.375, - "learning_rate": 1.6560406879255192e-06, - "logits/chosen": -1.615686058998108, - "logits/rejected": -1.678998351097107, - "logps/chosen": -179.3768768310547, - "logps/rejected": -188.79124450683594, - "loss": 0.1608, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.13511483371257782, - "rewards/margins": 3.1098551750183105, - "rewards/rejected": -3.2449698448181152, + "epoch": 0.34546872188568345, + "grad_norm": 3.78125, + "learning_rate": 3.837953091684435e-06, + "loss": 1.0707, "step": 720 }, { - "epoch": 1.9568151147098516, - "grad_norm": 20.5, - "learning_rate": 1.6191449486400893e-06, - "logits/chosen": -1.5641348361968994, - "logits/rejected": -1.5269627571105957, - "logps/chosen": -190.90200805664062, - "logps/rejected": -200.14797973632812, - "loss": 0.1858, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.057850100100040436, - "rewards/margins": 3.392789363861084, - "rewards/rejected": -3.4506402015686035, + "epoch": 0.3478678102321118, + "grad_norm": 4.28125, + "learning_rate": 3.864605543710022e-06, + "loss": 1.1292, "step": 725 }, { - "epoch": 1.9703103913630229, - "grad_norm": 46.25, - "learning_rate": 1.5824669824775868e-06, - "logits/chosen": -1.6585397720336914, - "logits/rejected": -1.6145107746124268, - "logps/chosen": -153.5370330810547, - "logps/rejected": -246.87869262695312, - "loss": 0.1935, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.1705460101366043, - "rewards/margins": 3.2724738121032715, - "rewards/rejected": -3.4430203437805176, + "epoch": 0.35026689857854015, + "grad_norm": 4.28125, + "learning_rate": 3.891257995735608e-06, + "loss": 1.0883, "step": 730 }, { - "epoch": 1.9838056680161942, - "grad_norm": 21.125, - "learning_rate": 1.5460158573036288e-06, - "logits/chosen": -1.425318956375122, - "logits/rejected": -1.5616633892059326, - "logps/chosen": -228.63955688476562, - "logps/rejected": -232.26235961914062, - "loss": 0.1763, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.15325972437858582, - "rewards/margins": 2.676074504852295, - "rewards/rejected": -2.8293344974517822, + "epoch": 0.35266598692496853, + "grad_norm": 6.15625, + "learning_rate": 3.917910447761194e-06, + "loss": 1.0613, "step": 735 }, { - "epoch": 1.9973009446693657, - "grad_norm": 57.75, - "learning_rate": 1.509800584902108e-06, - "logits/chosen": -1.4701238870620728, - "logits/rejected": -1.335039496421814, - "logps/chosen": -165.36788940429688, - "logps/rejected": -248.84915161132812, - "loss": 0.2088, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": -0.21872563660144806, - "rewards/margins": 3.6630032062530518, - "rewards/rejected": -3.8817286491394043, + "epoch": 0.35506507527139686, + "grad_norm": 4.125, + "learning_rate": 3.944562899786781e-06, + "loss": 1.1056, "step": 740 }, { - "epoch": 2.010796221322537, - "grad_norm": 13.0625, - "learning_rate": 1.473830118747216e-06, - "logits/chosen": -1.3533880710601807, - "logits/rejected": -1.4392606019973755, - "logps/chosen": -173.4610595703125, - "logps/rejected": -189.3846435546875, - "loss": 0.1035, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04286568984389305, - "rewards/margins": 3.4945671558380127, - "rewards/rejected": -3.537432909011841, + "epoch": 0.35746416361782524, + "grad_norm": 5.09375, + "learning_rate": 3.971215351812367e-06, + "loss": 1.1064, "step": 745 }, { - "epoch": 2.0242914979757085, - "grad_norm": 5.46875, - "learning_rate": 1.4381133517898803e-06, - "logits/chosen": -1.5612472295761108, - "logits/rejected": -1.6096746921539307, - "logps/chosen": -244.1045684814453, - "logps/rejected": -227.0, - "loss": 0.068, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.28070664405822754, - "rewards/margins": 4.070573329925537, - "rewards/rejected": -3.7898666858673096, + "epoch": 0.35986325196425356, + "grad_norm": 4.3125, + "learning_rate": 3.997867803837953e-06, + "loss": 1.0194, "step": 750 }, { - "epoch": 2.03778677462888, - "grad_norm": 46.75, - "learning_rate": 1.4026591142591733e-06, - "logits/chosen": -1.4181170463562012, - "logits/rejected": -1.5695334672927856, - "logps/chosen": -218.1271514892578, - "logps/rejected": -171.77000427246094, - "loss": 0.1633, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.2111760377883911, - "rewards/margins": 2.9384093284606934, - "rewards/rejected": -3.149585247039795, + "epoch": 0.36226234031068194, + "grad_norm": 4.75, + "learning_rate": 4.0245202558635396e-06, + "loss": 1.0052, "step": 755 }, { - "epoch": 2.051282051282051, - "grad_norm": 24.5, - "learning_rate": 1.3674761714792153e-06, - "logits/chosen": -1.5777294635772705, - "logits/rejected": -1.6976985931396484, - "logps/chosen": -224.3392791748047, - "logps/rejected": -254.0798797607422, - "loss": 0.0739, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": 0.15005287528038025, - "rewards/margins": 4.0651535987854, - "rewards/rejected": -3.91510009765625, + "epoch": 0.3646614286571103, + "grad_norm": 3.46875, + "learning_rate": 4.051172707889126e-06, + "loss": 1.0548, "step": 760 }, { - "epoch": 2.064777327935223, - "grad_norm": 23.75, - "learning_rate": 1.33257322170213e-06, - "logits/chosen": -1.4911249876022339, - "logits/rejected": -1.500860571861267, - "logps/chosen": -172.9776611328125, - "logps/rejected": -201.8260040283203, - "loss": 0.1002, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": 0.21206021308898926, - "rewards/margins": 3.810685634613037, - "rewards/rejected": -3.598625659942627, + "epoch": 0.36706051700353864, + "grad_norm": 7.625, + "learning_rate": 4.077825159914712e-06, + "loss": 1.127, "step": 765 }, { - "epoch": 2.078272604588394, - "grad_norm": 28.375, - "learning_rate": 1.2979588939575879e-06, - "logits/chosen": -1.5784046649932861, - "logits/rejected": -1.5579355955123901, - "logps/chosen": -192.16024780273438, - "logps/rejected": -219.4779510498047, - "loss": 0.1696, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": 0.06577552855014801, - "rewards/margins": 3.573701858520508, - "rewards/rejected": -3.5079262256622314, + "epoch": 0.369459605349967, + "grad_norm": 4.65625, + "learning_rate": 4.104477611940299e-06, + "loss": 0.9835, "step": 770 }, { - "epoch": 2.0917678812415654, - "grad_norm": 14.8125, - "learning_rate": 1.2636417459194536e-06, - "logits/chosen": -1.5944167375564575, - "logits/rejected": -1.6392465829849243, - "logps/chosen": -235.58633422851562, - "logps/rejected": -274.0408630371094, - "loss": 0.0593, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.08247147500514984, - "rewards/margins": 4.281358242034912, - "rewards/rejected": -4.363830089569092, + "epoch": 0.37185869369639535, + "grad_norm": 3.828125, + "learning_rate": 4.131130063965885e-06, + "loss": 1.1199, "step": 775 }, { - "epoch": 2.1052631578947367, - "grad_norm": 5.21875, - "learning_rate": 1.2296302617900772e-06, - "logits/chosen": -1.5774985551834106, - "logits/rejected": -1.6413581371307373, - "logps/chosen": -171.0308074951172, - "logps/rejected": -183.9725341796875, - "loss": 0.0845, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": 0.016073107719421387, - "rewards/margins": 3.9465243816375732, - "rewards/rejected": -3.9304511547088623, + "epoch": 0.37425778204282373, + "grad_norm": 5.3125, + "learning_rate": 4.157782515991471e-06, + "loss": 1.0222, "step": 780 }, { - "epoch": 2.118758434547908, - "grad_norm": 15.0, - "learning_rate": 1.1959328502027556e-06, - "logits/chosen": -1.5672693252563477, - "logits/rejected": -1.5724976062774658, - "logps/chosen": -161.8846435546875, - "logps/rejected": -190.6571807861328, - "loss": 0.1138, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.02019577845931053, - "rewards/margins": 3.7148475646972656, - "rewards/rejected": -3.6946518421173096, + "epoch": 0.3766568703892521, + "grad_norm": 4.90625, + "learning_rate": 4.184434968017058e-06, + "loss": 1.1066, "step": 785 }, { - "epoch": 2.1322537112010798, - "grad_norm": 19.125, - "learning_rate": 1.1625578421428714e-06, - "logits/chosen": -1.4088555574417114, - "logits/rejected": -1.331659197807312, - "logps/chosen": -197.23593139648438, - "logps/rejected": -279.7657470703125, - "loss": 0.1239, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.08394167572259903, - "rewards/margins": 3.702916383743286, - "rewards/rejected": -3.786858081817627, + "epoch": 0.37905595873568043, + "grad_norm": 3.515625, + "learning_rate": 4.211087420042645e-06, + "loss": 1.0559, "step": 790 }, { - "epoch": 2.145748987854251, - "grad_norm": 19.625, - "learning_rate": 1.1295134888882258e-06, - "logits/chosen": -1.5858689546585083, - "logits/rejected": -1.6758959293365479, - "logps/chosen": -194.56253051757812, - "logps/rejected": -206.4073028564453, - "loss": 0.0922, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.14167055487632751, - "rewards/margins": 3.8033957481384277, - "rewards/rejected": -3.945065975189209, + "epoch": 0.3814550470821088, + "grad_norm": 6.3125, + "learning_rate": 4.237739872068231e-06, + "loss": 0.974, "step": 795 }, { - "epoch": 2.1592442645074224, - "grad_norm": 16.25, - "learning_rate": 1.0968079599690872e-06, - "logits/chosen": -1.5427080392837524, - "logits/rejected": -1.509251356124878, - "logps/chosen": -227.91281127929688, - "logps/rejected": -196.93661499023438, - "loss": 0.112, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.0069570960476994514, - "rewards/margins": 3.6783218383789062, - "rewards/rejected": -3.6852786540985107, + "epoch": 0.38385413542853714, + "grad_norm": 8.0625, + "learning_rate": 4.264392324093816e-06, + "loss": 1.0657, "step": 800 }, { - "epoch": 2.1727395411605936, - "grad_norm": 19.875, - "learning_rate": 1.064449341148442e-06, - "logits/chosen": -1.624629020690918, - "logits/rejected": -1.647383689880371, - "logps/chosen": -203.95071411132812, - "logps/rejected": -221.9706573486328, - "loss": 0.1216, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.1412554681301117, - "rewards/margins": 3.5126278400421143, - "rewards/rejected": -3.6538829803466797, + "epoch": 0.38385413542853714, + "eval_loss": 1.0770481824874878, + "eval_runtime": 177.0808, + "eval_samples_per_second": 41.845, + "eval_steps_per_second": 10.464, + "step": 800 + }, + { + "epoch": 0.3862532237749655, + "grad_norm": 3.8125, + "learning_rate": 4.2910447761194036e-06, + "loss": 1.0495, "step": 805 }, { - "epoch": 2.1862348178137654, - "grad_norm": 14.5, - "learning_rate": 1.0324456324229536e-06, - "logits/chosen": -1.4194597005844116, - "logits/rejected": -1.3489387035369873, - "logps/chosen": -166.34426879882812, - "logps/rejected": -239.3138427734375, - "loss": 0.0775, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0588313452899456, - "rewards/margins": 3.9181437492370605, - "rewards/rejected": -3.976975202560425, + "epoch": 0.3886523121213939, + "grad_norm": 4.21875, + "learning_rate": 4.31769722814499e-06, + "loss": 1.0879, "step": 810 }, { - "epoch": 2.1997300944669367, - "grad_norm": 35.5, - "learning_rate": 1.000804746045138e-06, - "logits/chosen": -1.3923031091690063, - "logits/rejected": -1.4646499156951904, - "logps/chosen": -191.46279907226562, - "logps/rejected": -184.79953002929688, - "loss": 0.1111, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.008677695877850056, - "rewards/margins": 3.193809986114502, - "rewards/rejected": -3.2024874687194824, + "epoch": 0.3910514004678222, + "grad_norm": 8.75, + "learning_rate": 4.344349680170576e-06, + "loss": 1.0887, "step": 815 }, { - "epoch": 2.213225371120108, - "grad_norm": 16.0, - "learning_rate": 9.695345045672167e-07, - "logits/chosen": -1.4313310384750366, - "logits/rejected": -1.4792088270187378, - "logps/chosen": -191.17092895507812, - "logps/rejected": -196.5364532470703, - "loss": 0.118, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.33248454332351685, - "rewards/margins": 3.7640583515167236, - "rewards/rejected": -4.096542835235596, + "epoch": 0.3934504888142506, + "grad_norm": 4.6875, + "learning_rate": 4.3710021321961625e-06, + "loss": 1.0342, "step": 820 }, { - "epoch": 2.2267206477732793, - "grad_norm": 15.5, - "learning_rate": 9.386426389071532e-07, - "logits/chosen": -1.4152162075042725, - "logits/rejected": -1.363843321800232, - "logps/chosen": -229.3914031982422, - "logps/rejected": -278.37847900390625, - "loss": 0.0961, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.30344587564468384, - "rewards/margins": 4.63069486618042, - "rewards/rejected": -4.934141635894775, + "epoch": 0.3958495771606789, + "grad_norm": 5.21875, + "learning_rate": 4.397654584221749e-06, + "loss": 1.0058, "step": 825 }, { - "epoch": 2.2402159244264506, - "grad_norm": 17.625, - "learning_rate": 9.081367864373489e-07, - "logits/chosen": -1.3973594903945923, - "logits/rejected": -1.524677038192749, - "logps/chosen": -168.33126831054688, - "logps/rejected": -156.55892944335938, - "loss": 0.0944, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.1414262354373932, - "rewards/margins": 3.3840813636779785, - "rewards/rejected": -3.5255074501037598, + "epoch": 0.3982486655071073, + "grad_norm": 6.6875, + "learning_rate": 4.424307036247335e-06, + "loss": 0.9424, "step": 830 }, { - "epoch": 2.2537112010796223, - "grad_norm": 11.8125, - "learning_rate": 8.780244890964567e-07, - "logits/chosen": -1.4209728240966797, - "logits/rejected": -1.2569080591201782, - "logps/chosen": -177.04782104492188, - "logps/rejected": -275.0938415527344, - "loss": 0.0698, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.16639022529125214, - "rewards/margins": 3.9153380393981934, - "rewards/rejected": -3.748948335647583, + "epoch": 0.4006477538535357, + "grad_norm": 8.125, + "learning_rate": 4.4509594882729215e-06, + "loss": 1.0973, "step": 835 }, { - "epoch": 2.2672064777327936, - "grad_norm": 10.625, - "learning_rate": 8.483131915247969e-07, - "logits/chosen": -1.563407301902771, - "logits/rejected": -1.534883975982666, - "logps/chosen": -171.35104370117188, - "logps/rejected": -242.4336700439453, - "loss": 0.0949, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.2416602075099945, - "rewards/margins": 4.914166450500488, - "rewards/rejected": -5.155826568603516, + "epoch": 0.403046842199964, + "grad_norm": 5.3125, + "learning_rate": 4.477611940298508e-06, + "loss": 1.1379, "step": 840 }, { - "epoch": 2.280701754385965, - "grad_norm": 19.75, - "learning_rate": 8.190102392238191e-07, - "logits/chosen": -1.4438880681991577, - "logits/rejected": -1.4186255931854248, - "logps/chosen": -154.63705444335938, - "logps/rejected": -207.8048858642578, - "loss": 0.0783, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.18597714602947235, - "rewards/margins": 4.108304500579834, - "rewards/rejected": -4.294281959533691, + "epoch": 0.4054459305463924, + "grad_norm": 4.28125, + "learning_rate": 4.504264392324094e-06, + "loss": 1.0758, "step": 845 }, { - "epoch": 2.294197031039136, - "grad_norm": 32.25, - "learning_rate": 7.90122876740086e-07, - "logits/chosen": -1.63836669921875, - "logits/rejected": -1.5565919876098633, - "logps/chosen": -226.85037231445312, - "logps/rejected": -326.13421630859375, - "loss": 0.0577, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.0758393257856369, - "rewards/margins": 4.579066276550293, - "rewards/rejected": -4.503227233886719, + "epoch": 0.4078450188928207, + "grad_norm": 4.625, + "learning_rate": 4.53091684434968e-06, + "loss": 1.1237, "step": 850 }, { - "epoch": 2.3076923076923075, - "grad_norm": 5.0625, - "learning_rate": 7.616582458742059e-07, - "logits/chosen": -1.4565999507904053, - "logits/rejected": -1.455143928527832, - "logps/chosen": -212.2303009033203, - "logps/rejected": -276.86834716796875, - "loss": 0.0519, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.1173635721206665, - "rewards/margins": 4.344286918640137, - "rewards/rejected": -4.46165132522583, + "epoch": 0.4102441072392491, + "grad_norm": 3.859375, + "learning_rate": 4.557569296375267e-06, + "loss": 1.083, "step": 855 }, { - "epoch": 2.3211875843454792, - "grad_norm": 9.6875, - "learning_rate": 7.336233839151693e-07, - "logits/chosen": -1.6497745513916016, - "logits/rejected": -1.6588242053985596, - "logps/chosen": -169.42959594726562, - "logps/rejected": -258.19207763671875, - "loss": 0.1057, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.21658802032470703, - "rewards/margins": 3.805851697921753, - "rewards/rejected": -4.022439479827881, + "epoch": 0.41264319558567747, + "grad_norm": 3.84375, + "learning_rate": 4.584221748400853e-06, + "loss": 1.0514, "step": 860 }, { - "epoch": 2.3346828609986505, - "grad_norm": 21.5, - "learning_rate": 7.060252219005304e-07, - "logits/chosen": -1.520618200302124, - "logits/rejected": -1.5337458848953247, - "logps/chosen": -227.05679321289062, - "logps/rejected": -317.5985107421875, - "loss": 0.0774, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06503160297870636, - "rewards/margins": 4.4666852951049805, - "rewards/rejected": -4.531716823577881, + "epoch": 0.4150422839321058, + "grad_norm": 3.921875, + "learning_rate": 4.610874200426439e-06, + "loss": 1.0582, "step": 865 }, { - "epoch": 2.348178137651822, - "grad_norm": 12.5, - "learning_rate": 6.788705829028483e-07, - "logits/chosen": -1.5424460172653198, - "logits/rejected": -1.527999997138977, - "logps/chosen": -186.46414184570312, - "logps/rejected": -190.83157348632812, - "loss": 0.1022, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.1064692884683609, - "rewards/margins": 3.359034776687622, - "rewards/rejected": -3.2525649070739746, + "epoch": 0.4174413722785342, + "grad_norm": 5.15625, + "learning_rate": 4.637526652452026e-06, + "loss": 0.9442, "step": 870 }, { - "epoch": 2.361673414304993, - "grad_norm": 66.5, - "learning_rate": 6.521661803428225e-07, - "logits/chosen": -1.5013136863708496, - "logits/rejected": -1.5206286907196045, - "logps/chosen": -201.0956268310547, - "logps/rejected": -198.01573181152344, - "loss": 0.0955, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.13122853636741638, - "rewards/margins": 3.767671585083008, - "rewards/rejected": -3.898899793624878, + "epoch": 0.4198404606249625, + "grad_norm": 6.03125, + "learning_rate": 4.664179104477613e-06, + "loss": 0.9391, "step": 875 }, { - "epoch": 2.375168690958165, - "grad_norm": 11.3125, - "learning_rate": 6.259186163295439e-07, - "logits/chosen": -1.2552602291107178, - "logits/rejected": -1.3482682704925537, - "logps/chosen": -246.9757080078125, - "logps/rejected": -239.8274383544922, - "loss": 0.0983, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.1879548728466034, - "rewards/margins": 3.7479751110076904, - "rewards/rejected": -3.935929775238037, + "epoch": 0.4222395489713909, + "grad_norm": 3.546875, + "learning_rate": 4.690831556503198e-06, + "loss": 1.016, "step": 880 }, { - "epoch": 2.388663967611336, - "grad_norm": 16.0, - "learning_rate": 6.001343800282569e-07, - "logits/chosen": -1.5184439420700073, - "logits/rejected": -1.4158121347427368, - "logps/chosen": -145.63616943359375, - "logps/rejected": -212.58468627929688, - "loss": 0.0783, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.3523162603378296, - "rewards/margins": 4.166034698486328, - "rewards/rejected": -4.5183515548706055, + "epoch": 0.42463863731781926, + "grad_norm": 4.9375, + "learning_rate": 4.717484008528785e-06, + "loss": 1.0797, "step": 885 }, { - "epoch": 2.4021592442645074, - "grad_norm": 12.0625, - "learning_rate": 5.748198460560475e-07, - "logits/chosen": -1.602419137954712, - "logits/rejected": -1.6869083642959595, - "logps/chosen": -211.70947265625, - "logps/rejected": -220.8863525390625, - "loss": 0.0806, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.16532480716705322, - "rewards/margins": 4.41878080368042, - "rewards/rejected": -4.253456115722656, + "epoch": 0.4270377256642476, + "grad_norm": 6.78125, + "learning_rate": 4.744136460554372e-06, + "loss": 1.0632, "step": 890 }, { - "epoch": 2.4156545209176787, - "grad_norm": 32.75, - "learning_rate": 5.499812729058546e-07, - "logits/chosen": -1.56089186668396, - "logits/rejected": -1.5883516073226929, - "logps/chosen": -181.11459350585938, - "logps/rejected": -161.60299682617188, - "loss": 0.1433, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.2707998752593994, - "rewards/margins": 3.216136932373047, - "rewards/rejected": -3.4869370460510254, + "epoch": 0.42943681401067596, + "grad_norm": 4.28125, + "learning_rate": 4.770788912579958e-06, + "loss": 1.0721, "step": 895 }, { - "epoch": 2.42914979757085, - "grad_norm": 14.8125, - "learning_rate": 5.256248013991857e-07, - "logits/chosen": -1.5014961957931519, - "logits/rejected": -1.4206339120864868, - "logps/chosen": -226.8283233642578, - "logps/rejected": -266.60333251953125, - "loss": 0.0715, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.00030528902425430715, - "rewards/margins": 4.552371978759766, - "rewards/rejected": -4.552066802978516, + "epoch": 0.4318359023571043, + "grad_norm": 4.59375, + "learning_rate": 4.797441364605544e-06, + "loss": 1.054, "step": 900 }, { - "epoch": 2.4426450742240218, - "grad_norm": 23.0, - "learning_rate": 5.01756453167925e-07, - "logits/chosen": -1.5279182195663452, - "logits/rejected": -1.5130751132965088, - "logps/chosen": -199.68397521972656, - "logps/rejected": -246.5128936767578, - "loss": 0.0683, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": 0.14631351828575134, - "rewards/margins": 4.73899507522583, - "rewards/rejected": -4.592680931091309, + "epoch": 0.4318359023571043, + "eval_loss": 1.0616637468338013, + "eval_runtime": 175.5614, + "eval_samples_per_second": 42.207, + "eval_steps_per_second": 10.555, + "step": 900 + }, + { + "epoch": 0.43423499070353266, + "grad_norm": 5.4375, + "learning_rate": 4.82409381663113e-06, + "loss": 1.0619, "step": 905 }, { - "epoch": 2.456140350877193, - "grad_norm": 21.375, - "learning_rate": 4.78382129165613e-07, - "logits/chosen": -1.4500765800476074, - "logits/rejected": -1.5014575719833374, - "logps/chosen": -185.51475524902344, - "logps/rejected": -181.7137908935547, - "loss": 0.1049, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09951256215572357, - "rewards/margins": 3.4707932472229004, - "rewards/rejected": -3.371281147003174, + "epoch": 0.436634079049961, + "grad_norm": 4.40625, + "learning_rate": 4.850746268656717e-06, + "loss": 0.9793, "step": 910 }, { - "epoch": 2.4696356275303644, - "grad_norm": 32.5, - "learning_rate": 4.5550760820855633e-07, - "logits/chosen": -1.557877779006958, - "logits/rejected": -1.4586069583892822, - "logps/chosen": -209.05062866210938, - "logps/rejected": -308.66424560546875, - "loss": 0.118, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.2664136290550232, - "rewards/margins": 4.0513434410095215, - "rewards/rejected": -4.3177571296691895, + "epoch": 0.43903316739638937, + "grad_norm": 4.78125, + "learning_rate": 4.877398720682303e-06, + "loss": 1.031, "step": 915 }, { - "epoch": 2.4831309041835357, - "grad_norm": 22.5, - "learning_rate": 4.3313854554713457e-07, - "logits/chosen": -1.5593338012695312, - "logits/rejected": -1.5647127628326416, - "logps/chosen": -197.6747283935547, - "logps/rejected": -253.01876831054688, - "loss": 0.0716, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.0987640991806984, - "rewards/margins": 4.090095043182373, - "rewards/rejected": -3.9913315773010254, + "epoch": 0.44143225574281775, + "grad_norm": 3.953125, + "learning_rate": 4.90405117270789e-06, + "loss": 1.0687, "step": 920 }, { - "epoch": 2.4966261808367074, - "grad_norm": 20.125, - "learning_rate": 4.1128047146765936e-07, - "logits/chosen": -1.435847520828247, - "logits/rejected": -1.453253149986267, - "logps/chosen": -141.46656799316406, - "logps/rejected": -162.93905639648438, - "loss": 0.1007, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.20369374752044678, - "rewards/margins": 3.790607452392578, - "rewards/rejected": -3.586913585662842, + "epoch": 0.44383134408924607, + "grad_norm": 3.59375, + "learning_rate": 4.930703624733476e-06, + "loss": 1.0311, "step": 925 }, { - "epoch": 2.5101214574898787, - "grad_norm": 32.5, - "learning_rate": 3.899387899251242e-07, - "logits/chosen": -1.499912142753601, - "logits/rejected": -1.5055288076400757, - "logps/chosen": -179.4788360595703, - "logps/rejected": -202.9369354248047, - "loss": 0.0808, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04291580989956856, - "rewards/margins": 3.4943645000457764, - "rewards/rejected": -3.537280321121216, + "epoch": 0.44623043243567445, + "grad_norm": 3.265625, + "learning_rate": 4.957356076759062e-06, + "loss": 0.9699, "step": 930 }, { - "epoch": 2.52361673414305, - "grad_norm": 6.59375, - "learning_rate": 3.6911877720719053e-07, - "logits/chosen": -1.6243568658828735, - "logits/rejected": -1.5396671295166016, - "logps/chosen": -155.4473419189453, - "logps/rejected": -191.9477081298828, - "loss": 0.1245, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.33543360233306885, - "rewards/margins": 4.113525867462158, - "rewards/rejected": -4.4489593505859375, + "epoch": 0.4486295207821028, + "grad_norm": 4.78125, + "learning_rate": 4.984008528784649e-06, + "loss": 1.1171, "step": 935 }, { - "epoch": 2.5371120107962213, - "grad_norm": 10.3125, - "learning_rate": 3.488255806297311e-07, - "logits/chosen": -1.4612650871276855, - "logits/rejected": -1.6070709228515625, - "logps/chosen": -164.7592010498047, - "logps/rejected": -161.7231903076172, - "loss": 0.1901, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": 0.06939269602298737, - "rewards/margins": 3.406930446624756, - "rewards/rejected": -3.3375372886657715, + "epoch": 0.45102860912853115, + "grad_norm": 4.0625, + "learning_rate": 5.010660980810235e-06, + "loss": 0.9621, "step": 940 }, { - "epoch": 2.5506072874493926, - "grad_norm": 7.46875, - "learning_rate": 3.2906421726426857e-07, - "logits/chosen": -1.4703078269958496, - "logits/rejected": -1.4379500150680542, - "logps/chosen": -204.19473266601562, - "logps/rejected": -244.11965942382812, - "loss": 0.0904, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6960457563400269, - "rewards/margins": 4.154335975646973, - "rewards/rejected": -4.850381851196289, + "epoch": 0.45342769747495953, + "grad_norm": 3.84375, + "learning_rate": 5.037313432835821e-06, + "loss": 1.0444, "step": 945 }, { - "epoch": 2.564102564102564, - "grad_norm": 17.375, - "learning_rate": 3.09839572697605e-07, - "logits/chosen": -1.560767412185669, - "logits/rejected": -1.4427921772003174, - "logps/chosen": -243.10568237304688, - "logps/rejected": -232.52108764648438, - "loss": 0.0844, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.049421075731515884, - "rewards/margins": 4.088489055633545, - "rewards/rejected": -4.137909889221191, + "epoch": 0.45582678582138786, + "grad_norm": 5.0, + "learning_rate": 5.063965884861408e-06, + "loss": 0.9907, "step": 950 }, { - "epoch": 2.5775978407557356, - "grad_norm": 19.75, - "learning_rate": 2.9115639982396166e-07, - "logits/chosen": -1.515772819519043, - "logits/rejected": -1.6191974878311157, - "logps/chosen": -210.3816375732422, - "logps/rejected": -198.30801391601562, - "loss": 0.1289, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.32715946435928345, - "rewards/margins": 3.6732399463653564, - "rewards/rejected": -4.000399589538574, + "epoch": 0.45822587416781624, + "grad_norm": 3.578125, + "learning_rate": 5.090618336886995e-06, + "loss": 1.0314, "step": 955 }, { - "epoch": 2.591093117408907, - "grad_norm": 16.875, - "learning_rate": 2.7301931766992916e-07, - "logits/chosen": -1.53992760181427, - "logits/rejected": -1.6426169872283936, - "logps/chosen": -202.2464599609375, - "logps/rejected": -200.73020935058594, - "loss": 0.0916, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2096923142671585, - "rewards/margins": 3.49652361869812, - "rewards/rejected": -3.2868313789367676, + "epoch": 0.46062496251424456, + "grad_norm": 3.78125, + "learning_rate": 5.11727078891258e-06, + "loss": 1.0468, "step": 960 }, { - "epoch": 2.604588394062078, - "grad_norm": 15.875, - "learning_rate": 2.554328102525022e-07, - "logits/chosen": -1.468806505203247, - "logits/rejected": -1.5037376880645752, - "logps/chosen": -225.407470703125, - "logps/rejected": -265.16326904296875, - "loss": 0.0516, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.1482563018798828, - "rewards/margins": 3.908936023712158, - "rewards/rejected": -3.760679244995117, + "epoch": 0.46302405086067294, + "grad_norm": 4.625, + "learning_rate": 5.1439232409381665e-06, + "loss": 1.0273, "step": 965 }, { - "epoch": 2.6180836707152495, - "grad_norm": 28.25, - "learning_rate": 2.3840122547050482e-07, - "logits/chosen": -1.4675546884536743, - "logits/rejected": -1.427056074142456, - "logps/chosen": -189.55482482910156, - "logps/rejected": -238.43399047851562, - "loss": 0.128, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.15342381596565247, - "rewards/margins": 4.185477256774902, - "rewards/rejected": -4.338901042938232, + "epoch": 0.4654231392071013, + "grad_norm": 3.546875, + "learning_rate": 5.170575692963753e-06, + "loss": 1.0078, "step": 970 }, { - "epoch": 2.6315789473684212, - "grad_norm": 10.6875, - "learning_rate": 2.219287740296605e-07, - "logits/chosen": -1.5017975568771362, - "logits/rejected": -1.5283129215240479, - "logps/chosen": -185.2952117919922, - "logps/rejected": -218.5054168701172, - "loss": 0.0971, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.2838120460510254, - "rewards/margins": 4.120657444000244, - "rewards/rejected": -4.4044694900512695, + "epoch": 0.46782222755352965, + "grad_norm": 4.25, + "learning_rate": 5.197228144989339e-06, + "loss": 0.9977, "step": 975 }, { - "epoch": 2.6450742240215925, - "grad_norm": 21.0, - "learning_rate": 2.060195284015837e-07, - "logits/chosen": -1.662113904953003, - "logits/rejected": -1.6862503290176392, - "logps/chosen": -150.606689453125, - "logps/rejected": -198.61793518066406, - "loss": 0.1152, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2786501944065094, - "rewards/margins": 3.8265221118927, - "rewards/rejected": -4.105172157287598, + "epoch": 0.470221315899958, + "grad_norm": 5.0625, + "learning_rate": 5.2238805970149255e-06, + "loss": 0.9653, "step": 980 }, { - "epoch": 2.658569500674764, - "grad_norm": 19.75, - "learning_rate": 1.9067742181694353e-07, - "logits/chosen": -1.4568703174591064, - "logits/rejected": -1.4512639045715332, - "logps/chosen": -171.15443420410156, - "logps/rejected": -221.99526977539062, - "loss": 0.096, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.17055651545524597, - "rewards/margins": 5.160454273223877, - "rewards/rejected": -5.3310112953186035, + "epoch": 0.47262040424638635, + "grad_norm": 5.0, + "learning_rate": 5.250533049040513e-06, + "loss": 1.0362, "step": 985 }, { - "epoch": 2.672064777327935, - "grad_norm": 93.0, - "learning_rate": 1.75906247293057e-07, - "logits/chosen": -1.6594133377075195, - "logits/rejected": -1.5529086589813232, - "logps/chosen": -156.86392211914062, - "logps/rejected": -285.59197998046875, - "loss": 0.118, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.5136551856994629, - "rewards/margins": 4.625790596008301, - "rewards/rejected": -5.139446258544922, + "epoch": 0.47501949259281473, + "grad_norm": 3.765625, + "learning_rate": 5.277185501066099e-06, + "loss": 1.0136, "step": 990 }, { - "epoch": 2.6855600539811064, - "grad_norm": 11.3125, - "learning_rate": 1.617096566961429e-07, - "logits/chosen": -1.466498613357544, - "logits/rejected": -1.4549661874771118, - "logps/chosen": -155.0102081298828, - "logps/rejected": -232.1795654296875, - "loss": 0.152, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.09109257161617279, - "rewards/margins": 3.467794418334961, - "rewards/rejected": -3.558886766433716, + "epoch": 0.4774185809392431, + "grad_norm": 3.015625, + "learning_rate": 5.303837953091685e-06, + "loss": 0.9821, "step": 995 }, { - "epoch": 2.699055330634278, - "grad_norm": 15.9375, - "learning_rate": 1.4809115983847267e-07, - "logits/chosen": -1.377762794494629, - "logits/rejected": -1.3253929615020752, - "logps/chosen": -148.2834014892578, - "logps/rejected": -208.0382080078125, - "loss": 0.1151, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.24366268515586853, - "rewards/margins": 3.6103515625, - "rewards/rejected": -3.8540141582489014, + "epoch": 0.47981766928567143, + "grad_norm": 3.40625, + "learning_rate": 5.3304904051172716e-06, + "loss": 1.0744, "step": 1000 }, { - "epoch": 2.699055330634278, - "eval_logits/chosen": -1.536294937133789, - "eval_logits/rejected": -1.5776937007904053, - "eval_logps/chosen": -191.7211456298828, - "eval_logps/rejected": -226.05455017089844, - "eval_loss": 0.31860384345054626, - "eval_rewards/accuracies": 0.8524096608161926, - "eval_rewards/chosen": -0.7276893258094788, - "eval_rewards/margins": 2.395343065261841, - "eval_rewards/rejected": -3.1230320930480957, - "eval_runtime": 23.3449, - "eval_samples_per_second": 14.136, - "eval_steps_per_second": 3.555, + "epoch": 0.47981766928567143, + "eval_loss": 1.048653483390808, + "eval_runtime": 175.6017, + "eval_samples_per_second": 42.198, + "eval_steps_per_second": 10.552, "step": 1000 }, { - "epoch": 2.7125506072874495, - "grad_norm": 23.625, - "learning_rate": 1.3505412361064395e-07, - "logits/chosen": -1.4981733560562134, - "logits/rejected": -1.5207927227020264, - "logps/chosen": -192.99154663085938, - "logps/rejected": -194.6613311767578, - "loss": 0.0649, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.07562440633773804, - "rewards/margins": 4.270889759063721, - "rewards/rejected": -4.195265769958496, + "epoch": 0.4822167576320998, + "grad_norm": 3.859375, + "learning_rate": 5.357142857142857e-06, + "loss": 1.0825, "step": 1005 }, { - "epoch": 2.7260458839406208, - "grad_norm": 20.5, - "learning_rate": 1.226017711491867e-07, - "logits/chosen": -1.5061196088790894, - "logits/rejected": -1.5956671237945557, - "logps/chosen": -170.25169372558594, - "logps/rejected": -240.0498046875, - "loss": 0.0805, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.27517637610435486, - "rewards/margins": 3.623337507247925, - "rewards/rejected": -3.89851450920105, + "epoch": 0.48461584597852814, + "grad_norm": 3.359375, + "learning_rate": 5.383795309168443e-06, + "loss": 1.0803, "step": 1010 }, { - "epoch": 2.739541160593792, - "grad_norm": 31.25, - "learning_rate": 1.107371810397076e-07, - "logits/chosen": -1.4881411790847778, - "logits/rejected": -1.5475780963897705, - "logps/chosen": -237.45504760742188, - "logps/rejected": -212.13330078125, - "loss": 0.0625, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.10645435005426407, - "rewards/margins": 4.086081027984619, - "rewards/rejected": -4.192535400390625, + "epoch": 0.4870149343249565, + "grad_norm": 5.53125, + "learning_rate": 5.41044776119403e-06, + "loss": 0.9915, "step": 1015 }, { - "epoch": 2.753036437246964, - "grad_norm": 16.75, - "learning_rate": 9.946328655577625e-08, - "logits/chosen": -1.5837833881378174, - "logits/rejected": -1.6130040884017944, - "logps/chosen": -137.10398864746094, - "logps/rejected": -171.19357299804688, - "loss": 0.0725, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.27897781133651733, - "rewards/margins": 3.8964107036590576, - "rewards/rejected": -4.175389289855957, + "epoch": 0.4894140226713849, + "grad_norm": 5.5, + "learning_rate": 5.437100213219617e-06, + "loss": 1.0955, "step": 1020 }, { - "epoch": 2.766531713900135, - "grad_norm": 28.25, - "learning_rate": 8.878287493373245e-08, - "logits/chosen": -1.5753690004348755, - "logits/rejected": -1.6070302724838257, - "logps/chosen": -214.03018188476562, - "logps/rejected": -189.55850219726562, - "loss": 0.1188, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.10192601382732391, - "rewards/margins": 3.4568443298339844, - "rewards/rejected": -3.558769941329956, + "epoch": 0.4918131110178132, + "grad_norm": 3.15625, + "learning_rate": 5.463752665245203e-06, + "loss": 0.9685, "step": 1025 }, { - "epoch": 2.7800269905533064, - "grad_norm": 26.625, - "learning_rate": 7.869858668360042e-08, - "logits/chosen": -1.4193127155303955, - "logits/rejected": -1.2717030048370361, - "logps/chosen": -187.0641632080078, - "logps/rejected": -224.65066528320312, - "loss": 0.1028, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.18936040997505188, - "rewards/margins": 4.242377758026123, - "rewards/rejected": -4.43173885345459, + "epoch": 0.4942121993642416, + "grad_norm": 4.65625, + "learning_rate": 5.4904051172707895e-06, + "loss": 1.0203, "step": 1030 }, { - "epoch": 2.7935222672064777, - "grad_norm": 24.75, - "learning_rate": 6.921291493627747e-08, - "logits/chosen": -1.6177479028701782, - "logits/rejected": -1.6725289821624756, - "logps/chosen": -248.9903564453125, - "logps/rejected": -230.86611938476562, - "loss": 0.0856, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": 0.29994627833366394, - "rewards/margins": 3.9232945442199707, - "rewards/rejected": -3.6233487129211426, + "epoch": 0.4966112877106699, + "grad_norm": 5.34375, + "learning_rate": 5.517057569296376e-06, + "loss": 1.1175, "step": 1035 }, { - "epoch": 2.807017543859649, - "grad_norm": 42.5, - "learning_rate": 6.032820482716001e-08, - "logits/chosen": -1.5851434469223022, - "logits/rejected": -1.5880482196807861, - "logps/chosen": -155.3755340576172, - "logps/rejected": -186.6389617919922, - "loss": 0.1754, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.19228528439998627, - "rewards/margins": 3.5754799842834473, - "rewards/rejected": -3.7677650451660156, + "epoch": 0.4990103760570983, + "grad_norm": 5.09375, + "learning_rate": 5.543710021321962e-06, + "loss": 1.0268, "step": 1040 }, { - "epoch": 2.8205128205128203, - "grad_norm": 43.0, - "learning_rate": 5.204665291635519e-08, - "logits/chosen": -1.496819019317627, - "logits/rejected": -1.5007538795471191, - "logps/chosen": -179.5200653076172, - "logps/rejected": -266.001953125, - "loss": 0.1038, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.35474246740341187, - "rewards/margins": 3.8987841606140137, - "rewards/rejected": -4.253526210784912, + "epoch": 0.5014094644035266, + "grad_norm": 4.09375, + "learning_rate": 5.570362473347548e-06, + "loss": 0.9042, "step": 1045 }, { - "epoch": 2.834008097165992, - "grad_norm": 27.5, - "learning_rate": 4.437030664562969e-08, - "logits/chosen": -1.470956563949585, - "logits/rejected": -1.52825927734375, - "logps/chosen": -203.93551635742188, - "logps/rejected": -220.02639770507812, - "loss": 0.1639, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.0977933406829834, - "rewards/margins": 3.205706834793091, - "rewards/rejected": -3.3035004138946533, + "epoch": 0.5038085527499551, + "grad_norm": 4.34375, + "learning_rate": 5.597014925373134e-06, + "loss": 1.0338, "step": 1050 }, { - "epoch": 2.8475033738191633, - "grad_norm": 65.0, - "learning_rate": 3.730106383222132e-08, - "logits/chosen": -1.5251743793487549, - "logits/rejected": -1.3242510557174683, - "logps/chosen": -186.79141235351562, - "logps/rejected": -250.46566772460938, - "loss": 0.0909, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.299465537071228, - "rewards/margins": 4.545691967010498, - "rewards/rejected": -4.845158100128174, + "epoch": 0.5062076410963834, + "grad_norm": 3.390625, + "learning_rate": 5.623667377398722e-06, + "loss": 1.0724, "step": 1055 }, { - "epoch": 2.8609986504723346, - "grad_norm": 19.75, - "learning_rate": 3.084067219964182e-08, - "logits/chosen": -1.527754783630371, - "logits/rejected": -1.5058457851409912, - "logps/chosen": -173.50900268554688, - "logps/rejected": -246.65628051757812, - "loss": 0.2529, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": -0.42231351137161255, - "rewards/margins": 3.4638805389404297, - "rewards/rejected": -3.8861937522888184, + "epoch": 0.5086067294428117, + "grad_norm": 3.703125, + "learning_rate": 5.650319829424308e-06, + "loss": 1.1108, "step": 1060 }, { - "epoch": 2.8744939271255063, - "grad_norm": 43.75, - "learning_rate": 2.499072894559057e-08, - "logits/chosen": -1.6412513256072998, - "logits/rejected": -1.6829668283462524, - "logps/chosen": -180.06788635253906, - "logps/rejected": -219.94528198242188, - "loss": 0.1089, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.23302574455738068, - "rewards/margins": 3.369854688644409, - "rewards/rejected": -3.6028804779052734, + "epoch": 0.51100581778924, + "grad_norm": 3.4375, + "learning_rate": 5.676972281449894e-06, + "loss": 1.0471, "step": 1065 }, { - "epoch": 2.8879892037786776, - "grad_norm": 13.75, - "learning_rate": 1.975268034707878e-08, - "logits/chosen": -1.4751927852630615, - "logits/rejected": -1.5141003131866455, - "logps/chosen": -204.79470825195312, - "logps/rejected": -223.97509765625, - "loss": 0.0822, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.15369097888469696, - "rewards/margins": 3.9829258918762207, - "rewards/rejected": -3.8292346000671387, + "epoch": 0.5134049061356685, + "grad_norm": 2.875, + "learning_rate": 5.70362473347548e-06, + "loss": 1.0467, "step": 1070 }, { - "epoch": 2.901484480431849, - "grad_norm": 39.0, - "learning_rate": 1.512782140286939e-08, - "logits/chosen": -1.4587006568908691, - "logits/rejected": -1.5042657852172852, - "logps/chosen": -156.6952667236328, - "logps/rejected": -263.0159912109375, - "loss": 0.0959, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.11610189825296402, - "rewards/margins": 3.9884142875671387, - "rewards/rejected": -4.10451602935791, + "epoch": 0.5158039944820968, + "grad_norm": 4.28125, + "learning_rate": 5.730277185501066e-06, + "loss": 1.0706, "step": 1075 }, { - "epoch": 2.91497975708502, - "grad_norm": 17.75, - "learning_rate": 1.1117295513313475e-08, - "logits/chosen": -1.665400743484497, - "logits/rejected": -1.6617343425750732, - "logps/chosen": -161.07443237304688, - "logps/rejected": -207.7931671142578, - "loss": 0.0872, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": 0.10158289968967438, - "rewards/margins": 4.066722869873047, - "rewards/rejected": -3.965139865875244, + "epoch": 0.5182030828285251, + "grad_norm": 3.59375, + "learning_rate": 5.756929637526653e-06, + "loss": 1.0565, "step": 1080 }, { - "epoch": 2.9284750337381915, - "grad_norm": 20.75, - "learning_rate": 7.72209419766995e-09, - "logits/chosen": -1.4860131740570068, - "logits/rejected": -1.3406977653503418, - "logps/chosen": -168.0951690673828, - "logps/rejected": -274.35113525390625, - "loss": 0.1053, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": -0.44807571172714233, - "rewards/margins": 3.924337863922119, - "rewards/rejected": -4.372413635253906, + "epoch": 0.5206021711749536, + "grad_norm": 4.0625, + "learning_rate": 5.783582089552239e-06, + "loss": 0.9996, "step": 1085 }, { - "epoch": 2.941970310391363, - "grad_norm": 16.25, - "learning_rate": 4.943056848972227e-09, - "logits/chosen": -1.493690848350525, - "logits/rejected": -1.5224257707595825, - "logps/chosen": -209.3112335205078, - "logps/rejected": -208.22988891601562, - "loss": 0.0878, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.035886406898498535, - "rewards/margins": 3.8605358600616455, - "rewards/rejected": -3.8246493339538574, + "epoch": 0.5230012595213819, + "grad_norm": 4.15625, + "learning_rate": 5.810234541577826e-06, + "loss": 1.0384, "step": 1090 }, { - "epoch": 2.9554655870445345, - "grad_norm": 14.8125, - "learning_rate": 2.7808705265053305e-09, - "logits/chosen": -1.571223497390747, - "logits/rejected": -1.5577231645584106, - "logps/chosen": -169.42562866210938, - "logps/rejected": -181.50631713867188, - "loss": 0.1109, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.13199149072170258, - "rewards/margins": 3.612278699874878, - "rewards/rejected": -3.744269847869873, + "epoch": 0.5254003478678102, + "grad_norm": 3.875, + "learning_rate": 5.836886993603412e-06, + "loss": 1.0247, "step": 1095 }, { - "epoch": 2.968960863697706, - "grad_norm": 27.75, - "learning_rate": 1.2360697859462035e-09, - "logits/chosen": -1.5886671543121338, - "logits/rejected": -1.562727928161621, - "logps/chosen": -162.84046936035156, - "logps/rejected": -219.8025360107422, - "loss": 0.118, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.3454614281654358, - "rewards/margins": 4.1069793701171875, - "rewards/rejected": -4.4524407386779785, + "epoch": 0.5277994362142386, + "grad_norm": 4.0, + "learning_rate": 5.863539445628999e-06, + "loss": 0.9977, "step": 1100 }, { - "epoch": 2.982456140350877, - "grad_norm": 13.0625, - "learning_rate": 3.090365472041557e-10, - "logits/chosen": -1.5336341857910156, - "logits/rejected": -1.5714600086212158, - "logps/chosen": -217.091064453125, - "logps/rejected": -239.0583953857422, - "loss": 0.1793, - "rewards/accuracies": 0.9750000238418579, - "rewards/chosen": -0.2769380509853363, - "rewards/margins": 3.7623977661132812, - "rewards/rejected": -4.039335250854492, + "epoch": 0.5277994362142386, + "eval_loss": 1.0383222103118896, + "eval_runtime": 175.7302, + "eval_samples_per_second": 42.167, + "eval_steps_per_second": 10.545, + "step": 1100 + }, + { + "epoch": 0.530198524560667, + "grad_norm": 3.421875, + "learning_rate": 5.890191897654585e-06, + "loss": 1.0028, "step": 1105 }, { - "epoch": 2.9959514170040484, - "grad_norm": 16.75, - "learning_rate": 0.0, - "logits/chosen": -1.4733049869537354, - "logits/rejected": -1.4821723699569702, - "logps/chosen": -191.77438354492188, - "logps/rejected": -275.19158935546875, - "loss": 0.0661, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.11918088048696518, - "rewards/margins": 4.132817268371582, - "rewards/rejected": -4.013636589050293, + "epoch": 0.5325976129070953, + "grad_norm": 3.3125, + "learning_rate": 5.9168443496801705e-06, + "loss": 1.0242, "step": 1110 }, { - "epoch": 2.9959514170040484, - "step": 1110, - "total_flos": 4.5615607240812134e+17, - "train_loss": 0.26195224279218965, - "train_runtime": 3105.2921, - "train_samples_per_second": 2.862, - "train_steps_per_second": 0.357 + "epoch": 0.5349967012535236, + "grad_norm": 3.359375, + "learning_rate": 5.943496801705757e-06, + "loss": 1.0978, + "step": 1115 + }, + { + "epoch": 0.537395789599952, + "grad_norm": 3.609375, + "learning_rate": 5.970149253731343e-06, + "loss": 0.9951, + "step": 1120 + }, + { + "epoch": 0.5397948779463804, + "grad_norm": 3.65625, + "learning_rate": 5.99680170575693e-06, + "loss": 1.0309, + "step": 1125 + }, + { + "epoch": 0.5421939662928087, + "grad_norm": 4.375, + "learning_rate": 6.023454157782517e-06, + "loss": 1.0587, + "step": 1130 + }, + { + "epoch": 0.5445930546392371, + "grad_norm": 4.03125, + "learning_rate": 6.050106609808103e-06, + "loss": 1.0184, + "step": 1135 + }, + { + "epoch": 0.5469921429856655, + "grad_norm": 3.984375, + "learning_rate": 6.076759061833689e-06, + "loss": 1.0146, + "step": 1140 + }, + { + "epoch": 0.5493912313320938, + "grad_norm": 4.03125, + "learning_rate": 6.1034115138592756e-06, + "loss": 1.1346, + "step": 1145 + }, + { + "epoch": 0.5517903196785222, + "grad_norm": 3.28125, + "learning_rate": 6.130063965884862e-06, + "loss": 0.9866, + "step": 1150 + }, + { + "epoch": 0.5541894080249505, + "grad_norm": 5.21875, + "learning_rate": 6.156716417910447e-06, + "loss": 1.0329, + "step": 1155 + }, + { + "epoch": 0.5565884963713789, + "grad_norm": 4.1875, + "learning_rate": 6.183368869936035e-06, + "loss": 1.0053, + "step": 1160 + }, + { + "epoch": 0.5589875847178072, + "grad_norm": 3.21875, + "learning_rate": 6.210021321961621e-06, + "loss": 1.0875, + "step": 1165 + }, + { + "epoch": 0.5613866730642356, + "grad_norm": 3.84375, + "learning_rate": 6.236673773987207e-06, + "loss": 1.007, + "step": 1170 + }, + { + "epoch": 0.5637857614106639, + "grad_norm": 3.59375, + "learning_rate": 6.2633262260127935e-06, + "loss": 0.982, + "step": 1175 + }, + { + "epoch": 0.5661848497570923, + "grad_norm": 3.5, + "learning_rate": 6.28997867803838e-06, + "loss": 0.9737, + "step": 1180 + }, + { + "epoch": 0.5685839381035207, + "grad_norm": 3.578125, + "learning_rate": 6.316631130063966e-06, + "loss": 1.0252, + "step": 1185 + }, + { + "epoch": 0.570983026449949, + "grad_norm": 4.15625, + "learning_rate": 6.343283582089553e-06, + "loss": 1.1008, + "step": 1190 + }, + { + "epoch": 0.5733821147963774, + "grad_norm": 2.734375, + "learning_rate": 6.3699360341151396e-06, + "loss": 1.1048, + "step": 1195 + }, + { + "epoch": 0.5757812031428058, + "grad_norm": 4.40625, + "learning_rate": 6.396588486140726e-06, + "loss": 0.9778, + "step": 1200 + }, + { + "epoch": 0.5757812031428058, + "eval_loss": 1.0289998054504395, + "eval_runtime": 175.7204, + "eval_samples_per_second": 42.169, + "eval_steps_per_second": 10.545, + "step": 1200 + }, + { + "epoch": 0.5781802914892341, + "grad_norm": 2.796875, + "learning_rate": 6.423240938166312e-06, + "loss": 0.9643, + "step": 1205 + }, + { + "epoch": 0.5805793798356624, + "grad_norm": 3.03125, + "learning_rate": 6.449893390191898e-06, + "loss": 0.9933, + "step": 1210 + }, + { + "epoch": 0.5829784681820908, + "grad_norm": 3.6875, + "learning_rate": 6.476545842217484e-06, + "loss": 1.0294, + "step": 1215 + }, + { + "epoch": 0.5853775565285192, + "grad_norm": 3.984375, + "learning_rate": 6.50319829424307e-06, + "loss": 1.0155, + "step": 1220 + }, + { + "epoch": 0.5877766448749475, + "grad_norm": 3.203125, + "learning_rate": 6.5298507462686575e-06, + "loss": 1.0275, + "step": 1225 + }, + { + "epoch": 0.5901757332213758, + "grad_norm": 5.78125, + "learning_rate": 6.556503198294244e-06, + "loss": 1.0395, + "step": 1230 + }, + { + "epoch": 0.5925748215678043, + "grad_norm": 3.59375, + "learning_rate": 6.58315565031983e-06, + "loss": 1.0462, + "step": 1235 + }, + { + "epoch": 0.5949739099142326, + "grad_norm": 3.84375, + "learning_rate": 6.609808102345416e-06, + "loss": 1.0042, + "step": 1240 + }, + { + "epoch": 0.5973729982606609, + "grad_norm": 4.0625, + "learning_rate": 6.636460554371003e-06, + "loss": 0.9548, + "step": 1245 + }, + { + "epoch": 0.5997720866070894, + "grad_norm": 3.609375, + "learning_rate": 6.663113006396589e-06, + "loss": 1.0012, + "step": 1250 + }, + { + "epoch": 0.6021711749535177, + "grad_norm": 3.046875, + "learning_rate": 6.689765458422175e-06, + "loss": 0.8987, + "step": 1255 + }, + { + "epoch": 0.604570263299946, + "grad_norm": 4.3125, + "learning_rate": 6.7164179104477625e-06, + "loss": 1.0123, + "step": 1260 + }, + { + "epoch": 0.6069693516463743, + "grad_norm": 4.40625, + "learning_rate": 6.743070362473349e-06, + "loss": 0.9884, + "step": 1265 + }, + { + "epoch": 0.6093684399928028, + "grad_norm": 3.140625, + "learning_rate": 6.769722814498934e-06, + "loss": 1.1241, + "step": 1270 + }, + { + "epoch": 0.6117675283392311, + "grad_norm": 4.3125, + "learning_rate": 6.796375266524521e-06, + "loss": 1.0129, + "step": 1275 + }, + { + "epoch": 0.6141666166856594, + "grad_norm": 3.96875, + "learning_rate": 6.823027718550107e-06, + "loss": 1.0729, + "step": 1280 + }, + { + "epoch": 0.6165657050320879, + "grad_norm": 4.0625, + "learning_rate": 6.849680170575693e-06, + "loss": 1.0007, + "step": 1285 + }, + { + "epoch": 0.6189647933785162, + "grad_norm": 4.65625, + "learning_rate": 6.8763326226012796e-06, + "loss": 0.9614, + "step": 1290 + }, + { + "epoch": 0.6213638817249445, + "grad_norm": 3.765625, + "learning_rate": 6.902985074626867e-06, + "loss": 0.9357, + "step": 1295 + }, + { + "epoch": 0.6237629700713728, + "grad_norm": 2.9375, + "learning_rate": 6.929637526652453e-06, + "loss": 1.0187, + "step": 1300 + }, + { + "epoch": 0.6237629700713728, + "eval_loss": 1.0210597515106201, + "eval_runtime": 175.7208, + "eval_samples_per_second": 42.169, + "eval_steps_per_second": 10.545, + "step": 1300 + }, + { + "epoch": 0.6261620584178013, + "grad_norm": 3.40625, + "learning_rate": 6.956289978678039e-06, + "loss": 0.9008, + "step": 1305 + }, + { + "epoch": 0.6285611467642296, + "grad_norm": 3.0625, + "learning_rate": 6.982942430703626e-06, + "loss": 1.0302, + "step": 1310 + }, + { + "epoch": 0.6309602351106579, + "grad_norm": 3.0, + "learning_rate": 7.009594882729211e-06, + "loss": 0.9546, + "step": 1315 + }, + { + "epoch": 0.6333593234570863, + "grad_norm": 3.78125, + "learning_rate": 7.0362473347547975e-06, + "loss": 1.0431, + "step": 1320 + }, + { + "epoch": 0.6357584118035147, + "grad_norm": 3.84375, + "learning_rate": 7.062899786780384e-06, + "loss": 0.9423, + "step": 1325 + }, + { + "epoch": 0.638157500149943, + "grad_norm": 3.890625, + "learning_rate": 7.089552238805971e-06, + "loss": 1.0365, + "step": 1330 + }, + { + "epoch": 0.6405565884963714, + "grad_norm": 3.390625, + "learning_rate": 7.116204690831557e-06, + "loss": 0.973, + "step": 1335 + }, + { + "epoch": 0.6429556768427998, + "grad_norm": 3.34375, + "learning_rate": 7.1428571428571436e-06, + "loss": 0.8795, + "step": 1340 + }, + { + "epoch": 0.6453547651892281, + "grad_norm": 3.359375, + "learning_rate": 7.16950959488273e-06, + "loss": 0.9596, + "step": 1345 + }, + { + "epoch": 0.6477538535356564, + "grad_norm": 3.734375, + "learning_rate": 7.196162046908316e-06, + "loss": 0.9984, + "step": 1350 + }, + { + "epoch": 0.6501529418820848, + "grad_norm": 4.09375, + "learning_rate": 7.2228144989339025e-06, + "loss": 0.9752, + "step": 1355 + }, + { + "epoch": 0.6525520302285132, + "grad_norm": 5.59375, + "learning_rate": 7.249466950959488e-06, + "loss": 0.9701, + "step": 1360 + }, + { + "epoch": 0.6549511185749415, + "grad_norm": 3.40625, + "learning_rate": 7.276119402985076e-06, + "loss": 1.0054, + "step": 1365 + }, + { + "epoch": 0.6573502069213699, + "grad_norm": 3.25, + "learning_rate": 7.302771855010662e-06, + "loss": 0.9919, + "step": 1370 + }, + { + "epoch": 0.6597492952677982, + "grad_norm": 3.515625, + "learning_rate": 7.329424307036248e-06, + "loss": 0.9389, + "step": 1375 + }, + { + "epoch": 0.6621483836142266, + "grad_norm": 3.953125, + "learning_rate": 7.356076759061834e-06, + "loss": 0.9607, + "step": 1380 + }, + { + "epoch": 0.664547471960655, + "grad_norm": 3.046875, + "learning_rate": 7.38272921108742e-06, + "loss": 1.084, + "step": 1385 + }, + { + "epoch": 0.6669465603070833, + "grad_norm": 3.203125, + "learning_rate": 7.409381663113007e-06, + "loss": 0.9887, + "step": 1390 + }, + { + "epoch": 0.6693456486535116, + "grad_norm": 3.953125, + "learning_rate": 7.436034115138593e-06, + "loss": 0.9656, + "step": 1395 + }, + { + "epoch": 0.67174473699994, + "grad_norm": 2.46875, + "learning_rate": 7.46268656716418e-06, + "loss": 1.085, + "step": 1400 + }, + { + "epoch": 0.67174473699994, + "eval_loss": 1.0130821466445923, + "eval_runtime": 178.5199, + "eval_samples_per_second": 41.508, + "eval_steps_per_second": 10.38, + "step": 1400 + }, + { + "epoch": 0.6741438253463684, + "grad_norm": 3.546875, + "learning_rate": 7.4893390191897665e-06, + "loss": 1.0731, + "step": 1405 + }, + { + "epoch": 0.6765429136927967, + "grad_norm": 3.125, + "learning_rate": 7.515991471215353e-06, + "loss": 1.0096, + "step": 1410 + }, + { + "epoch": 0.6789420020392251, + "grad_norm": 2.828125, + "learning_rate": 7.542643923240939e-06, + "loss": 0.9554, + "step": 1415 + }, + { + "epoch": 0.6813410903856535, + "grad_norm": 3.09375, + "learning_rate": 7.569296375266525e-06, + "loss": 1.0688, + "step": 1420 + }, + { + "epoch": 0.6837401787320818, + "grad_norm": 3.359375, + "learning_rate": 7.595948827292111e-06, + "loss": 0.9863, + "step": 1425 + }, + { + "epoch": 0.6861392670785101, + "grad_norm": 4.3125, + "learning_rate": 7.622601279317697e-06, + "loss": 0.9846, + "step": 1430 + }, + { + "epoch": 0.6885383554249386, + "grad_norm": 3.578125, + "learning_rate": 7.649253731343284e-06, + "loss": 0.9898, + "step": 1435 + }, + { + "epoch": 0.6909374437713669, + "grad_norm": 3.53125, + "learning_rate": 7.67590618336887e-06, + "loss": 1.0163, + "step": 1440 + }, + { + "epoch": 0.6933365321177952, + "grad_norm": 3.25, + "learning_rate": 7.702558635394457e-06, + "loss": 1.0098, + "step": 1445 + }, + { + "epoch": 0.6957356204642235, + "grad_norm": 3.359375, + "learning_rate": 7.729211087420043e-06, + "loss": 0.9562, + "step": 1450 + }, + { + "epoch": 0.698134708810652, + "grad_norm": 3.578125, + "learning_rate": 7.75586353944563e-06, + "loss": 0.9638, + "step": 1455 + }, + { + "epoch": 0.7005337971570803, + "grad_norm": 3.421875, + "learning_rate": 7.782515991471216e-06, + "loss": 0.9566, + "step": 1460 + }, + { + "epoch": 0.7029328855035086, + "grad_norm": 3.609375, + "learning_rate": 7.809168443496802e-06, + "loss": 1.0047, + "step": 1465 + }, + { + "epoch": 0.7053319738499371, + "grad_norm": 5.0, + "learning_rate": 7.835820895522389e-06, + "loss": 0.9726, + "step": 1470 + }, + { + "epoch": 0.7077310621963654, + "grad_norm": 3.53125, + "learning_rate": 7.862473347547975e-06, + "loss": 0.951, + "step": 1475 + }, + { + "epoch": 0.7101301505427937, + "grad_norm": 2.8125, + "learning_rate": 7.889125799573561e-06, + "loss": 0.9984, + "step": 1480 + }, + { + "epoch": 0.7125292388892221, + "grad_norm": 3.3125, + "learning_rate": 7.915778251599148e-06, + "loss": 0.9609, + "step": 1485 + }, + { + "epoch": 0.7149283272356505, + "grad_norm": 3.1875, + "learning_rate": 7.942430703624734e-06, + "loss": 0.915, + "step": 1490 + }, + { + "epoch": 0.7173274155820788, + "grad_norm": 3.25, + "learning_rate": 7.96908315565032e-06, + "loss": 1.0487, + "step": 1495 + }, + { + "epoch": 0.7197265039285071, + "grad_norm": 3.296875, + "learning_rate": 7.995735607675907e-06, + "loss": 0.958, + "step": 1500 + }, + { + "epoch": 0.7197265039285071, + "eval_loss": 1.0071519613265991, + "eval_runtime": 175.8655, + "eval_samples_per_second": 42.134, + "eval_steps_per_second": 10.536, + "step": 1500 + }, + { + "epoch": 0.7221255922749356, + "grad_norm": 3.578125, + "learning_rate": 8.022388059701493e-06, + "loss": 1.0321, + "step": 1505 + }, + { + "epoch": 0.7245246806213639, + "grad_norm": 3.625, + "learning_rate": 8.049040511727079e-06, + "loss": 0.9943, + "step": 1510 + }, + { + "epoch": 0.7269237689677922, + "grad_norm": 2.78125, + "learning_rate": 8.075692963752665e-06, + "loss": 0.9148, + "step": 1515 + }, + { + "epoch": 0.7293228573142206, + "grad_norm": 3.625, + "learning_rate": 8.102345415778252e-06, + "loss": 0.9026, + "step": 1520 + }, + { + "epoch": 0.731721945660649, + "grad_norm": 3.15625, + "learning_rate": 8.128997867803838e-06, + "loss": 1.023, + "step": 1525 + }, + { + "epoch": 0.7341210340070773, + "grad_norm": 3.671875, + "learning_rate": 8.155650319829424e-06, + "loss": 1.0484, + "step": 1530 + }, + { + "epoch": 0.7365201223535056, + "grad_norm": 3.078125, + "learning_rate": 8.182302771855012e-06, + "loss": 1.0762, + "step": 1535 + }, + { + "epoch": 0.738919210699934, + "grad_norm": 3.890625, + "learning_rate": 8.208955223880599e-06, + "loss": 1.0513, + "step": 1540 + }, + { + "epoch": 0.7413182990463624, + "grad_norm": 3.515625, + "learning_rate": 8.235607675906185e-06, + "loss": 0.9738, + "step": 1545 + }, + { + "epoch": 0.7437173873927907, + "grad_norm": 4.3125, + "learning_rate": 8.26226012793177e-06, + "loss": 1.0299, + "step": 1550 + }, + { + "epoch": 0.7461164757392191, + "grad_norm": 3.28125, + "learning_rate": 8.288912579957356e-06, + "loss": 0.9971, + "step": 1555 + }, + { + "epoch": 0.7485155640856475, + "grad_norm": 5.75, + "learning_rate": 8.315565031982942e-06, + "loss": 0.9628, + "step": 1560 + }, + { + "epoch": 0.7509146524320758, + "grad_norm": 4.0625, + "learning_rate": 8.342217484008529e-06, + "loss": 1.0038, + "step": 1565 + }, + { + "epoch": 0.7533137407785042, + "grad_norm": 3.828125, + "learning_rate": 8.368869936034117e-06, + "loss": 0.9872, + "step": 1570 + }, + { + "epoch": 0.7557128291249325, + "grad_norm": 3.640625, + "learning_rate": 8.395522388059703e-06, + "loss": 0.9728, + "step": 1575 + }, + { + "epoch": 0.7581119174713609, + "grad_norm": 3.03125, + "learning_rate": 8.42217484008529e-06, + "loss": 0.9373, + "step": 1580 + }, + { + "epoch": 0.7605110058177892, + "grad_norm": 3.65625, + "learning_rate": 8.448827292110876e-06, + "loss": 0.9553, + "step": 1585 + }, + { + "epoch": 0.7629100941642176, + "grad_norm": 3.09375, + "learning_rate": 8.475479744136462e-06, + "loss": 0.8991, + "step": 1590 + }, + { + "epoch": 0.765309182510646, + "grad_norm": 3.125, + "learning_rate": 8.502132196162046e-06, + "loss": 1.0188, + "step": 1595 + }, + { + "epoch": 0.7677082708570743, + "grad_norm": 3.53125, + "learning_rate": 8.528784648187633e-06, + "loss": 1.0482, + "step": 1600 + }, + { + "epoch": 0.7677082708570743, + "eval_loss": 1.0007458925247192, + "eval_runtime": 175.7117, + "eval_samples_per_second": 42.171, + "eval_steps_per_second": 10.546, + "step": 1600 + }, + { + "epoch": 0.7701073592035027, + "grad_norm": 2.75, + "learning_rate": 8.55543710021322e-06, + "loss": 0.9521, + "step": 1605 + }, + { + "epoch": 0.772506447549931, + "grad_norm": 2.875, + "learning_rate": 8.582089552238807e-06, + "loss": 0.9527, + "step": 1610 + }, + { + "epoch": 0.7749055358963594, + "grad_norm": 3.046875, + "learning_rate": 8.608742004264393e-06, + "loss": 1.0012, + "step": 1615 + }, + { + "epoch": 0.7773046242427878, + "grad_norm": 3.203125, + "learning_rate": 8.63539445628998e-06, + "loss": 0.9729, + "step": 1620 + }, + { + "epoch": 0.7797037125892161, + "grad_norm": 4.1875, + "learning_rate": 8.662046908315566e-06, + "loss": 0.9014, + "step": 1625 + }, + { + "epoch": 0.7821028009356444, + "grad_norm": 3.03125, + "learning_rate": 8.688699360341152e-06, + "loss": 1.0124, + "step": 1630 + }, + { + "epoch": 0.7845018892820728, + "grad_norm": 3.078125, + "learning_rate": 8.715351812366739e-06, + "loss": 0.9066, + "step": 1635 + }, + { + "epoch": 0.7869009776285012, + "grad_norm": 3.421875, + "learning_rate": 8.742004264392325e-06, + "loss": 0.9101, + "step": 1640 + }, + { + "epoch": 0.7893000659749295, + "grad_norm": 3.796875, + "learning_rate": 8.768656716417911e-06, + "loss": 0.9386, + "step": 1645 + }, + { + "epoch": 0.7916991543213578, + "grad_norm": 3.6875, + "learning_rate": 8.795309168443498e-06, + "loss": 0.9139, + "step": 1650 + }, + { + "epoch": 0.7940982426677863, + "grad_norm": 3.40625, + "learning_rate": 8.821961620469084e-06, + "loss": 0.9178, + "step": 1655 + }, + { + "epoch": 0.7964973310142146, + "grad_norm": 3.203125, + "learning_rate": 8.84861407249467e-06, + "loss": 0.9809, + "step": 1660 + }, + { + "epoch": 0.7988964193606429, + "grad_norm": 3.34375, + "learning_rate": 8.875266524520257e-06, + "loss": 1.0115, + "step": 1665 + }, + { + "epoch": 0.8012955077070714, + "grad_norm": 2.890625, + "learning_rate": 8.901918976545843e-06, + "loss": 0.9548, + "step": 1670 + }, + { + "epoch": 0.8036945960534997, + "grad_norm": 2.625, + "learning_rate": 8.92857142857143e-06, + "loss": 0.9575, + "step": 1675 + }, + { + "epoch": 0.806093684399928, + "grad_norm": 3.203125, + "learning_rate": 8.955223880597016e-06, + "loss": 1.0284, + "step": 1680 + }, + { + "epoch": 0.8084927727463563, + "grad_norm": 3.265625, + "learning_rate": 8.981876332622602e-06, + "loss": 0.9645, + "step": 1685 + }, + { + "epoch": 0.8108918610927848, + "grad_norm": 4.09375, + "learning_rate": 9.008528784648188e-06, + "loss": 0.9457, + "step": 1690 + }, + { + "epoch": 0.8132909494392131, + "grad_norm": 4.21875, + "learning_rate": 9.035181236673775e-06, + "loss": 0.9795, + "step": 1695 + }, + { + "epoch": 0.8156900377856414, + "grad_norm": 4.09375, + "learning_rate": 9.06183368869936e-06, + "loss": 0.9447, + "step": 1700 + }, + { + "epoch": 0.8156900377856414, + "eval_loss": 0.9945608377456665, + "eval_runtime": 175.6059, + "eval_samples_per_second": 42.197, + "eval_steps_per_second": 10.552, + "step": 1700 + }, + { + "epoch": 0.8180891261320699, + "grad_norm": 3.765625, + "learning_rate": 9.088486140724947e-06, + "loss": 1.0695, + "step": 1705 + }, + { + "epoch": 0.8204882144784982, + "grad_norm": 3.15625, + "learning_rate": 9.115138592750533e-06, + "loss": 0.9614, + "step": 1710 + }, + { + "epoch": 0.8228873028249265, + "grad_norm": 3.734375, + "learning_rate": 9.14179104477612e-06, + "loss": 1.055, + "step": 1715 + }, + { + "epoch": 0.8252863911713549, + "grad_norm": 3.25, + "learning_rate": 9.168443496801706e-06, + "loss": 0.9108, + "step": 1720 + }, + { + "epoch": 0.8276854795177833, + "grad_norm": 5.09375, + "learning_rate": 9.195095948827292e-06, + "loss": 0.9487, + "step": 1725 + }, + { + "epoch": 0.8300845678642116, + "grad_norm": 3.109375, + "learning_rate": 9.221748400852879e-06, + "loss": 0.9393, + "step": 1730 + }, + { + "epoch": 0.8324836562106399, + "grad_norm": 2.953125, + "learning_rate": 9.248400852878465e-06, + "loss": 0.9992, + "step": 1735 + }, + { + "epoch": 0.8348827445570683, + "grad_norm": 3.015625, + "learning_rate": 9.275053304904051e-06, + "loss": 1.0015, + "step": 1740 + }, + { + "epoch": 0.8372818329034967, + "grad_norm": 3.234375, + "learning_rate": 9.30170575692964e-06, + "loss": 0.977, + "step": 1745 + }, + { + "epoch": 0.839680921249925, + "grad_norm": 3.53125, + "learning_rate": 9.328358208955226e-06, + "loss": 1.0238, + "step": 1750 + }, + { + "epoch": 0.8420800095963534, + "grad_norm": 2.78125, + "learning_rate": 9.35501066098081e-06, + "loss": 0.9109, + "step": 1755 + }, + { + "epoch": 0.8444790979427818, + "grad_norm": 3.78125, + "learning_rate": 9.381663113006397e-06, + "loss": 0.9748, + "step": 1760 + }, + { + "epoch": 0.8468781862892101, + "grad_norm": 2.890625, + "learning_rate": 9.408315565031983e-06, + "loss": 0.9892, + "step": 1765 + }, + { + "epoch": 0.8492772746356385, + "grad_norm": 3.4375, + "learning_rate": 9.43496801705757e-06, + "loss": 0.9563, + "step": 1770 + }, + { + "epoch": 0.8516763629820668, + "grad_norm": 3.1875, + "learning_rate": 9.461620469083156e-06, + "loss": 1.0417, + "step": 1775 + }, + { + "epoch": 0.8540754513284952, + "grad_norm": 2.828125, + "learning_rate": 9.488272921108744e-06, + "loss": 0.921, + "step": 1780 + }, + { + "epoch": 0.8564745396749235, + "grad_norm": 3.53125, + "learning_rate": 9.51492537313433e-06, + "loss": 1.0076, + "step": 1785 + }, + { + "epoch": 0.8588736280213519, + "grad_norm": 3.515625, + "learning_rate": 9.541577825159916e-06, + "loss": 1.0417, + "step": 1790 + }, + { + "epoch": 0.8612727163677802, + "grad_norm": 3.75, + "learning_rate": 9.568230277185503e-06, + "loss": 0.934, + "step": 1795 + }, + { + "epoch": 0.8636718047142086, + "grad_norm": 3.78125, + "learning_rate": 9.594882729211089e-06, + "loss": 1.0, + "step": 1800 + }, + { + "epoch": 0.8636718047142086, + "eval_loss": 0.9894086718559265, + "eval_runtime": 175.7987, + "eval_samples_per_second": 42.15, + "eval_steps_per_second": 10.54, + "step": 1800 + }, + { + "epoch": 0.866070893060637, + "grad_norm": 2.765625, + "learning_rate": 9.621535181236673e-06, + "loss": 0.9134, + "step": 1805 + }, + { + "epoch": 0.8684699814070653, + "grad_norm": 3.078125, + "learning_rate": 9.64818763326226e-06, + "loss": 1.033, + "step": 1810 + }, + { + "epoch": 0.8708690697534937, + "grad_norm": 3.5, + "learning_rate": 9.674840085287848e-06, + "loss": 1.0698, + "step": 1815 + }, + { + "epoch": 0.873268158099922, + "grad_norm": 2.890625, + "learning_rate": 9.701492537313434e-06, + "loss": 1.0128, + "step": 1820 + }, + { + "epoch": 0.8756672464463504, + "grad_norm": 3.171875, + "learning_rate": 9.72814498933902e-06, + "loss": 0.9641, + "step": 1825 + }, + { + "epoch": 0.8780663347927787, + "grad_norm": 3.53125, + "learning_rate": 9.754797441364607e-06, + "loss": 1.0036, + "step": 1830 + }, + { + "epoch": 0.8804654231392071, + "grad_norm": 3.515625, + "learning_rate": 9.781449893390193e-06, + "loss": 1.0096, + "step": 1835 + }, + { + "epoch": 0.8828645114856355, + "grad_norm": 3.78125, + "learning_rate": 9.80810234541578e-06, + "loss": 0.9844, + "step": 1840 + }, + { + "epoch": 0.8852635998320638, + "grad_norm": 2.796875, + "learning_rate": 9.834754797441366e-06, + "loss": 0.9803, + "step": 1845 + }, + { + "epoch": 0.8876626881784921, + "grad_norm": 3.828125, + "learning_rate": 9.861407249466952e-06, + "loss": 0.9482, + "step": 1850 + }, + { + "epoch": 0.8900617765249206, + "grad_norm": 2.71875, + "learning_rate": 9.888059701492538e-06, + "loss": 0.8553, + "step": 1855 + }, + { + "epoch": 0.8924608648713489, + "grad_norm": 3.0, + "learning_rate": 9.914712153518125e-06, + "loss": 0.9406, + "step": 1860 + }, + { + "epoch": 0.8948599532177772, + "grad_norm": 2.875, + "learning_rate": 9.941364605543711e-06, + "loss": 1.0665, + "step": 1865 + }, + { + "epoch": 0.8972590415642056, + "grad_norm": 2.5625, + "learning_rate": 9.968017057569297e-06, + "loss": 0.9742, + "step": 1870 + }, + { + "epoch": 0.899658129910634, + "grad_norm": 3.046875, + "learning_rate": 9.994669509594884e-06, + "loss": 0.9881, + "step": 1875 + }, + { + "epoch": 0.9020572182570623, + "grad_norm": 3.4375, + "learning_rate": 9.999979383980725e-06, + "loss": 0.8914, + "step": 1880 + }, + { + "epoch": 0.9044563066034906, + "grad_norm": 3.203125, + "learning_rate": 9.999895631693786e-06, + "loss": 0.9904, + "step": 1885 + }, + { + "epoch": 0.9068553949499191, + "grad_norm": 3.203125, + "learning_rate": 9.999747455716298e-06, + "loss": 1.0054, + "step": 1890 + }, + { + "epoch": 0.9092544832963474, + "grad_norm": 3.40625, + "learning_rate": 9.999534857957508e-06, + "loss": 1.0812, + "step": 1895 + }, + { + "epoch": 0.9116535716427757, + "grad_norm": 3.75, + "learning_rate": 9.999257841156743e-06, + "loss": 0.9685, + "step": 1900 + }, + { + "epoch": 0.9116535716427757, + "eval_loss": 0.9848875999450684, + "eval_runtime": 177.1556, + "eval_samples_per_second": 41.828, + "eval_steps_per_second": 10.46, + "step": 1900 + }, + { + "epoch": 0.9140526599892042, + "grad_norm": 3.71875, + "learning_rate": 9.998916408883365e-06, + "loss": 1.0029, + "step": 1905 + }, + { + "epoch": 0.9164517483356325, + "grad_norm": 3.125, + "learning_rate": 9.99851056553673e-06, + "loss": 0.9063, + "step": 1910 + }, + { + "epoch": 0.9188508366820608, + "grad_norm": 3.53125, + "learning_rate": 9.998040316346134e-06, + "loss": 1.0225, + "step": 1915 + }, + { + "epoch": 0.9212499250284891, + "grad_norm": 7.5625, + "learning_rate": 9.99750566737074e-06, + "loss": 0.9892, + "step": 1920 + }, + { + "epoch": 0.9236490133749176, + "grad_norm": 3.28125, + "learning_rate": 9.996906625499504e-06, + "loss": 0.9066, + "step": 1925 + }, + { + "epoch": 0.9260481017213459, + "grad_norm": 3.21875, + "learning_rate": 9.996243198451085e-06, + "loss": 1.0039, + "step": 1930 + }, + { + "epoch": 0.9284471900677742, + "grad_norm": 3.390625, + "learning_rate": 9.995515394773744e-06, + "loss": 0.9956, + "step": 1935 + }, + { + "epoch": 0.9308462784142026, + "grad_norm": 3.546875, + "learning_rate": 9.99472322384524e-06, + "loss": 0.9169, + "step": 1940 + }, + { + "epoch": 0.933245366760631, + "grad_norm": 3.15625, + "learning_rate": 9.993866695872699e-06, + "loss": 0.9753, + "step": 1945 + }, + { + "epoch": 0.9356444551070593, + "grad_norm": 3.65625, + "learning_rate": 9.992945821892488e-06, + "loss": 1.0543, + "step": 1950 + }, + { + "epoch": 0.9380435434534877, + "grad_norm": 3.375, + "learning_rate": 9.991960613770078e-06, + "loss": 1.039, + "step": 1955 + }, + { + "epoch": 0.940442631799916, + "grad_norm": 3.25, + "learning_rate": 9.990911084199879e-06, + "loss": 0.9921, + "step": 1960 + }, + { + "epoch": 0.9428417201463444, + "grad_norm": 3.25, + "learning_rate": 9.98979724670509e-06, + "loss": 1.0042, + "step": 1965 + }, + { + "epoch": 0.9452408084927727, + "grad_norm": 4.25, + "learning_rate": 9.988619115637514e-06, + "loss": 0.9578, + "step": 1970 + }, + { + "epoch": 0.9476398968392011, + "grad_norm": 4.71875, + "learning_rate": 9.98737670617738e-06, + "loss": 0.9997, + "step": 1975 + }, + { + "epoch": 0.9500389851856295, + "grad_norm": 3.5, + "learning_rate": 9.98607003433314e-06, + "loss": 1.0004, + "step": 1980 + }, + { + "epoch": 0.9524380735320578, + "grad_norm": 3.15625, + "learning_rate": 9.98469911694127e-06, + "loss": 0.8219, + "step": 1985 + }, + { + "epoch": 0.9548371618784862, + "grad_norm": 2.90625, + "learning_rate": 9.983263971666051e-06, + "loss": 1.0284, + "step": 1990 + }, + { + "epoch": 0.9572362502249145, + "grad_norm": 3.703125, + "learning_rate": 9.981764616999339e-06, + "loss": 1.022, + "step": 1995 + }, + { + "epoch": 0.9596353385713429, + "grad_norm": 3.3125, + "learning_rate": 9.980201072260332e-06, + "loss": 0.8576, + "step": 2000 + }, + { + "epoch": 0.9596353385713429, + "eval_loss": 0.9806957244873047, + "eval_runtime": 176.763, + "eval_samples_per_second": 41.921, + "eval_steps_per_second": 10.483, + "step": 2000 + }, + { + "epoch": 0.9620344269177713, + "grad_norm": 3.15625, + "learning_rate": 9.978573357595314e-06, + "loss": 1.0717, + "step": 2005 + }, + { + "epoch": 0.9644335152641996, + "grad_norm": 3.328125, + "learning_rate": 9.9768814939774e-06, + "loss": 0.9899, + "step": 2010 + }, + { + "epoch": 0.966832603610628, + "grad_norm": 3.546875, + "learning_rate": 9.975125503206262e-06, + "loss": 1.047, + "step": 2015 + }, + { + "epoch": 0.9692316919570563, + "grad_norm": 2.90625, + "learning_rate": 9.973305407907856e-06, + "loss": 0.8933, + "step": 2020 + }, + { + "epoch": 0.9716307803034847, + "grad_norm": 3.296875, + "learning_rate": 9.971421231534123e-06, + "loss": 0.9153, + "step": 2025 + }, + { + "epoch": 0.974029868649913, + "grad_norm": 3.34375, + "learning_rate": 9.96947299836269e-06, + "loss": 0.9508, + "step": 2030 + }, + { + "epoch": 0.9764289569963414, + "grad_norm": 3.59375, + "learning_rate": 9.967460733496552e-06, + "loss": 0.945, + "step": 2035 + }, + { + "epoch": 0.9788280453427698, + "grad_norm": 3.359375, + "learning_rate": 9.965384462863757e-06, + "loss": 0.9655, + "step": 2040 + }, + { + "epoch": 0.9812271336891981, + "grad_norm": 3.28125, + "learning_rate": 9.96324421321707e-06, + "loss": 1.094, + "step": 2045 + }, + { + "epoch": 0.9836262220356264, + "grad_norm": 3.1875, + "learning_rate": 9.961040012133618e-06, + "loss": 0.8919, + "step": 2050 + }, + { + "epoch": 0.9860253103820548, + "grad_norm": 3.421875, + "learning_rate": 9.958771888014549e-06, + "loss": 1.0294, + "step": 2055 + }, + { + "epoch": 0.9884243987284832, + "grad_norm": 4.03125, + "learning_rate": 9.95643987008466e-06, + "loss": 1.0486, + "step": 2060 + }, + { + "epoch": 0.9908234870749115, + "grad_norm": 2.71875, + "learning_rate": 9.954043988392017e-06, + "loss": 1.0447, + "step": 2065 + }, + { + "epoch": 0.9932225754213398, + "grad_norm": 3.578125, + "learning_rate": 9.951584273807574e-06, + "loss": 0.9619, + "step": 2070 + }, + { + "epoch": 0.9956216637677683, + "grad_norm": 3.296875, + "learning_rate": 9.949060758024768e-06, + "loss": 0.9278, + "step": 2075 + }, + { + "epoch": 0.9980207521141966, + "grad_norm": 3.453125, + "learning_rate": 9.946473473559122e-06, + "loss": 0.9724, + "step": 2080 + }, + { + "epoch": 1.000419840460625, + "grad_norm": 3.296875, + "learning_rate": 9.943822453747811e-06, + "loss": 0.9471, + "step": 2085 + }, + { + "epoch": 1.0028189288070533, + "grad_norm": 4.03125, + "learning_rate": 9.941107732749247e-06, + "loss": 0.8855, + "step": 2090 + }, + { + "epoch": 1.0052180171534817, + "grad_norm": 3.109375, + "learning_rate": 9.938329345542626e-06, + "loss": 1.013, + "step": 2095 + }, + { + "epoch": 1.0076171054999101, + "grad_norm": 2.578125, + "learning_rate": 9.935487327927487e-06, + "loss": 0.8853, + "step": 2100 + }, + { + "epoch": 1.0076171054999101, + "eval_loss": 0.9774662256240845, + "eval_runtime": 176.4723, + "eval_samples_per_second": 41.99, + "eval_steps_per_second": 10.5, + "step": 2100 + }, + { + "epoch": 1.0100161938463383, + "grad_norm": 3.0, + "learning_rate": 9.93258171652325e-06, + "loss": 1.0145, + "step": 2105 + }, + { + "epoch": 1.0124152821927668, + "grad_norm": 2.546875, + "learning_rate": 9.929612548768735e-06, + "loss": 1.0052, + "step": 2110 + }, + { + "epoch": 1.0148143705391952, + "grad_norm": 3.421875, + "learning_rate": 9.926579862921693e-06, + "loss": 0.9061, + "step": 2115 + }, + { + "epoch": 1.0172134588856234, + "grad_norm": 3.15625, + "learning_rate": 9.923483698058301e-06, + "loss": 0.7924, + "step": 2120 + }, + { + "epoch": 1.0196125472320519, + "grad_norm": 3.0625, + "learning_rate": 9.920324094072663e-06, + "loss": 0.7767, + "step": 2125 + }, + { + "epoch": 1.02201163557848, + "grad_norm": 4.15625, + "learning_rate": 9.917101091676302e-06, + "loss": 0.9295, + "step": 2130 + }, + { + "epoch": 1.0244107239249085, + "grad_norm": 3.5, + "learning_rate": 9.913814732397624e-06, + "loss": 0.8664, + "step": 2135 + }, + { + "epoch": 1.026809812271337, + "grad_norm": 3.140625, + "learning_rate": 9.910465058581395e-06, + "loss": 1.0206, + "step": 2140 + }, + { + "epoch": 1.0292089006177652, + "grad_norm": 2.9375, + "learning_rate": 9.907052113388183e-06, + "loss": 0.9652, + "step": 2145 + }, + { + "epoch": 1.0316079889641936, + "grad_norm": 7.40625, + "learning_rate": 9.90357594079381e-06, + "loss": 0.795, + "step": 2150 + }, + { + "epoch": 1.034007077310622, + "grad_norm": 3.5, + "learning_rate": 9.900036585588788e-06, + "loss": 0.8602, + "step": 2155 + }, + { + "epoch": 1.0364061656570502, + "grad_norm": 2.90625, + "learning_rate": 9.89643409337773e-06, + "loss": 0.9133, + "step": 2160 + }, + { + "epoch": 1.0388052540034787, + "grad_norm": 2.828125, + "learning_rate": 9.892768510578777e-06, + "loss": 0.9104, + "step": 2165 + }, + { + "epoch": 1.041204342349907, + "grad_norm": 2.734375, + "learning_rate": 9.889039884422989e-06, + "loss": 0.9898, + "step": 2170 + }, + { + "epoch": 1.0436034306963353, + "grad_norm": 2.984375, + "learning_rate": 9.885248262953736e-06, + "loss": 1.031, + "step": 2175 + }, + { + "epoch": 1.0460025190427638, + "grad_norm": 3.234375, + "learning_rate": 9.88139369502609e-06, + "loss": 0.9867, + "step": 2180 + }, + { + "epoch": 1.0484016073891922, + "grad_norm": 2.921875, + "learning_rate": 9.87747623030619e-06, + "loss": 0.9899, + "step": 2185 + }, + { + "epoch": 1.0508006957356204, + "grad_norm": 2.828125, + "learning_rate": 9.873495919270593e-06, + "loss": 0.8685, + "step": 2190 + }, + { + "epoch": 1.0531997840820488, + "grad_norm": 2.859375, + "learning_rate": 9.869452813205632e-06, + "loss": 0.8505, + "step": 2195 + }, + { + "epoch": 1.0555988724284773, + "grad_norm": 3.078125, + "learning_rate": 9.865346964206762e-06, + "loss": 0.947, + "step": 2200 + }, + { + "epoch": 1.0555988724284773, + "eval_loss": 0.9739471673965454, + "eval_runtime": 175.7002, + "eval_samples_per_second": 42.174, + "eval_steps_per_second": 10.546, + "step": 2200 + }, + { + "epoch": 1.0579979607749055, + "grad_norm": 2.640625, + "learning_rate": 9.861178425177874e-06, + "loss": 0.9071, + "step": 2205 + }, + { + "epoch": 1.060397049121334, + "grad_norm": 3.578125, + "learning_rate": 9.856947249830624e-06, + "loss": 0.9053, + "step": 2210 + }, + { + "epoch": 1.0627961374677621, + "grad_norm": 3.03125, + "learning_rate": 9.852653492683735e-06, + "loss": 0.974, + "step": 2215 + }, + { + "epoch": 1.0651952258141906, + "grad_norm": 3.25, + "learning_rate": 9.848297209062299e-06, + "loss": 0.8943, + "step": 2220 + }, + { + "epoch": 1.067594314160619, + "grad_norm": 2.84375, + "learning_rate": 9.843878455097061e-06, + "loss": 0.976, + "step": 2225 + }, + { + "epoch": 1.0699934025070472, + "grad_norm": 2.828125, + "learning_rate": 9.839397287723695e-06, + "loss": 0.8868, + "step": 2230 + }, + { + "epoch": 1.0723924908534757, + "grad_norm": 3.375, + "learning_rate": 9.83485376468208e-06, + "loss": 0.9177, + "step": 2235 + }, + { + "epoch": 1.074791579199904, + "grad_norm": 2.671875, + "learning_rate": 9.830247944515536e-06, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.0771906675463323, + "grad_norm": 3.046875, + "learning_rate": 9.825579886570094e-06, + "loss": 0.9351, + "step": 2245 + }, + { + "epoch": 1.0795897558927607, + "grad_norm": 3.453125, + "learning_rate": 9.820849650993709e-06, + "loss": 0.8633, + "step": 2250 + }, + { + "epoch": 1.0819888442391892, + "grad_norm": 3.921875, + "learning_rate": 9.816057298735501e-06, + "loss": 1.0134, + "step": 2255 + }, + { + "epoch": 1.0843879325856174, + "grad_norm": 3.0625, + "learning_rate": 9.811202891544965e-06, + "loss": 0.9708, + "step": 2260 + }, + { + "epoch": 1.0867870209320458, + "grad_norm": 2.890625, + "learning_rate": 9.80628649197117e-06, + "loss": 0.868, + "step": 2265 + }, + { + "epoch": 1.0891861092784743, + "grad_norm": 3.046875, + "learning_rate": 9.80130816336196e-06, + "loss": 0.9277, + "step": 2270 + }, + { + "epoch": 1.0915851976249025, + "grad_norm": 2.78125, + "learning_rate": 9.796267969863134e-06, + "loss": 0.9102, + "step": 2275 + }, + { + "epoch": 1.093984285971331, + "grad_norm": 4.15625, + "learning_rate": 9.791165976417621e-06, + "loss": 0.8154, + "step": 2280 + }, + { + "epoch": 1.0963833743177593, + "grad_norm": 3.25, + "learning_rate": 9.786002248764642e-06, + "loss": 0.8024, + "step": 2285 + }, + { + "epoch": 1.0987824626641876, + "grad_norm": 3.03125, + "learning_rate": 9.780776853438863e-06, + "loss": 0.9436, + "step": 2290 + }, + { + "epoch": 1.101181551010616, + "grad_norm": 11.1875, + "learning_rate": 9.775489857769544e-06, + "loss": 0.888, + "step": 2295 + }, + { + "epoch": 1.1035806393570444, + "grad_norm": 3.34375, + "learning_rate": 9.770141329879658e-06, + "loss": 0.9207, + "step": 2300 + }, + { + "epoch": 1.1035806393570444, + "eval_loss": 0.9713129997253418, + "eval_runtime": 175.6734, + "eval_samples_per_second": 42.181, + "eval_steps_per_second": 10.548, + "step": 2300 + }, + { + "epoch": 1.1059797277034726, + "grad_norm": 2.84375, + "learning_rate": 9.764731338685026e-06, + "loss": 0.9329, + "step": 2305 + }, + { + "epoch": 1.108378816049901, + "grad_norm": 3.515625, + "learning_rate": 9.75925995389342e-06, + "loss": 0.9009, + "step": 2310 + }, + { + "epoch": 1.1107779043963295, + "grad_norm": 3.125, + "learning_rate": 9.753727246003677e-06, + "loss": 0.9371, + "step": 2315 + }, + { + "epoch": 1.1131769927427577, + "grad_norm": 4.09375, + "learning_rate": 9.748133286304774e-06, + "loss": 0.9952, + "step": 2320 + }, + { + "epoch": 1.1155760810891862, + "grad_norm": 3.15625, + "learning_rate": 9.74247814687492e-06, + "loss": 0.8928, + "step": 2325 + }, + { + "epoch": 1.1179751694356144, + "grad_norm": 3.453125, + "learning_rate": 9.73676190058063e-06, + "loss": 0.7709, + "step": 2330 + }, + { + "epoch": 1.1203742577820428, + "grad_norm": 4.65625, + "learning_rate": 9.730984621075777e-06, + "loss": 0.9633, + "step": 2335 + }, + { + "epoch": 1.1227733461284712, + "grad_norm": 3.578125, + "learning_rate": 9.725146382800644e-06, + "loss": 0.9883, + "step": 2340 + }, + { + "epoch": 1.1251724344748995, + "grad_norm": 2.796875, + "learning_rate": 9.719247260980977e-06, + "loss": 0.8631, + "step": 2345 + }, + { + "epoch": 1.1275715228213279, + "grad_norm": 3.296875, + "learning_rate": 9.713287331627002e-06, + "loss": 0.9008, + "step": 2350 + }, + { + "epoch": 1.1299706111677563, + "grad_norm": 3.21875, + "learning_rate": 9.70726667153245e-06, + "loss": 0.8715, + "step": 2355 + }, + { + "epoch": 1.1323696995141845, + "grad_norm": 3.078125, + "learning_rate": 9.701185358273568e-06, + "loss": 0.9063, + "step": 2360 + }, + { + "epoch": 1.134768787860613, + "grad_norm": 3.15625, + "learning_rate": 9.69504347020812e-06, + "loss": 0.7194, + "step": 2365 + }, + { + "epoch": 1.1371678762070414, + "grad_norm": 3.0, + "learning_rate": 9.688841086474381e-06, + "loss": 0.8225, + "step": 2370 + }, + { + "epoch": 1.1395669645534696, + "grad_norm": 2.921875, + "learning_rate": 9.682578286990105e-06, + "loss": 1.033, + "step": 2375 + }, + { + "epoch": 1.141966052899898, + "grad_norm": 3.484375, + "learning_rate": 9.676255152451508e-06, + "loss": 0.9202, + "step": 2380 + }, + { + "epoch": 1.1443651412463265, + "grad_norm": 2.890625, + "learning_rate": 9.669871764332226e-06, + "loss": 0.9405, + "step": 2385 + }, + { + "epoch": 1.1467642295927547, + "grad_norm": 3.515625, + "learning_rate": 9.663428204882258e-06, + "loss": 0.9197, + "step": 2390 + }, + { + "epoch": 1.1491633179391831, + "grad_norm": 2.953125, + "learning_rate": 9.656924557126913e-06, + "loss": 0.9512, + "step": 2395 + }, + { + "epoch": 1.1515624062856116, + "grad_norm": 2.96875, + "learning_rate": 9.650360904865738e-06, + "loss": 0.8596, + "step": 2400 + }, + { + "epoch": 1.1515624062856116, + "eval_loss": 0.9691145420074463, + "eval_runtime": 175.6179, + "eval_samples_per_second": 42.194, + "eval_steps_per_second": 10.551, + "step": 2400 + }, + { + "epoch": 1.1539614946320398, + "grad_norm": 2.796875, + "learning_rate": 9.643737332671441e-06, + "loss": 1.0536, + "step": 2405 + }, + { + "epoch": 1.1563605829784682, + "grad_norm": 2.59375, + "learning_rate": 9.637053925888793e-06, + "loss": 0.8452, + "step": 2410 + }, + { + "epoch": 1.1587596713248964, + "grad_norm": 2.640625, + "learning_rate": 9.630310770633542e-06, + "loss": 0.7487, + "step": 2415 + }, + { + "epoch": 1.1611587596713249, + "grad_norm": 2.640625, + "learning_rate": 9.623507953791287e-06, + "loss": 1.0087, + "step": 2420 + }, + { + "epoch": 1.1635578480177533, + "grad_norm": 3.265625, + "learning_rate": 9.616645563016373e-06, + "loss": 0.9177, + "step": 2425 + }, + { + "epoch": 1.1659569363641815, + "grad_norm": 2.765625, + "learning_rate": 9.609723686730754e-06, + "loss": 0.8388, + "step": 2430 + }, + { + "epoch": 1.16835602471061, + "grad_norm": 3.484375, + "learning_rate": 9.602742414122855e-06, + "loss": 0.962, + "step": 2435 + }, + { + "epoch": 1.1707551130570384, + "grad_norm": 3.46875, + "learning_rate": 9.59570183514642e-06, + "loss": 0.9372, + "step": 2440 + }, + { + "epoch": 1.1731542014034666, + "grad_norm": 3.53125, + "learning_rate": 9.588602040519363e-06, + "loss": 0.9363, + "step": 2445 + }, + { + "epoch": 1.175553289749895, + "grad_norm": 3.21875, + "learning_rate": 9.581443121722585e-06, + "loss": 0.8577, + "step": 2450 + }, + { + "epoch": 1.1779523780963235, + "grad_norm": 3.578125, + "learning_rate": 9.574225170998807e-06, + "loss": 0.9581, + "step": 2455 + }, + { + "epoch": 1.1803514664427517, + "grad_norm": 2.890625, + "learning_rate": 9.566948281351373e-06, + "loss": 0.9223, + "step": 2460 + }, + { + "epoch": 1.1827505547891801, + "grad_norm": 3.296875, + "learning_rate": 9.55961254654306e-06, + "loss": 1.054, + "step": 2465 + }, + { + "epoch": 1.1851496431356086, + "grad_norm": 4.03125, + "learning_rate": 9.552218061094863e-06, + "loss": 0.8736, + "step": 2470 + }, + { + "epoch": 1.1875487314820368, + "grad_norm": 2.6875, + "learning_rate": 9.544764920284775e-06, + "loss": 0.8971, + "step": 2475 + }, + { + "epoch": 1.1899478198284652, + "grad_norm": 2.734375, + "learning_rate": 9.537253220146574e-06, + "loss": 0.961, + "step": 2480 + }, + { + "epoch": 1.1923469081748936, + "grad_norm": 5.6875, + "learning_rate": 9.529683057468564e-06, + "loss": 0.979, + "step": 2485 + }, + { + "epoch": 1.1947459965213219, + "grad_norm": 3.25, + "learning_rate": 9.522054529792348e-06, + "loss": 0.8174, + "step": 2490 + }, + { + "epoch": 1.1971450848677503, + "grad_norm": 3.15625, + "learning_rate": 9.514367735411558e-06, + "loss": 0.7918, + "step": 2495 + }, + { + "epoch": 1.1995441732141785, + "grad_norm": 2.65625, + "learning_rate": 9.506622773370595e-06, + "loss": 1.0277, + "step": 2500 + }, + { + "epoch": 1.1995441732141785, + "eval_loss": 0.9654711484909058, + "eval_runtime": 175.623, + "eval_samples_per_second": 42.193, + "eval_steps_per_second": 10.551, + "step": 2500 + }, + { + "epoch": 1.201943261560607, + "grad_norm": 6.3125, + "learning_rate": 9.498819743463347e-06, + "loss": 1.0265, + "step": 2505 + }, + { + "epoch": 1.2043423499070354, + "grad_norm": 4.09375, + "learning_rate": 9.490958746231911e-06, + "loss": 0.9911, + "step": 2510 + }, + { + "epoch": 1.2067414382534638, + "grad_norm": 2.609375, + "learning_rate": 9.483039882965293e-06, + "loss": 0.9705, + "step": 2515 + }, + { + "epoch": 1.209140526599892, + "grad_norm": 3.421875, + "learning_rate": 9.4750632556981e-06, + "loss": 0.9417, + "step": 2520 + }, + { + "epoch": 1.2115396149463205, + "grad_norm": 4.59375, + "learning_rate": 9.467028967209232e-06, + "loss": 0.9103, + "step": 2525 + }, + { + "epoch": 1.2139387032927487, + "grad_norm": 2.734375, + "learning_rate": 9.458937121020555e-06, + "loss": 0.8767, + "step": 2530 + }, + { + "epoch": 1.216337791639177, + "grad_norm": 3.1875, + "learning_rate": 9.45078782139556e-06, + "loss": 0.8681, + "step": 2535 + }, + { + "epoch": 1.2187368799856055, + "grad_norm": 2.578125, + "learning_rate": 9.442581173338032e-06, + "loss": 0.8802, + "step": 2540 + }, + { + "epoch": 1.2211359683320337, + "grad_norm": 3.125, + "learning_rate": 9.43431728259069e-06, + "loss": 0.8075, + "step": 2545 + }, + { + "epoch": 1.2235350566784622, + "grad_norm": 3.4375, + "learning_rate": 9.425996255633825e-06, + "loss": 0.8549, + "step": 2550 + }, + { + "epoch": 1.2259341450248906, + "grad_norm": 2.984375, + "learning_rate": 9.417618199683926e-06, + "loss": 0.8986, + "step": 2555 + }, + { + "epoch": 1.2283332333713188, + "grad_norm": 2.703125, + "learning_rate": 9.409183222692307e-06, + "loss": 0.9454, + "step": 2560 + }, + { + "epoch": 1.2307323217177473, + "grad_norm": 3.203125, + "learning_rate": 9.4006914333437e-06, + "loss": 0.7339, + "step": 2565 + }, + { + "epoch": 1.2331314100641757, + "grad_norm": 3.734375, + "learning_rate": 9.392142941054878e-06, + "loss": 0.9632, + "step": 2570 + }, + { + "epoch": 1.235530498410604, + "grad_norm": 3.0625, + "learning_rate": 9.38353785597322e-06, + "loss": 0.9105, + "step": 2575 + }, + { + "epoch": 1.2379295867570324, + "grad_norm": 2.875, + "learning_rate": 9.374876288975307e-06, + "loss": 0.8167, + "step": 2580 + }, + { + "epoch": 1.2403286751034606, + "grad_norm": 3.0625, + "learning_rate": 9.366158351665495e-06, + "loss": 0.871, + "step": 2585 + }, + { + "epoch": 1.242727763449889, + "grad_norm": 3.171875, + "learning_rate": 9.357384156374465e-06, + "loss": 0.8765, + "step": 2590 + }, + { + "epoch": 1.2451268517963174, + "grad_norm": 2.8125, + "learning_rate": 9.348553816157785e-06, + "loss": 1.032, + "step": 2595 + }, + { + "epoch": 1.2475259401427459, + "grad_norm": 4.84375, + "learning_rate": 9.339667444794456e-06, + "loss": 0.9646, + "step": 2600 + }, + { + "epoch": 1.2475259401427459, + "eval_loss": 0.9630805850028992, + "eval_runtime": 175.7116, + "eval_samples_per_second": 42.171, + "eval_steps_per_second": 10.546, + "step": 2600 + }, + { + "epoch": 1.249925028489174, + "grad_norm": 2.609375, + "learning_rate": 9.33072515678543e-06, + "loss": 0.8976, + "step": 2605 + }, + { + "epoch": 1.2523241168356025, + "grad_norm": 2.90625, + "learning_rate": 9.321727067352153e-06, + "loss": 0.9049, + "step": 2610 + }, + { + "epoch": 1.2547232051820307, + "grad_norm": 3.4375, + "learning_rate": 9.312673292435073e-06, + "loss": 0.9314, + "step": 2615 + }, + { + "epoch": 1.2571222935284592, + "grad_norm": 2.734375, + "learning_rate": 9.30356394869214e-06, + "loss": 0.9375, + "step": 2620 + }, + { + "epoch": 1.2595213818748876, + "grad_norm": 4.34375, + "learning_rate": 9.294399153497316e-06, + "loss": 0.886, + "step": 2625 + }, + { + "epoch": 1.261920470221316, + "grad_norm": 3.453125, + "learning_rate": 9.28517902493905e-06, + "loss": 0.8899, + "step": 2630 + }, + { + "epoch": 1.2643195585677443, + "grad_norm": 3.46875, + "learning_rate": 9.275903681818763e-06, + "loss": 0.8431, + "step": 2635 + }, + { + "epoch": 1.2667186469141727, + "grad_norm": 3.328125, + "learning_rate": 9.26657324364932e-06, + "loss": 0.9004, + "step": 2640 + }, + { + "epoch": 1.269117735260601, + "grad_norm": 3.078125, + "learning_rate": 9.257187830653478e-06, + "loss": 0.8216, + "step": 2645 + }, + { + "epoch": 1.2715168236070293, + "grad_norm": 3.21875, + "learning_rate": 9.247747563762353e-06, + "loss": 0.8482, + "step": 2650 + }, + { + "epoch": 1.2739159119534578, + "grad_norm": 2.9375, + "learning_rate": 9.23825256461385e-06, + "loss": 0.903, + "step": 2655 + }, + { + "epoch": 1.276315000299886, + "grad_norm": 3.1875, + "learning_rate": 9.228702955551101e-06, + "loss": 0.8568, + "step": 2660 + }, + { + "epoch": 1.2787140886463144, + "grad_norm": 3.109375, + "learning_rate": 9.219098859620884e-06, + "loss": 0.9839, + "step": 2665 + }, + { + "epoch": 1.2811131769927426, + "grad_norm": 3.671875, + "learning_rate": 9.209440400572045e-06, + "loss": 0.9935, + "step": 2670 + }, + { + "epoch": 1.283512265339171, + "grad_norm": 2.78125, + "learning_rate": 9.199727702853896e-06, + "loss": 0.9291, + "step": 2675 + }, + { + "epoch": 1.2859113536855995, + "grad_norm": 3.375, + "learning_rate": 9.189960891614616e-06, + "loss": 0.8867, + "step": 2680 + }, + { + "epoch": 1.288310442032028, + "grad_norm": 3.546875, + "learning_rate": 9.180140092699636e-06, + "loss": 0.9109, + "step": 2685 + }, + { + "epoch": 1.2907095303784561, + "grad_norm": 2.84375, + "learning_rate": 9.17026543265002e-06, + "loss": 0.9201, + "step": 2690 + }, + { + "epoch": 1.2931086187248846, + "grad_norm": 3.703125, + "learning_rate": 9.160337038700834e-06, + "loss": 0.9066, + "step": 2695 + }, + { + "epoch": 1.2955077070713128, + "grad_norm": 3.34375, + "learning_rate": 9.150355038779504e-06, + "loss": 0.8583, + "step": 2700 + }, + { + "epoch": 1.2955077070713128, + "eval_loss": 0.9613306522369385, + "eval_runtime": 180.9335, + "eval_samples_per_second": 40.954, + "eval_steps_per_second": 10.241, + "step": 2700 + }, + { + "epoch": 1.2979067954177412, + "grad_norm": 2.796875, + "learning_rate": 9.140319561504168e-06, + "loss": 0.8036, + "step": 2705 + }, + { + "epoch": 1.3003058837641697, + "grad_norm": 4.125, + "learning_rate": 9.13023073618202e-06, + "loss": 0.9962, + "step": 2710 + }, + { + "epoch": 1.302704972110598, + "grad_norm": 3.0625, + "learning_rate": 9.12008869280765e-06, + "loss": 0.8352, + "step": 2715 + }, + { + "epoch": 1.3051040604570263, + "grad_norm": 2.859375, + "learning_rate": 9.109893562061353e-06, + "loss": 0.992, + "step": 2720 + }, + { + "epoch": 1.3075031488034548, + "grad_norm": 3.390625, + "learning_rate": 9.099645475307468e-06, + "loss": 0.8995, + "step": 2725 + }, + { + "epoch": 1.309902237149883, + "grad_norm": 2.796875, + "learning_rate": 9.089344564592659e-06, + "loss": 0.9768, + "step": 2730 + }, + { + "epoch": 1.3123013254963114, + "grad_norm": 3.3125, + "learning_rate": 9.078990962644237e-06, + "loss": 0.8367, + "step": 2735 + }, + { + "epoch": 1.3147004138427398, + "grad_norm": 3.03125, + "learning_rate": 9.068584802868434e-06, + "loss": 0.9742, + "step": 2740 + }, + { + "epoch": 1.317099502189168, + "grad_norm": 2.875, + "learning_rate": 9.058126219348692e-06, + "loss": 0.8591, + "step": 2745 + }, + { + "epoch": 1.3194985905355965, + "grad_norm": 2.71875, + "learning_rate": 9.047615346843938e-06, + "loss": 0.9576, + "step": 2750 + }, + { + "epoch": 1.3218976788820247, + "grad_norm": 2.859375, + "learning_rate": 9.037052320786833e-06, + "loss": 1.0105, + "step": 2755 + }, + { + "epoch": 1.3242967672284531, + "grad_norm": 3.765625, + "learning_rate": 9.026437277282044e-06, + "loss": 0.9007, + "step": 2760 + }, + { + "epoch": 1.3266958555748816, + "grad_norm": 4.53125, + "learning_rate": 9.015770353104482e-06, + "loss": 0.9398, + "step": 2765 + }, + { + "epoch": 1.32909494392131, + "grad_norm": 3.078125, + "learning_rate": 9.005051685697544e-06, + "loss": 0.9709, + "step": 2770 + }, + { + "epoch": 1.3314940322677382, + "grad_norm": 2.9375, + "learning_rate": 8.99428141317133e-06, + "loss": 0.8677, + "step": 2775 + }, + { + "epoch": 1.3338931206141666, + "grad_norm": 2.875, + "learning_rate": 8.983459674300877e-06, + "loss": 0.8374, + "step": 2780 + }, + { + "epoch": 1.3362922089605949, + "grad_norm": 3.109375, + "learning_rate": 8.972586608524371e-06, + "loss": 0.8641, + "step": 2785 + }, + { + "epoch": 1.3386912973070233, + "grad_norm": 2.875, + "learning_rate": 8.961662355941339e-06, + "loss": 0.9468, + "step": 2790 + }, + { + "epoch": 1.3410903856534517, + "grad_norm": 3.1875, + "learning_rate": 8.950687057310854e-06, + "loss": 0.9041, + "step": 2795 + }, + { + "epoch": 1.3434894739998802, + "grad_norm": 3.3125, + "learning_rate": 8.939660854049716e-06, + "loss": 0.9367, + "step": 2800 + }, + { + "epoch": 1.3434894739998802, + "eval_loss": 0.9589128494262695, + "eval_runtime": 186.8641, + "eval_samples_per_second": 39.654, + "eval_steps_per_second": 9.916, + "step": 2800 + }, + { + "epoch": 1.3458885623463084, + "grad_norm": 3.359375, + "learning_rate": 8.928583888230632e-06, + "loss": 0.9278, + "step": 2805 + }, + { + "epoch": 1.3482876506927368, + "grad_norm": 2.859375, + "learning_rate": 8.917456302580384e-06, + "loss": 0.8432, + "step": 2810 + }, + { + "epoch": 1.350686739039165, + "grad_norm": 3.078125, + "learning_rate": 8.906278240477993e-06, + "loss": 0.8627, + "step": 2815 + }, + { + "epoch": 1.3530858273855935, + "grad_norm": 2.75, + "learning_rate": 8.895049845952868e-06, + "loss": 0.8152, + "step": 2820 + }, + { + "epoch": 1.355484915732022, + "grad_norm": 3.34375, + "learning_rate": 8.883771263682949e-06, + "loss": 0.909, + "step": 2825 + }, + { + "epoch": 1.3578840040784501, + "grad_norm": 3.046875, + "learning_rate": 8.872442638992853e-06, + "loss": 0.8528, + "step": 2830 + }, + { + "epoch": 1.3602830924248785, + "grad_norm": 4.125, + "learning_rate": 8.861064117851987e-06, + "loss": 0.8544, + "step": 2835 + }, + { + "epoch": 1.362682180771307, + "grad_norm": 2.859375, + "learning_rate": 8.849635846872675e-06, + "loss": 0.905, + "step": 2840 + }, + { + "epoch": 1.3650812691177352, + "grad_norm": 2.984375, + "learning_rate": 8.83815797330827e-06, + "loss": 0.8494, + "step": 2845 + }, + { + "epoch": 1.3674803574641636, + "grad_norm": 3.453125, + "learning_rate": 8.826630645051254e-06, + "loss": 0.8055, + "step": 2850 + }, + { + "epoch": 1.369879445810592, + "grad_norm": 2.671875, + "learning_rate": 8.815054010631336e-06, + "loss": 0.8326, + "step": 2855 + }, + { + "epoch": 1.3722785341570203, + "grad_norm": 3.265625, + "learning_rate": 8.803428219213527e-06, + "loss": 0.8647, + "step": 2860 + }, + { + "epoch": 1.3746776225034487, + "grad_norm": 4.375, + "learning_rate": 8.791753420596237e-06, + "loss": 0.9319, + "step": 2865 + }, + { + "epoch": 1.377076710849877, + "grad_norm": 3.375, + "learning_rate": 8.780029765209324e-06, + "loss": 0.9639, + "step": 2870 + }, + { + "epoch": 1.3794757991963054, + "grad_norm": 3.578125, + "learning_rate": 8.768257404112175e-06, + "loss": 0.9156, + "step": 2875 + }, + { + "epoch": 1.3818748875427338, + "grad_norm": 3.484375, + "learning_rate": 8.756436488991743e-06, + "loss": 0.8767, + "step": 2880 + }, + { + "epoch": 1.3842739758891622, + "grad_norm": 2.859375, + "learning_rate": 8.744567172160601e-06, + "loss": 0.8061, + "step": 2885 + }, + { + "epoch": 1.3866730642355904, + "grad_norm": 3.90625, + "learning_rate": 8.732649606554983e-06, + "loss": 0.9058, + "step": 2890 + }, + { + "epoch": 1.3890721525820189, + "grad_norm": 3.71875, + "learning_rate": 8.720683945732807e-06, + "loss": 0.9113, + "step": 2895 + }, + { + "epoch": 1.391471240928447, + "grad_norm": 3.03125, + "learning_rate": 8.708670343871697e-06, + "loss": 0.9146, + "step": 2900 + }, + { + "epoch": 1.391471240928447, + "eval_loss": 0.9569985866546631, + "eval_runtime": 175.8282, + "eval_samples_per_second": 42.143, + "eval_steps_per_second": 10.539, + "step": 2900 + }, + { + "epoch": 1.3938703292748755, + "grad_norm": 3.25, + "learning_rate": 8.696608955766995e-06, + "loss": 0.8575, + "step": 2905 + }, + { + "epoch": 1.396269417621304, + "grad_norm": 3.109375, + "learning_rate": 8.684499936829773e-06, + "loss": 0.8904, + "step": 2910 + }, + { + "epoch": 1.3986685059677322, + "grad_norm": 2.953125, + "learning_rate": 8.67234344308483e-06, + "loss": 0.8498, + "step": 2915 + }, + { + "epoch": 1.4010675943141606, + "grad_norm": 2.6875, + "learning_rate": 8.660139631168668e-06, + "loss": 0.9119, + "step": 2920 + }, + { + "epoch": 1.403466682660589, + "grad_norm": 3.203125, + "learning_rate": 8.647888658327491e-06, + "loss": 0.9382, + "step": 2925 + }, + { + "epoch": 1.4058657710070173, + "grad_norm": 3.5, + "learning_rate": 8.635590682415172e-06, + "loss": 0.9829, + "step": 2930 + }, + { + "epoch": 1.4082648593534457, + "grad_norm": 2.78125, + "learning_rate": 8.623245861891217e-06, + "loss": 0.9363, + "step": 2935 + }, + { + "epoch": 1.4106639476998741, + "grad_norm": 3.046875, + "learning_rate": 8.610854355818727e-06, + "loss": 0.9634, + "step": 2940 + }, + { + "epoch": 1.4130630360463023, + "grad_norm": 3.40625, + "learning_rate": 8.598416323862344e-06, + "loss": 0.8603, + "step": 2945 + }, + { + "epoch": 1.4154621243927308, + "grad_norm": 2.828125, + "learning_rate": 8.585931926286197e-06, + "loss": 0.9382, + "step": 2950 + }, + { + "epoch": 1.417861212739159, + "grad_norm": 2.796875, + "learning_rate": 8.573401323951838e-06, + "loss": 0.9435, + "step": 2955 + }, + { + "epoch": 1.4202603010855874, + "grad_norm": 2.640625, + "learning_rate": 8.560824678316166e-06, + "loss": 0.9, + "step": 2960 + }, + { + "epoch": 1.4226593894320159, + "grad_norm": 2.8125, + "learning_rate": 8.548202151429351e-06, + "loss": 0.8678, + "step": 2965 + }, + { + "epoch": 1.4250584777784443, + "grad_norm": 3.265625, + "learning_rate": 8.535533905932739e-06, + "loss": 0.868, + "step": 2970 + }, + { + "epoch": 1.4274575661248725, + "grad_norm": 3.140625, + "learning_rate": 8.522820105056762e-06, + "loss": 0.9313, + "step": 2975 + }, + { + "epoch": 1.429856654471301, + "grad_norm": 2.96875, + "learning_rate": 8.510060912618836e-06, + "loss": 0.861, + "step": 2980 + }, + { + "epoch": 1.4322557428177292, + "grad_norm": 2.8125, + "learning_rate": 8.497256493021247e-06, + "loss": 0.8741, + "step": 2985 + }, + { + "epoch": 1.4346548311641576, + "grad_norm": 3.265625, + "learning_rate": 8.484407011249027e-06, + "loss": 0.9683, + "step": 2990 + }, + { + "epoch": 1.437053919510586, + "grad_norm": 3.09375, + "learning_rate": 8.471512632867844e-06, + "loss": 0.8919, + "step": 2995 + }, + { + "epoch": 1.4394530078570145, + "grad_norm": 3.34375, + "learning_rate": 8.458573524021854e-06, + "loss": 0.9697, + "step": 3000 + }, + { + "epoch": 1.4394530078570145, + "eval_loss": 0.9555841088294983, + "eval_runtime": 175.8483, + "eval_samples_per_second": 42.139, + "eval_steps_per_second": 10.537, + "step": 3000 + }, + { + "epoch": 1.4418520962034427, + "grad_norm": 3.796875, + "learning_rate": 8.445589851431563e-06, + "loss": 0.9467, + "step": 3005 + }, + { + "epoch": 1.4442511845498711, + "grad_norm": 3.734375, + "learning_rate": 8.432561782391687e-06, + "loss": 1.0304, + "step": 3010 + }, + { + "epoch": 1.4466502728962993, + "grad_norm": 4.3125, + "learning_rate": 8.419489484768988e-06, + "loss": 0.8586, + "step": 3015 + }, + { + "epoch": 1.4490493612427278, + "grad_norm": 3.53125, + "learning_rate": 8.406373127000111e-06, + "loss": 0.8946, + "step": 3020 + }, + { + "epoch": 1.4514484495891562, + "grad_norm": 3.46875, + "learning_rate": 8.393212878089418e-06, + "loss": 0.9761, + "step": 3025 + }, + { + "epoch": 1.4538475379355844, + "grad_norm": 3.4375, + "learning_rate": 8.380008907606814e-06, + "loss": 0.9166, + "step": 3030 + }, + { + "epoch": 1.4562466262820128, + "grad_norm": 2.890625, + "learning_rate": 8.366761385685547e-06, + "loss": 0.9467, + "step": 3035 + }, + { + "epoch": 1.458645714628441, + "grad_norm": 2.84375, + "learning_rate": 8.353470483020032e-06, + "loss": 0.9424, + "step": 3040 + }, + { + "epoch": 1.4610448029748695, + "grad_norm": 3.140625, + "learning_rate": 8.340136370863644e-06, + "loss": 0.9584, + "step": 3045 + }, + { + "epoch": 1.463443891321298, + "grad_norm": 2.671875, + "learning_rate": 8.326759221026513e-06, + "loss": 0.9652, + "step": 3050 + }, + { + "epoch": 1.4658429796677264, + "grad_norm": 3.390625, + "learning_rate": 8.31333920587331e-06, + "loss": 1.0062, + "step": 3055 + }, + { + "epoch": 1.4682420680141546, + "grad_norm": 3.09375, + "learning_rate": 8.299876498321022e-06, + "loss": 0.8903, + "step": 3060 + }, + { + "epoch": 1.470641156360583, + "grad_norm": 3.0625, + "learning_rate": 8.286371271836734e-06, + "loss": 0.8911, + "step": 3065 + }, + { + "epoch": 1.4730402447070112, + "grad_norm": 3.09375, + "learning_rate": 8.272823700435382e-06, + "loss": 0.8508, + "step": 3070 + }, + { + "epoch": 1.4754393330534397, + "grad_norm": 3.5625, + "learning_rate": 8.259233958677522e-06, + "loss": 0.9078, + "step": 3075 + }, + { + "epoch": 1.477838421399868, + "grad_norm": 3.203125, + "learning_rate": 8.245602221667069e-06, + "loss": 0.9197, + "step": 3080 + }, + { + "epoch": 1.4802375097462965, + "grad_norm": 3.15625, + "learning_rate": 8.231928665049057e-06, + "loss": 0.8263, + "step": 3085 + }, + { + "epoch": 1.4826365980927247, + "grad_norm": 3.40625, + "learning_rate": 8.218213465007352e-06, + "loss": 0.9468, + "step": 3090 + }, + { + "epoch": 1.4850356864391532, + "grad_norm": 2.84375, + "learning_rate": 8.204456798262408e-06, + "loss": 0.9964, + "step": 3095 + }, + { + "epoch": 1.4874347747855814, + "grad_norm": 3.765625, + "learning_rate": 8.190658842068973e-06, + "loss": 0.8713, + "step": 3100 + }, + { + "epoch": 1.4874347747855814, + "eval_loss": 0.9542006850242615, + "eval_runtime": 175.8535, + "eval_samples_per_second": 42.137, + "eval_steps_per_second": 10.537, + "step": 3100 + }, + { + "epoch": 1.4898338631320098, + "grad_norm": 3.484375, + "learning_rate": 8.176819774213807e-06, + "loss": 0.9638, + "step": 3105 + }, + { + "epoch": 1.4922329514784383, + "grad_norm": 3.015625, + "learning_rate": 8.162939773013404e-06, + "loss": 0.889, + "step": 3110 + }, + { + "epoch": 1.4946320398248665, + "grad_norm": 2.390625, + "learning_rate": 8.14901901731167e-06, + "loss": 0.8743, + "step": 3115 + }, + { + "epoch": 1.497031128171295, + "grad_norm": 3.359375, + "learning_rate": 8.135057686477644e-06, + "loss": 0.911, + "step": 3120 + }, + { + "epoch": 1.4994302165177233, + "grad_norm": 2.9375, + "learning_rate": 8.121055960403172e-06, + "loss": 0.8063, + "step": 3125 + }, + { + "epoch": 1.5018293048641516, + "grad_norm": 3.5, + "learning_rate": 8.107014019500593e-06, + "loss": 0.8361, + "step": 3130 + }, + { + "epoch": 1.50422839321058, + "grad_norm": 3.171875, + "learning_rate": 8.092932044700416e-06, + "loss": 0.8645, + "step": 3135 + }, + { + "epoch": 1.5066274815570084, + "grad_norm": 2.734375, + "learning_rate": 8.078810217448986e-06, + "loss": 0.9313, + "step": 3140 + }, + { + "epoch": 1.5090265699034366, + "grad_norm": 3.0625, + "learning_rate": 8.064648719706145e-06, + "loss": 0.8725, + "step": 3145 + }, + { + "epoch": 1.511425658249865, + "grad_norm": 3.484375, + "learning_rate": 8.050447733942892e-06, + "loss": 0.9236, + "step": 3150 + }, + { + "epoch": 1.5138247465962933, + "grad_norm": 3.5, + "learning_rate": 8.03620744313903e-06, + "loss": 0.8835, + "step": 3155 + }, + { + "epoch": 1.5162238349427217, + "grad_norm": 3.265625, + "learning_rate": 8.021928030780806e-06, + "loss": 0.9518, + "step": 3160 + }, + { + "epoch": 1.5186229232891502, + "grad_norm": 3.03125, + "learning_rate": 8.00760968085855e-06, + "loss": 0.9473, + "step": 3165 + }, + { + "epoch": 1.5210220116355786, + "grad_norm": 3.390625, + "learning_rate": 7.993252577864302e-06, + "loss": 0.8956, + "step": 3170 + }, + { + "epoch": 1.5234210999820068, + "grad_norm": 2.515625, + "learning_rate": 7.978856906789433e-06, + "loss": 0.9128, + "step": 3175 + }, + { + "epoch": 1.5258201883284352, + "grad_norm": 3.265625, + "learning_rate": 7.964422853122268e-06, + "loss": 0.8885, + "step": 3180 + }, + { + "epoch": 1.5282192766748635, + "grad_norm": 2.765625, + "learning_rate": 7.949950602845692e-06, + "loss": 0.9026, + "step": 3185 + }, + { + "epoch": 1.530618365021292, + "grad_norm": 2.703125, + "learning_rate": 7.935440342434751e-06, + "loss": 0.9181, + "step": 3190 + }, + { + "epoch": 1.5330174533677203, + "grad_norm": 3.234375, + "learning_rate": 7.920892258854252e-06, + "loss": 0.9653, + "step": 3195 + }, + { + "epoch": 1.5354165417141488, + "grad_norm": 3.34375, + "learning_rate": 7.906306539556354e-06, + "loss": 0.9855, + "step": 3200 + }, + { + "epoch": 1.5354165417141488, + "eval_loss": 0.9524497985839844, + "eval_runtime": 175.7768, + "eval_samples_per_second": 42.156, + "eval_steps_per_second": 10.542, + "step": 3200 + }, + { + "epoch": 1.537815630060577, + "grad_norm": 3.203125, + "learning_rate": 7.891683372478157e-06, + "loss": 0.9054, + "step": 3205 + }, + { + "epoch": 1.5402147184070052, + "grad_norm": 3.078125, + "learning_rate": 7.87702294603927e-06, + "loss": 0.7991, + "step": 3210 + }, + { + "epoch": 1.5426138067534336, + "grad_norm": 3.640625, + "learning_rate": 7.86232544913939e-06, + "loss": 0.9486, + "step": 3215 + }, + { + "epoch": 1.545012895099862, + "grad_norm": 3.375, + "learning_rate": 7.847591071155871e-06, + "loss": 0.8998, + "step": 3220 + }, + { + "epoch": 1.5474119834462905, + "grad_norm": 3.203125, + "learning_rate": 7.832820001941274e-06, + "loss": 0.9656, + "step": 3225 + }, + { + "epoch": 1.549811071792719, + "grad_norm": 2.984375, + "learning_rate": 7.818012431820935e-06, + "loss": 0.842, + "step": 3230 + }, + { + "epoch": 1.5522101601391471, + "grad_norm": 2.859375, + "learning_rate": 7.803168551590496e-06, + "loss": 0.9687, + "step": 3235 + }, + { + "epoch": 1.5546092484855754, + "grad_norm": 2.765625, + "learning_rate": 7.788288552513459e-06, + "loss": 0.9853, + "step": 3240 + }, + { + "epoch": 1.5570083368320038, + "grad_norm": 2.984375, + "learning_rate": 7.773372626318719e-06, + "loss": 0.915, + "step": 3245 + }, + { + "epoch": 1.5594074251784322, + "grad_norm": 3.109375, + "learning_rate": 7.758420965198087e-06, + "loss": 0.8403, + "step": 3250 + }, + { + "epoch": 1.5618065135248607, + "grad_norm": 2.78125, + "learning_rate": 7.743433761803826e-06, + "loss": 0.8819, + "step": 3255 + }, + { + "epoch": 1.5642056018712889, + "grad_norm": 3.09375, + "learning_rate": 7.728411209246156e-06, + "loss": 0.857, + "step": 3260 + }, + { + "epoch": 1.5666046902177173, + "grad_norm": 2.8125, + "learning_rate": 7.713353501090773e-06, + "loss": 0.8918, + "step": 3265 + }, + { + "epoch": 1.5690037785641455, + "grad_norm": 2.46875, + "learning_rate": 7.698260831356352e-06, + "loss": 0.8965, + "step": 3270 + }, + { + "epoch": 1.571402866910574, + "grad_norm": 2.28125, + "learning_rate": 7.683133394512053e-06, + "loss": 0.8654, + "step": 3275 + }, + { + "epoch": 1.5738019552570024, + "grad_norm": 2.9375, + "learning_rate": 7.667971385475002e-06, + "loss": 0.9612, + "step": 3280 + }, + { + "epoch": 1.5762010436034308, + "grad_norm": 3.25, + "learning_rate": 7.652774999607794e-06, + "loss": 0.952, + "step": 3285 + }, + { + "epoch": 1.578600131949859, + "grad_norm": 3.234375, + "learning_rate": 7.63754443271597e-06, + "loss": 0.8155, + "step": 3290 + }, + { + "epoch": 1.5809992202962873, + "grad_norm": 3.875, + "learning_rate": 7.622279881045489e-06, + "loss": 0.9594, + "step": 3295 + }, + { + "epoch": 1.5833983086427157, + "grad_norm": 3.5625, + "learning_rate": 7.606981541280212e-06, + "loss": 0.8651, + "step": 3300 + }, + { + "epoch": 1.5833983086427157, + "eval_loss": 0.9511102437973022, + "eval_runtime": 175.9414, + "eval_samples_per_second": 42.116, + "eval_steps_per_second": 10.532, + "step": 3300 + }, + { + "epoch": 1.5857973969891441, + "grad_norm": 2.640625, + "learning_rate": 7.591649610539349e-06, + "loss": 0.9834, + "step": 3305 + }, + { + "epoch": 1.5881964853355726, + "grad_norm": 2.953125, + "learning_rate": 7.57628428637494e-06, + "loss": 1.0254, + "step": 3310 + }, + { + "epoch": 1.590595573682001, + "grad_norm": 3.234375, + "learning_rate": 7.560885766769295e-06, + "loss": 0.8831, + "step": 3315 + }, + { + "epoch": 1.5929946620284292, + "grad_norm": 3.21875, + "learning_rate": 7.5454542501324445e-06, + "loss": 0.9003, + "step": 3320 + }, + { + "epoch": 1.5953937503748574, + "grad_norm": 3.609375, + "learning_rate": 7.529989935299595e-06, + "loss": 0.8892, + "step": 3325 + }, + { + "epoch": 1.5977928387212859, + "grad_norm": 3.625, + "learning_rate": 7.514493021528548e-06, + "loss": 0.9812, + "step": 3330 + }, + { + "epoch": 1.6001919270677143, + "grad_norm": 2.4375, + "learning_rate": 7.498963708497149e-06, + "loss": 0.9269, + "step": 3335 + }, + { + "epoch": 1.6025910154141427, + "grad_norm": 2.890625, + "learning_rate": 7.483402196300705e-06, + "loss": 0.8783, + "step": 3340 + }, + { + "epoch": 1.604990103760571, + "grad_norm": 2.890625, + "learning_rate": 7.467808685449413e-06, + "loss": 0.8795, + "step": 3345 + }, + { + "epoch": 1.6073891921069994, + "grad_norm": 4.1875, + "learning_rate": 7.452183376865768e-06, + "loss": 0.9221, + "step": 3350 + }, + { + "epoch": 1.6097882804534276, + "grad_norm": 3.09375, + "learning_rate": 7.436526471881982e-06, + "loss": 1.0129, + "step": 3355 + }, + { + "epoch": 1.612187368799856, + "grad_norm": 3.171875, + "learning_rate": 7.420838172237388e-06, + "loss": 0.9056, + "step": 3360 + }, + { + "epoch": 1.6145864571462845, + "grad_norm": 2.765625, + "learning_rate": 7.405118680075835e-06, + "loss": 0.8635, + "step": 3365 + }, + { + "epoch": 1.616985545492713, + "grad_norm": 2.921875, + "learning_rate": 7.389368197943092e-06, + "loss": 0.8995, + "step": 3370 + }, + { + "epoch": 1.619384633839141, + "grad_norm": 2.828125, + "learning_rate": 7.373586928784234e-06, + "loss": 0.8698, + "step": 3375 + }, + { + "epoch": 1.6217837221855693, + "grad_norm": 3.96875, + "learning_rate": 7.357775075941025e-06, + "loss": 0.922, + "step": 3380 + }, + { + "epoch": 1.6241828105319978, + "grad_norm": 3.15625, + "learning_rate": 7.341932843149298e-06, + "loss": 0.9133, + "step": 3385 + }, + { + "epoch": 1.6265818988784262, + "grad_norm": 3.09375, + "learning_rate": 7.326060434536337e-06, + "loss": 0.8743, + "step": 3390 + }, + { + "epoch": 1.6289809872248546, + "grad_norm": 3.1875, + "learning_rate": 7.31015805461824e-06, + "loss": 0.8689, + "step": 3395 + }, + { + "epoch": 1.631380075571283, + "grad_norm": 3.296875, + "learning_rate": 7.294225908297281e-06, + "loss": 0.9448, + "step": 3400 + }, + { + "epoch": 1.631380075571283, + "eval_loss": 0.9495499730110168, + "eval_runtime": 176.9541, + "eval_samples_per_second": 41.875, + "eval_steps_per_second": 10.472, + "step": 3400 + }, + { + "epoch": 1.6337791639177113, + "grad_norm": 3.125, + "learning_rate": 7.278264200859281e-06, + "loss": 0.9375, + "step": 3405 + }, + { + "epoch": 1.6361782522641395, + "grad_norm": 2.796875, + "learning_rate": 7.262273137970953e-06, + "loss": 0.9227, + "step": 3410 + }, + { + "epoch": 1.638577340610568, + "grad_norm": 3.09375, + "learning_rate": 7.246252925677253e-06, + "loss": 0.811, + "step": 3415 + }, + { + "epoch": 1.6409764289569964, + "grad_norm": 3.21875, + "learning_rate": 7.230203770398734e-06, + "loss": 1.0199, + "step": 3420 + }, + { + "epoch": 1.6433755173034248, + "grad_norm": 2.828125, + "learning_rate": 7.21412587892887e-06, + "loss": 0.8974, + "step": 3425 + }, + { + "epoch": 1.645774605649853, + "grad_norm": 3.4375, + "learning_rate": 7.19801945843141e-06, + "loss": 0.9508, + "step": 3430 + }, + { + "epoch": 1.6481736939962814, + "grad_norm": 3.28125, + "learning_rate": 7.181884716437694e-06, + "loss": 0.9031, + "step": 3435 + }, + { + "epoch": 1.6505727823427097, + "grad_norm": 3.171875, + "learning_rate": 7.165721860843987e-06, + "loss": 0.8852, + "step": 3440 + }, + { + "epoch": 1.652971870689138, + "grad_norm": 3.15625, + "learning_rate": 7.149531099908799e-06, + "loss": 0.8157, + "step": 3445 + }, + { + "epoch": 1.6553709590355665, + "grad_norm": 2.90625, + "learning_rate": 7.1333126422501965e-06, + "loss": 0.9073, + "step": 3450 + }, + { + "epoch": 1.657770047381995, + "grad_norm": 3.25, + "learning_rate": 7.1170666968431225e-06, + "loss": 0.9776, + "step": 3455 + }, + { + "epoch": 1.6601691357284232, + "grad_norm": 2.9375, + "learning_rate": 7.100793473016699e-06, + "loss": 0.9031, + "step": 3460 + }, + { + "epoch": 1.6625682240748516, + "grad_norm": 2.78125, + "learning_rate": 7.084493180451529e-06, + "loss": 0.872, + "step": 3465 + }, + { + "epoch": 1.6649673124212798, + "grad_norm": 2.109375, + "learning_rate": 7.068166029176996e-06, + "loss": 0.937, + "step": 3470 + }, + { + "epoch": 1.6673664007677083, + "grad_norm": 3.3125, + "learning_rate": 7.051812229568562e-06, + "loss": 0.8244, + "step": 3475 + }, + { + "epoch": 1.6697654891141367, + "grad_norm": 3.453125, + "learning_rate": 7.035431992345051e-06, + "loss": 0.8413, + "step": 3480 + }, + { + "epoch": 1.6721645774605651, + "grad_norm": 3.28125, + "learning_rate": 7.019025528565933e-06, + "loss": 0.917, + "step": 3485 + }, + { + "epoch": 1.6745636658069933, + "grad_norm": 3.375, + "learning_rate": 7.002593049628611e-06, + "loss": 0.8301, + "step": 3490 + }, + { + "epoch": 1.6769627541534216, + "grad_norm": 5.25, + "learning_rate": 6.986134767265693e-06, + "loss": 0.917, + "step": 3495 + }, + { + "epoch": 1.67936184249985, + "grad_norm": 2.859375, + "learning_rate": 6.969650893542261e-06, + "loss": 0.8997, + "step": 3500 + }, + { + "epoch": 1.67936184249985, + "eval_loss": 0.9484797120094299, + "eval_runtime": 175.9736, + "eval_samples_per_second": 42.109, + "eval_steps_per_second": 10.53, + "step": 3500 + }, + { + "epoch": 1.6817609308462784, + "grad_norm": 3.890625, + "learning_rate": 6.9531416408531475e-06, + "loss": 0.7639, + "step": 3505 + }, + { + "epoch": 1.6841600191927069, + "grad_norm": 3.96875, + "learning_rate": 6.936607221920188e-06, + "loss": 0.8797, + "step": 3510 + }, + { + "epoch": 1.6865591075391353, + "grad_norm": 2.90625, + "learning_rate": 6.920047849789488e-06, + "loss": 0.7968, + "step": 3515 + }, + { + "epoch": 1.6889581958855635, + "grad_norm": 3.84375, + "learning_rate": 6.903463737828675e-06, + "loss": 0.9295, + "step": 3520 + }, + { + "epoch": 1.6913572842319917, + "grad_norm": 3.796875, + "learning_rate": 6.886855099724148e-06, + "loss": 0.8382, + "step": 3525 + }, + { + "epoch": 1.6937563725784202, + "grad_norm": 2.96875, + "learning_rate": 6.870222149478326e-06, + "loss": 0.931, + "step": 3530 + }, + { + "epoch": 1.6961554609248486, + "grad_norm": 2.546875, + "learning_rate": 6.853565101406891e-06, + "loss": 0.9034, + "step": 3535 + }, + { + "epoch": 1.698554549271277, + "grad_norm": 3.046875, + "learning_rate": 6.836884170136026e-06, + "loss": 0.881, + "step": 3540 + }, + { + "epoch": 1.7009536376177052, + "grad_norm": 3.21875, + "learning_rate": 6.8201795705996465e-06, + "loss": 0.8059, + "step": 3545 + }, + { + "epoch": 1.7033527259641337, + "grad_norm": 3.296875, + "learning_rate": 6.8034515180366366e-06, + "loss": 0.7942, + "step": 3550 + }, + { + "epoch": 1.7057518143105619, + "grad_norm": 4.21875, + "learning_rate": 6.786700227988072e-06, + "loss": 0.8774, + "step": 3555 + }, + { + "epoch": 1.7081509026569903, + "grad_norm": 3.375, + "learning_rate": 6.7699259162944445e-06, + "loss": 0.8572, + "step": 3560 + }, + { + "epoch": 1.7105499910034188, + "grad_norm": 2.859375, + "learning_rate": 6.753128799092875e-06, + "loss": 0.9758, + "step": 3565 + }, + { + "epoch": 1.7129490793498472, + "grad_norm": 3.578125, + "learning_rate": 6.7363090928143414e-06, + "loss": 0.9013, + "step": 3570 + }, + { + "epoch": 1.7153481676962754, + "grad_norm": 3.25, + "learning_rate": 6.719467014180876e-06, + "loss": 0.9169, + "step": 3575 + }, + { + "epoch": 1.7177472560427036, + "grad_norm": 3.59375, + "learning_rate": 6.702602780202779e-06, + "loss": 0.7857, + "step": 3580 + }, + { + "epoch": 1.720146344389132, + "grad_norm": 4.21875, + "learning_rate": 6.68571660817583e-06, + "loss": 0.9271, + "step": 3585 + }, + { + "epoch": 1.7225454327355605, + "grad_norm": 2.90625, + "learning_rate": 6.66880871567847e-06, + "loss": 0.9313, + "step": 3590 + }, + { + "epoch": 1.724944521081989, + "grad_norm": 3.0, + "learning_rate": 6.651879320569015e-06, + "loss": 0.9542, + "step": 3595 + }, + { + "epoch": 1.7273436094284174, + "grad_norm": 3.59375, + "learning_rate": 6.634928640982841e-06, + "loss": 1.0446, + "step": 3600 + }, + { + "epoch": 1.7273436094284174, + "eval_loss": 0.9475127458572388, + "eval_runtime": 175.7691, + "eval_samples_per_second": 42.158, + "eval_steps_per_second": 10.542, + "step": 3600 + }, + { + "epoch": 1.7297426977748456, + "grad_norm": 2.75, + "learning_rate": 6.617956895329574e-06, + "loss": 0.8341, + "step": 3605 + }, + { + "epoch": 1.7321417861212738, + "grad_norm": 2.828125, + "learning_rate": 6.600964302290275e-06, + "loss": 0.8448, + "step": 3610 + }, + { + "epoch": 1.7345408744677022, + "grad_norm": 2.265625, + "learning_rate": 6.5839510808146276e-06, + "loss": 0.887, + "step": 3615 + }, + { + "epoch": 1.7369399628141307, + "grad_norm": 2.921875, + "learning_rate": 6.566917450118109e-06, + "loss": 0.9268, + "step": 3620 + }, + { + "epoch": 1.739339051160559, + "grad_norm": 3.28125, + "learning_rate": 6.549863629679174e-06, + "loss": 0.873, + "step": 3625 + }, + { + "epoch": 1.7417381395069873, + "grad_norm": 3.625, + "learning_rate": 6.532789839236417e-06, + "loss": 0.8447, + "step": 3630 + }, + { + "epoch": 1.7441372278534157, + "grad_norm": 2.984375, + "learning_rate": 6.5156962987857485e-06, + "loss": 0.9165, + "step": 3635 + }, + { + "epoch": 1.746536316199844, + "grad_norm": 3.046875, + "learning_rate": 6.498583228577559e-06, + "loss": 0.8892, + "step": 3640 + }, + { + "epoch": 1.7489354045462724, + "grad_norm": 2.890625, + "learning_rate": 6.48145084911388e-06, + "loss": 0.8452, + "step": 3645 + }, + { + "epoch": 1.7513344928927008, + "grad_norm": 3.125, + "learning_rate": 6.464299381145539e-06, + "loss": 0.8913, + "step": 3650 + }, + { + "epoch": 1.7537335812391293, + "grad_norm": 3.21875, + "learning_rate": 6.4471290456693245e-06, + "loss": 0.9104, + "step": 3655 + }, + { + "epoch": 1.7561326695855575, + "grad_norm": 3.234375, + "learning_rate": 6.429940063925129e-06, + "loss": 0.8567, + "step": 3660 + }, + { + "epoch": 1.7585317579319857, + "grad_norm": 2.671875, + "learning_rate": 6.412732657393104e-06, + "loss": 0.8169, + "step": 3665 + }, + { + "epoch": 1.7609308462784141, + "grad_norm": 3.03125, + "learning_rate": 6.395507047790807e-06, + "loss": 0.7973, + "step": 3670 + }, + { + "epoch": 1.7633299346248426, + "grad_norm": 2.78125, + "learning_rate": 6.378263457070334e-06, + "loss": 0.9667, + "step": 3675 + }, + { + "epoch": 1.765729022971271, + "grad_norm": 2.890625, + "learning_rate": 6.361002107415478e-06, + "loss": 0.9165, + "step": 3680 + }, + { + "epoch": 1.7681281113176994, + "grad_norm": 3.078125, + "learning_rate": 6.34372322123885e-06, + "loss": 0.8751, + "step": 3685 + }, + { + "epoch": 1.7705271996641276, + "grad_norm": 3.71875, + "learning_rate": 6.32642702117902e-06, + "loss": 0.9498, + "step": 3690 + }, + { + "epoch": 1.7729262880105559, + "grad_norm": 2.96875, + "learning_rate": 6.309113730097647e-06, + "loss": 0.8921, + "step": 3695 + }, + { + "epoch": 1.7753253763569843, + "grad_norm": 3.015625, + "learning_rate": 6.291783571076612e-06, + "loss": 0.8862, + "step": 3700 + }, + { + "epoch": 1.7753253763569843, + "eval_loss": 0.9464648365974426, + "eval_runtime": 175.7404, + "eval_samples_per_second": 42.164, + "eval_steps_per_second": 10.544, + "step": 3700 + }, + { + "epoch": 1.7777244647034127, + "grad_norm": 2.609375, + "learning_rate": 6.274436767415133e-06, + "loss": 0.9401, + "step": 3705 + }, + { + "epoch": 1.7801235530498412, + "grad_norm": 2.765625, + "learning_rate": 6.257073542626899e-06, + "loss": 0.9079, + "step": 3710 + }, + { + "epoch": 1.7825226413962694, + "grad_norm": 2.734375, + "learning_rate": 6.239694120437186e-06, + "loss": 0.8473, + "step": 3715 + }, + { + "epoch": 1.7849217297426978, + "grad_norm": 3.5625, + "learning_rate": 6.2222987247799705e-06, + "loss": 0.867, + "step": 3720 + }, + { + "epoch": 1.787320818089126, + "grad_norm": 3.1875, + "learning_rate": 6.204887579795046e-06, + "loss": 0.8493, + "step": 3725 + }, + { + "epoch": 1.7897199064355545, + "grad_norm": 3.296875, + "learning_rate": 6.187460909825142e-06, + "loss": 0.8569, + "step": 3730 + }, + { + "epoch": 1.7921189947819829, + "grad_norm": 2.90625, + "learning_rate": 6.170018939413024e-06, + "loss": 0.8716, + "step": 3735 + }, + { + "epoch": 1.7945180831284113, + "grad_norm": 3.078125, + "learning_rate": 6.152561893298601e-06, + "loss": 1.0129, + "step": 3740 + }, + { + "epoch": 1.7969171714748395, + "grad_norm": 2.5625, + "learning_rate": 6.135089996416039e-06, + "loss": 0.8712, + "step": 3745 + }, + { + "epoch": 1.799316259821268, + "grad_norm": 2.9375, + "learning_rate": 6.1176034738908515e-06, + "loss": 0.9311, + "step": 3750 + }, + { + "epoch": 1.8017153481676962, + "grad_norm": 2.8125, + "learning_rate": 6.100102551037003e-06, + "loss": 0.948, + "step": 3755 + }, + { + "epoch": 1.8041144365141246, + "grad_norm": 2.984375, + "learning_rate": 6.082587453354012e-06, + "loss": 0.956, + "step": 3760 + }, + { + "epoch": 1.806513524860553, + "grad_norm": 3.34375, + "learning_rate": 6.065058406524033e-06, + "loss": 0.9149, + "step": 3765 + }, + { + "epoch": 1.8089126132069815, + "grad_norm": 2.75, + "learning_rate": 6.047515636408959e-06, + "loss": 0.8932, + "step": 3770 + }, + { + "epoch": 1.8113117015534097, + "grad_norm": 3.265625, + "learning_rate": 6.029959369047507e-06, + "loss": 0.7662, + "step": 3775 + }, + { + "epoch": 1.813710789899838, + "grad_norm": 3.625, + "learning_rate": 6.012389830652307e-06, + "loss": 0.9215, + "step": 3780 + }, + { + "epoch": 1.8161098782462664, + "grad_norm": 3.546875, + "learning_rate": 5.994807247606984e-06, + "loss": 0.8813, + "step": 3785 + }, + { + "epoch": 1.8185089665926948, + "grad_norm": 3.171875, + "learning_rate": 5.977211846463243e-06, + "loss": 0.8829, + "step": 3790 + }, + { + "epoch": 1.8209080549391232, + "grad_norm": 4.0, + "learning_rate": 5.959603853937958e-06, + "loss": 1.0723, + "step": 3795 + }, + { + "epoch": 1.8233071432855517, + "grad_norm": 3.03125, + "learning_rate": 5.941983496910232e-06, + "loss": 0.873, + "step": 3800 + }, + { + "epoch": 1.8233071432855517, + "eval_loss": 0.9455747008323669, + "eval_runtime": 175.6671, + "eval_samples_per_second": 42.182, + "eval_steps_per_second": 10.548, + "step": 3800 + }, + { + "epoch": 1.8257062316319799, + "grad_norm": 3.03125, + "learning_rate": 5.924351002418489e-06, + "loss": 0.9247, + "step": 3805 + }, + { + "epoch": 1.828105319978408, + "grad_norm": 3.265625, + "learning_rate": 5.90670659765755e-06, + "loss": 0.9389, + "step": 3810 + }, + { + "epoch": 1.8305044083248365, + "grad_norm": 2.984375, + "learning_rate": 5.889050509975692e-06, + "loss": 0.8573, + "step": 3815 + }, + { + "epoch": 1.832903496671265, + "grad_norm": 3.265625, + "learning_rate": 5.8713829668717295e-06, + "loss": 0.8662, + "step": 3820 + }, + { + "epoch": 1.8353025850176934, + "grad_norm": 4.3125, + "learning_rate": 5.853704195992082e-06, + "loss": 0.9179, + "step": 3825 + }, + { + "epoch": 1.8377016733641216, + "grad_norm": 2.5, + "learning_rate": 5.836014425127835e-06, + "loss": 0.7961, + "step": 3830 + }, + { + "epoch": 1.84010076171055, + "grad_norm": 3.59375, + "learning_rate": 5.8183138822118125e-06, + "loss": 0.8975, + "step": 3835 + }, + { + "epoch": 1.8424998500569783, + "grad_norm": 3.453125, + "learning_rate": 5.800602795315633e-06, + "loss": 0.8728, + "step": 3840 + }, + { + "epoch": 1.8448989384034067, + "grad_norm": 3.453125, + "learning_rate": 5.7828813926467795e-06, + "loss": 1.0257, + "step": 3845 + }, + { + "epoch": 1.8472980267498351, + "grad_norm": 3.15625, + "learning_rate": 5.765149902545649e-06, + "loss": 0.8953, + "step": 3850 + }, + { + "epoch": 1.8496971150962636, + "grad_norm": 3.859375, + "learning_rate": 5.747408553482616e-06, + "loss": 0.8241, + "step": 3855 + }, + { + "epoch": 1.8520962034426918, + "grad_norm": 3.0, + "learning_rate": 5.729657574055089e-06, + "loss": 0.9077, + "step": 3860 + }, + { + "epoch": 1.85449529178912, + "grad_norm": 3.875, + "learning_rate": 5.711897192984567e-06, + "loss": 0.9028, + "step": 3865 + }, + { + "epoch": 1.8568943801355484, + "grad_norm": 4.03125, + "learning_rate": 5.694127639113679e-06, + "loss": 0.8912, + "step": 3870 + }, + { + "epoch": 1.8592934684819769, + "grad_norm": 3.03125, + "learning_rate": 5.676349141403257e-06, + "loss": 0.8446, + "step": 3875 + }, + { + "epoch": 1.8616925568284053, + "grad_norm": 2.6875, + "learning_rate": 5.658561928929368e-06, + "loss": 0.7482, + "step": 3880 + }, + { + "epoch": 1.8640916451748337, + "grad_norm": 2.703125, + "learning_rate": 5.6407662308803704e-06, + "loss": 0.9638, + "step": 3885 + }, + { + "epoch": 1.866490733521262, + "grad_norm": 2.953125, + "learning_rate": 5.62296227655396e-06, + "loss": 0.8948, + "step": 3890 + }, + { + "epoch": 1.8688898218676901, + "grad_norm": 2.75, + "learning_rate": 5.605150295354214e-06, + "loss": 0.8241, + "step": 3895 + }, + { + "epoch": 1.8712889102141186, + "grad_norm": 2.90625, + "learning_rate": 5.5873305167886334e-06, + "loss": 0.9893, + "step": 3900 + }, + { + "epoch": 1.8712889102141186, + "eval_loss": 0.944765031337738, + "eval_runtime": 175.8075, + "eval_samples_per_second": 42.148, + "eval_steps_per_second": 10.54, + "step": 3900 + }, + { + "epoch": 1.873687998560547, + "grad_norm": 3.546875, + "learning_rate": 5.569503170465196e-06, + "loss": 0.9387, + "step": 3905 + }, + { + "epoch": 1.8760870869069755, + "grad_norm": 3.40625, + "learning_rate": 5.55166848608938e-06, + "loss": 0.8305, + "step": 3910 + }, + { + "epoch": 1.8784861752534037, + "grad_norm": 3.34375, + "learning_rate": 5.533826693461224e-06, + "loss": 0.8884, + "step": 3915 + }, + { + "epoch": 1.880885263599832, + "grad_norm": 3.359375, + "learning_rate": 5.515978022472349e-06, + "loss": 0.8486, + "step": 3920 + }, + { + "epoch": 1.8832843519462603, + "grad_norm": 3.65625, + "learning_rate": 5.498122703103009e-06, + "loss": 0.9519, + "step": 3925 + }, + { + "epoch": 1.8856834402926888, + "grad_norm": 2.9375, + "learning_rate": 5.48026096541912e-06, + "loss": 0.9153, + "step": 3930 + }, + { + "epoch": 1.8880825286391172, + "grad_norm": 3.296875, + "learning_rate": 5.462393039569296e-06, + "loss": 0.7888, + "step": 3935 + }, + { + "epoch": 1.8904816169855456, + "grad_norm": 2.6875, + "learning_rate": 5.44451915578189e-06, + "loss": 0.8447, + "step": 3940 + }, + { + "epoch": 1.8928807053319738, + "grad_norm": 2.6875, + "learning_rate": 5.42663954436202e-06, + "loss": 0.9121, + "step": 3945 + }, + { + "epoch": 1.895279793678402, + "grad_norm": 3.53125, + "learning_rate": 5.408754435688605e-06, + "loss": 0.9036, + "step": 3950 + }, + { + "epoch": 1.8976788820248305, + "grad_norm": 2.78125, + "learning_rate": 5.390864060211399e-06, + "loss": 0.8647, + "step": 3955 + }, + { + "epoch": 1.900077970371259, + "grad_norm": 3.046875, + "learning_rate": 5.372968648448015e-06, + "loss": 0.8347, + "step": 3960 + }, + { + "epoch": 1.9024770587176874, + "grad_norm": 3.9375, + "learning_rate": 5.35506843098096e-06, + "loss": 0.8615, + "step": 3965 + }, + { + "epoch": 1.9048761470641158, + "grad_norm": 3.34375, + "learning_rate": 5.337163638454661e-06, + "loss": 0.815, + "step": 3970 + }, + { + "epoch": 1.907275235410544, + "grad_norm": 3.328125, + "learning_rate": 5.3192545015724995e-06, + "loss": 0.9842, + "step": 3975 + }, + { + "epoch": 1.9096743237569722, + "grad_norm": 2.6875, + "learning_rate": 5.301341251093828e-06, + "loss": 1.0199, + "step": 3980 + }, + { + "epoch": 1.9120734121034006, + "grad_norm": 3.984375, + "learning_rate": 5.2834241178310065e-06, + "loss": 0.9157, + "step": 3985 + }, + { + "epoch": 1.914472500449829, + "grad_norm": 3.359375, + "learning_rate": 5.265503332646425e-06, + "loss": 0.9769, + "step": 3990 + }, + { + "epoch": 1.9168715887962575, + "grad_norm": 3.03125, + "learning_rate": 5.247579126449525e-06, + "loss": 0.8583, + "step": 3995 + }, + { + "epoch": 1.9192706771426857, + "grad_norm": 3.203125, + "learning_rate": 5.22965173019383e-06, + "loss": 0.8915, + "step": 4000 + }, + { + "epoch": 1.9192706771426857, + "eval_loss": 0.9442155361175537, + "eval_runtime": 177.0197, + "eval_samples_per_second": 41.86, + "eval_steps_per_second": 10.468, + "step": 4000 + }, + { + "epoch": 1.9216697654891142, + "grad_norm": 3.21875, + "learning_rate": 5.211721374873969e-06, + "loss": 0.8223, + "step": 4005 + }, + { + "epoch": 1.9240688538355424, + "grad_norm": 2.734375, + "learning_rate": 5.193788291522698e-06, + "loss": 0.9664, + "step": 4010 + }, + { + "epoch": 1.9264679421819708, + "grad_norm": 3.625, + "learning_rate": 5.1758527112079194e-06, + "loss": 0.958, + "step": 4015 + }, + { + "epoch": 1.9288670305283993, + "grad_norm": 2.484375, + "learning_rate": 5.157914865029715e-06, + "loss": 0.8964, + "step": 4020 + }, + { + "epoch": 1.9312661188748277, + "grad_norm": 3.46875, + "learning_rate": 5.13997498411736e-06, + "loss": 1.014, + "step": 4025 + }, + { + "epoch": 1.933665207221256, + "grad_norm": 2.796875, + "learning_rate": 5.122033299626344e-06, + "loss": 0.8947, + "step": 4030 + }, + { + "epoch": 1.9360642955676843, + "grad_norm": 3.609375, + "learning_rate": 5.104090042735399e-06, + "loss": 0.8083, + "step": 4035 + }, + { + "epoch": 1.9384633839141125, + "grad_norm": 3.21875, + "learning_rate": 5.08614544464352e-06, + "loss": 0.8151, + "step": 4040 + }, + { + "epoch": 1.940862472260541, + "grad_norm": 4.25, + "learning_rate": 5.068199736566976e-06, + "loss": 1.0128, + "step": 4045 + }, + { + "epoch": 1.9432615606069694, + "grad_norm": 4.25, + "learning_rate": 5.0502531497363435e-06, + "loss": 0.9116, + "step": 4050 + }, + { + "epoch": 1.9456606489533979, + "grad_norm": 4.46875, + "learning_rate": 5.0323059153935235e-06, + "loss": 0.9195, + "step": 4055 + }, + { + "epoch": 1.948059737299826, + "grad_norm": 3.09375, + "learning_rate": 5.014358264788755e-06, + "loss": 0.8837, + "step": 4060 + }, + { + "epoch": 1.9504588256462543, + "grad_norm": 3.15625, + "learning_rate": 4.996410429177645e-06, + "loss": 0.9059, + "step": 4065 + }, + { + "epoch": 1.9528579139926827, + "grad_norm": 3.234375, + "learning_rate": 4.9784626398181775e-06, + "loss": 0.9118, + "step": 4070 + }, + { + "epoch": 1.9552570023391111, + "grad_norm": 3.234375, + "learning_rate": 4.96051512796775e-06, + "loss": 0.8492, + "step": 4075 + }, + { + "epoch": 1.9576560906855396, + "grad_norm": 3.171875, + "learning_rate": 4.9425681248801756e-06, + "loss": 0.9743, + "step": 4080 + }, + { + "epoch": 1.9600551790319678, + "grad_norm": 3.15625, + "learning_rate": 4.924621861802721e-06, + "loss": 0.8697, + "step": 4085 + }, + { + "epoch": 1.9624542673783962, + "grad_norm": 2.78125, + "learning_rate": 4.906676569973107e-06, + "loss": 0.8178, + "step": 4090 + }, + { + "epoch": 1.9648533557248244, + "grad_norm": 3.203125, + "learning_rate": 4.88873248061655e-06, + "loss": 0.8874, + "step": 4095 + }, + { + "epoch": 1.9672524440712529, + "grad_norm": 3.59375, + "learning_rate": 4.870789824942766e-06, + "loss": 0.8854, + "step": 4100 + }, + { + "epoch": 1.9672524440712529, + "eval_loss": 0.9435391426086426, + "eval_runtime": 175.8544, + "eval_samples_per_second": 42.137, + "eval_steps_per_second": 10.537, + "step": 4100 + }, + { + "epoch": 1.9696515324176813, + "grad_norm": 3.296875, + "learning_rate": 4.852848834143002e-06, + "loss": 0.9725, + "step": 4105 + }, + { + "epoch": 1.9720506207641098, + "grad_norm": 2.828125, + "learning_rate": 4.834909739387048e-06, + "loss": 0.8971, + "step": 4110 + }, + { + "epoch": 1.974449709110538, + "grad_norm": 3.265625, + "learning_rate": 4.8169727718202695e-06, + "loss": 0.7883, + "step": 4115 + }, + { + "epoch": 1.9768487974569664, + "grad_norm": 2.78125, + "learning_rate": 4.799038162560619e-06, + "loss": 0.9476, + "step": 4120 + }, + { + "epoch": 1.9792478858033946, + "grad_norm": 3.484375, + "learning_rate": 4.781106142695664e-06, + "loss": 0.9393, + "step": 4125 + }, + { + "epoch": 1.981646974149823, + "grad_norm": 3.078125, + "learning_rate": 4.763176943279608e-06, + "loss": 0.8465, + "step": 4130 + }, + { + "epoch": 1.9840460624962515, + "grad_norm": 3.546875, + "learning_rate": 4.745250795330311e-06, + "loss": 0.8959, + "step": 4135 + }, + { + "epoch": 1.98644515084268, + "grad_norm": 3.25, + "learning_rate": 4.727327929826318e-06, + "loss": 0.8703, + "step": 4140 + }, + { + "epoch": 1.9888442391891081, + "grad_norm": 3.40625, + "learning_rate": 4.709408577703875e-06, + "loss": 0.9591, + "step": 4145 + }, + { + "epoch": 1.9912433275355363, + "grad_norm": 3.015625, + "learning_rate": 4.691492969853963e-06, + "loss": 0.8204, + "step": 4150 + }, + { + "epoch": 1.9936424158819648, + "grad_norm": 2.9375, + "learning_rate": 4.673581337119313e-06, + "loss": 0.9007, + "step": 4155 + }, + { + "epoch": 1.9960415042283932, + "grad_norm": 2.9375, + "learning_rate": 4.655673910291442e-06, + "loss": 0.8965, + "step": 4160 + }, + { + "epoch": 1.9984405925748217, + "grad_norm": 3.375, + "learning_rate": 4.637770920107669e-06, + "loss": 0.88, + "step": 4165 + }, + { + "epoch": 2.00083968092125, + "grad_norm": 3.390625, + "learning_rate": 4.619872597248153e-06, + "loss": 0.9135, + "step": 4170 + }, + { + "epoch": 2.003238769267678, + "grad_norm": 2.8125, + "learning_rate": 4.6019791723329055e-06, + "loss": 0.8235, + "step": 4175 + }, + { + "epoch": 2.0056378576141065, + "grad_norm": 2.96875, + "learning_rate": 4.584090875918837e-06, + "loss": 0.8495, + "step": 4180 + }, + { + "epoch": 2.008036945960535, + "grad_norm": 2.4375, + "learning_rate": 4.56620793849677e-06, + "loss": 0.8506, + "step": 4185 + }, + { + "epoch": 2.0104360343069634, + "grad_norm": 3.296875, + "learning_rate": 4.5483305904884826e-06, + "loss": 0.8355, + "step": 4190 + }, + { + "epoch": 2.012835122653392, + "grad_norm": 3.234375, + "learning_rate": 4.530459062243726e-06, + "loss": 0.9317, + "step": 4195 + }, + { + "epoch": 2.0152342109998203, + "grad_norm": 2.96875, + "learning_rate": 4.512593584037274e-06, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 2.0152342109998203, + "eval_loss": 0.9447470307350159, + "eval_runtime": 175.7692, + "eval_samples_per_second": 42.158, + "eval_steps_per_second": 10.542, + "step": 4200 + }, + { + "epoch": 2.0176332993462482, + "grad_norm": 2.71875, + "learning_rate": 4.494734386065933e-06, + "loss": 0.8606, + "step": 4205 + }, + { + "epoch": 2.0200323876926767, + "grad_norm": 3.40625, + "learning_rate": 4.476881698445601e-06, + "loss": 0.9198, + "step": 4210 + }, + { + "epoch": 2.022431476039105, + "grad_norm": 2.59375, + "learning_rate": 4.45903575120828e-06, + "loss": 0.8639, + "step": 4215 + }, + { + "epoch": 2.0248305643855335, + "grad_norm": 2.96875, + "learning_rate": 4.441196774299129e-06, + "loss": 0.7841, + "step": 4220 + }, + { + "epoch": 2.027229652731962, + "grad_norm": 3.09375, + "learning_rate": 4.423364997573489e-06, + "loss": 0.8384, + "step": 4225 + }, + { + "epoch": 2.0296287410783904, + "grad_norm": 2.75, + "learning_rate": 4.405540650793931e-06, + "loss": 0.8485, + "step": 4230 + }, + { + "epoch": 2.0320278294248184, + "grad_norm": 2.703125, + "learning_rate": 4.387723963627288e-06, + "loss": 0.785, + "step": 4235 + }, + { + "epoch": 2.034426917771247, + "grad_norm": 3.234375, + "learning_rate": 4.369915165641701e-06, + "loss": 0.7982, + "step": 4240 + }, + { + "epoch": 2.0368260061176753, + "grad_norm": 2.828125, + "learning_rate": 4.352114486303657e-06, + "loss": 0.8574, + "step": 4245 + }, + { + "epoch": 2.0392250944641037, + "grad_norm": 2.640625, + "learning_rate": 4.334322154975037e-06, + "loss": 0.8414, + "step": 4250 + }, + { + "epoch": 2.041624182810532, + "grad_norm": 3.484375, + "learning_rate": 4.3165384009101535e-06, + "loss": 0.8034, + "step": 4255 + }, + { + "epoch": 2.04402327115696, + "grad_norm": 2.8125, + "learning_rate": 4.298763453252805e-06, + "loss": 0.8283, + "step": 4260 + }, + { + "epoch": 2.0464223595033886, + "grad_norm": 3.0, + "learning_rate": 4.280997541033315e-06, + "loss": 0.6966, + "step": 4265 + }, + { + "epoch": 2.048821447849817, + "grad_norm": 3.125, + "learning_rate": 4.263240893165592e-06, + "loss": 0.8893, + "step": 4270 + }, + { + "epoch": 2.0512205361962454, + "grad_norm": 3.21875, + "learning_rate": 4.2454937384441665e-06, + "loss": 0.8797, + "step": 4275 + }, + { + "epoch": 2.053619624542674, + "grad_norm": 3.890625, + "learning_rate": 4.227756305541253e-06, + "loss": 0.8327, + "step": 4280 + }, + { + "epoch": 2.0560187128891023, + "grad_norm": 2.75, + "learning_rate": 4.210028823003802e-06, + "loss": 0.8423, + "step": 4285 + }, + { + "epoch": 2.0584178012355303, + "grad_norm": 2.6875, + "learning_rate": 4.192311519250548e-06, + "loss": 0.8146, + "step": 4290 + }, + { + "epoch": 2.0608168895819587, + "grad_norm": 4.0625, + "learning_rate": 4.174604622569076e-06, + "loss": 0.8386, + "step": 4295 + }, + { + "epoch": 2.063215977928387, + "grad_norm": 2.625, + "learning_rate": 4.156908361112876e-06, + "loss": 0.796, + "step": 4300 + }, + { + "epoch": 2.063215977928387, + "eval_loss": 0.9463717937469482, + "eval_runtime": 175.7455, + "eval_samples_per_second": 42.163, + "eval_steps_per_second": 10.544, + "step": 4300 + }, + { + "epoch": 2.0656150662748156, + "grad_norm": 3.046875, + "learning_rate": 4.139222962898401e-06, + "loss": 0.7608, + "step": 4305 + }, + { + "epoch": 2.068014154621244, + "grad_norm": 2.421875, + "learning_rate": 4.121548655802132e-06, + "loss": 0.8912, + "step": 4310 + }, + { + "epoch": 2.0704132429676725, + "grad_norm": 3.015625, + "learning_rate": 4.103885667557642e-06, + "loss": 0.7804, + "step": 4315 + }, + { + "epoch": 2.0728123313141005, + "grad_norm": 2.796875, + "learning_rate": 4.086234225752657e-06, + "loss": 0.876, + "step": 4320 + }, + { + "epoch": 2.075211419660529, + "grad_norm": 2.890625, + "learning_rate": 4.068594557826132e-06, + "loss": 0.7589, + "step": 4325 + }, + { + "epoch": 2.0776105080069573, + "grad_norm": 3.46875, + "learning_rate": 4.0509668910653114e-06, + "loss": 0.8852, + "step": 4330 + }, + { + "epoch": 2.080009596353386, + "grad_norm": 2.984375, + "learning_rate": 4.033351452602807e-06, + "loss": 0.9057, + "step": 4335 + }, + { + "epoch": 2.082408684699814, + "grad_norm": 2.515625, + "learning_rate": 4.0157484694136645e-06, + "loss": 0.8005, + "step": 4340 + }, + { + "epoch": 2.0848077730462427, + "grad_norm": 2.9375, + "learning_rate": 3.998158168312453e-06, + "loss": 0.8047, + "step": 4345 + }, + { + "epoch": 2.0872068613926706, + "grad_norm": 3.09375, + "learning_rate": 3.98058077595032e-06, + "loss": 0.7325, + "step": 4350 + }, + { + "epoch": 2.089605949739099, + "grad_norm": 2.53125, + "learning_rate": 3.9630165188120945e-06, + "loss": 0.7172, + "step": 4355 + }, + { + "epoch": 2.0920050380855275, + "grad_norm": 2.78125, + "learning_rate": 3.945465623213352e-06, + "loss": 0.776, + "step": 4360 + }, + { + "epoch": 2.094404126431956, + "grad_norm": 3.28125, + "learning_rate": 3.927928315297508e-06, + "loss": 0.8868, + "step": 4365 + }, + { + "epoch": 2.0968032147783844, + "grad_norm": 2.8125, + "learning_rate": 3.9104048210328965e-06, + "loss": 0.9293, + "step": 4370 + }, + { + "epoch": 2.0992023031248124, + "grad_norm": 2.765625, + "learning_rate": 3.892895366209867e-06, + "loss": 0.7971, + "step": 4375 + }, + { + "epoch": 2.101601391471241, + "grad_norm": 2.734375, + "learning_rate": 3.875400176437867e-06, + "loss": 0.861, + "step": 4380 + }, + { + "epoch": 2.1040004798176692, + "grad_norm": 3.203125, + "learning_rate": 3.8579194771425414e-06, + "loss": 0.867, + "step": 4385 + }, + { + "epoch": 2.1063995681640977, + "grad_norm": 2.984375, + "learning_rate": 3.840453493562823e-06, + "loss": 0.8466, + "step": 4390 + }, + { + "epoch": 2.108798656510526, + "grad_norm": 2.859375, + "learning_rate": 3.8230024507480375e-06, + "loss": 0.7525, + "step": 4395 + }, + { + "epoch": 2.1111977448569546, + "grad_norm": 3.3125, + "learning_rate": 3.80556657355499e-06, + "loss": 0.9225, + "step": 4400 + }, + { + "epoch": 2.1111977448569546, + "eval_loss": 0.9466894865036011, + "eval_runtime": 175.7759, + "eval_samples_per_second": 42.156, + "eval_steps_per_second": 10.542, + "step": 4400 + }, + { + "epoch": 2.1135968332033825, + "grad_norm": 2.9375, + "learning_rate": 3.788146086645084e-06, + "loss": 0.7987, + "step": 4405 + }, + { + "epoch": 2.115995921549811, + "grad_norm": 3.15625, + "learning_rate": 3.7707412144814154e-06, + "loss": 0.7869, + "step": 4410 + }, + { + "epoch": 2.1183950098962394, + "grad_norm": 2.671875, + "learning_rate": 3.7533521813258845e-06, + "loss": 0.9135, + "step": 4415 + }, + { + "epoch": 2.120794098242668, + "grad_norm": 2.65625, + "learning_rate": 3.735979211236309e-06, + "loss": 0.8726, + "step": 4420 + }, + { + "epoch": 2.1231931865890963, + "grad_norm": 2.890625, + "learning_rate": 3.7186225280635286e-06, + "loss": 0.8516, + "step": 4425 + }, + { + "epoch": 2.1255922749355243, + "grad_norm": 2.625, + "learning_rate": 3.701282355448531e-06, + "loss": 0.7885, + "step": 4430 + }, + { + "epoch": 2.1279913632819527, + "grad_norm": 3.015625, + "learning_rate": 3.6839589168195605e-06, + "loss": 0.8337, + "step": 4435 + }, + { + "epoch": 2.130390451628381, + "grad_norm": 2.90625, + "learning_rate": 3.666652435389248e-06, + "loss": 0.9509, + "step": 4440 + }, + { + "epoch": 2.1327895399748096, + "grad_norm": 3.09375, + "learning_rate": 3.6493631341517274e-06, + "loss": 0.8803, + "step": 4445 + }, + { + "epoch": 2.135188628321238, + "grad_norm": 3.34375, + "learning_rate": 3.632091235879769e-06, + "loss": 0.8565, + "step": 4450 + }, + { + "epoch": 2.1375877166676664, + "grad_norm": 3.15625, + "learning_rate": 3.614836963121902e-06, + "loss": 0.865, + "step": 4455 + }, + { + "epoch": 2.1399868050140944, + "grad_norm": 2.515625, + "learning_rate": 3.5976005381995573e-06, + "loss": 0.8987, + "step": 4460 + }, + { + "epoch": 2.142385893360523, + "grad_norm": 2.828125, + "learning_rate": 3.5803821832041857e-06, + "loss": 0.7778, + "step": 4465 + }, + { + "epoch": 2.1447849817069513, + "grad_norm": 2.875, + "learning_rate": 3.563182119994417e-06, + "loss": 0.7757, + "step": 4470 + }, + { + "epoch": 2.1471840700533797, + "grad_norm": 3.0625, + "learning_rate": 3.5460005701931864e-06, + "loss": 0.7319, + "step": 4475 + }, + { + "epoch": 2.149583158399808, + "grad_norm": 3.125, + "learning_rate": 3.5288377551848855e-06, + "loss": 0.9632, + "step": 4480 + }, + { + "epoch": 2.1519822467462366, + "grad_norm": 3.5, + "learning_rate": 3.511693896112503e-06, + "loss": 0.8698, + "step": 4485 + }, + { + "epoch": 2.1543813350926646, + "grad_norm": 2.921875, + "learning_rate": 3.4945692138747898e-06, + "loss": 0.8772, + "step": 4490 + }, + { + "epoch": 2.156780423439093, + "grad_norm": 3.40625, + "learning_rate": 3.4774639291233937e-06, + "loss": 0.8039, + "step": 4495 + }, + { + "epoch": 2.1591795117855215, + "grad_norm": 3.109375, + "learning_rate": 3.4603782622600307e-06, + "loss": 0.9901, + "step": 4500 + }, + { + "epoch": 2.1591795117855215, + "eval_loss": 0.9466681480407715, + "eval_runtime": 177.7999, + "eval_samples_per_second": 41.676, + "eval_steps_per_second": 10.422, + "step": 4500 + }, + { + "epoch": 2.16157860013195, + "grad_norm": 3.390625, + "learning_rate": 3.4433124334336383e-06, + "loss": 0.8059, + "step": 4505 + }, + { + "epoch": 2.1639776884783783, + "grad_norm": 3.03125, + "learning_rate": 3.426266662537544e-06, + "loss": 0.9415, + "step": 4510 + }, + { + "epoch": 2.166376776824807, + "grad_norm": 2.78125, + "learning_rate": 3.409241169206623e-06, + "loss": 0.8322, + "step": 4515 + }, + { + "epoch": 2.1687758651712348, + "grad_norm": 2.8125, + "learning_rate": 3.3922361728144804e-06, + "loss": 0.7832, + "step": 4520 + }, + { + "epoch": 2.171174953517663, + "grad_norm": 2.78125, + "learning_rate": 3.375251892470611e-06, + "loss": 0.7673, + "step": 4525 + }, + { + "epoch": 2.1735740418640916, + "grad_norm": 3.390625, + "learning_rate": 3.358288547017591e-06, + "loss": 0.8652, + "step": 4530 + }, + { + "epoch": 2.17597313021052, + "grad_norm": 3.265625, + "learning_rate": 3.3413463550282437e-06, + "loss": 0.8218, + "step": 4535 + }, + { + "epoch": 2.1783722185569485, + "grad_norm": 2.96875, + "learning_rate": 3.324425534802835e-06, + "loss": 0.8349, + "step": 4540 + }, + { + "epoch": 2.1807713069033765, + "grad_norm": 2.640625, + "learning_rate": 3.307526304366251e-06, + "loss": 0.8862, + "step": 4545 + }, + { + "epoch": 2.183170395249805, + "grad_norm": 2.953125, + "learning_rate": 3.290648881465196e-06, + "loss": 0.7924, + "step": 4550 + }, + { + "epoch": 2.1855694835962334, + "grad_norm": 3.484375, + "learning_rate": 3.2737934835653827e-06, + "loss": 0.8539, + "step": 4555 + }, + { + "epoch": 2.187968571942662, + "grad_norm": 2.703125, + "learning_rate": 3.2569603278487335e-06, + "loss": 0.7829, + "step": 4560 + }, + { + "epoch": 2.1903676602890902, + "grad_norm": 2.578125, + "learning_rate": 3.2401496312105786e-06, + "loss": 0.8173, + "step": 4565 + }, + { + "epoch": 2.1927667486355187, + "grad_norm": 3.421875, + "learning_rate": 3.223361610256861e-06, + "loss": 0.9411, + "step": 4570 + }, + { + "epoch": 2.1951658369819467, + "grad_norm": 3.90625, + "learning_rate": 3.2065964813013533e-06, + "loss": 0.9485, + "step": 4575 + }, + { + "epoch": 2.197564925328375, + "grad_norm": 2.6875, + "learning_rate": 3.1898544603628563e-06, + "loss": 0.8482, + "step": 4580 + }, + { + "epoch": 2.1999640136748035, + "grad_norm": 2.609375, + "learning_rate": 3.1731357631624304e-06, + "loss": 0.7985, + "step": 4585 + }, + { + "epoch": 2.202363102021232, + "grad_norm": 3.4375, + "learning_rate": 3.1564406051206063e-06, + "loss": 0.8566, + "step": 4590 + }, + { + "epoch": 2.2047621903676604, + "grad_norm": 26.625, + "learning_rate": 3.1397692013546154e-06, + "loss": 0.8479, + "step": 4595 + }, + { + "epoch": 2.207161278714089, + "grad_norm": 2.65625, + "learning_rate": 3.1231217666756085e-06, + "loss": 0.9263, + "step": 4600 + }, + { + "epoch": 2.207161278714089, + "eval_loss": 0.9467650651931763, + "eval_runtime": 175.6879, + "eval_samples_per_second": 42.177, + "eval_steps_per_second": 10.547, + "step": 4600 + }, + { + "epoch": 2.209560367060517, + "grad_norm": 3.3125, + "learning_rate": 3.106498515585905e-06, + "loss": 0.8065, + "step": 4605 + }, + { + "epoch": 2.2119594554069453, + "grad_norm": 2.796875, + "learning_rate": 3.089899662276208e-06, + "loss": 0.7885, + "step": 4610 + }, + { + "epoch": 2.2143585437533737, + "grad_norm": 3.171875, + "learning_rate": 3.073325420622862e-06, + "loss": 0.9224, + "step": 4615 + }, + { + "epoch": 2.216757632099802, + "grad_norm": 3.578125, + "learning_rate": 3.056776004185086e-06, + "loss": 0.8299, + "step": 4620 + }, + { + "epoch": 2.2191567204462306, + "grad_norm": 2.875, + "learning_rate": 3.0402516262022312e-06, + "loss": 0.698, + "step": 4625 + }, + { + "epoch": 2.221555808792659, + "grad_norm": 3.171875, + "learning_rate": 3.0237524995910205e-06, + "loss": 0.8001, + "step": 4630 + }, + { + "epoch": 2.223954897139087, + "grad_norm": 3.5625, + "learning_rate": 3.0072788369428195e-06, + "loss": 0.7655, + "step": 4635 + }, + { + "epoch": 2.2263539854855154, + "grad_norm": 2.484375, + "learning_rate": 2.9908308505208864e-06, + "loss": 0.8143, + "step": 4640 + }, + { + "epoch": 2.228753073831944, + "grad_norm": 4.125, + "learning_rate": 2.9744087522576444e-06, + "loss": 0.8349, + "step": 4645 + }, + { + "epoch": 2.2311521621783723, + "grad_norm": 3.28125, + "learning_rate": 2.9580127537519432e-06, + "loss": 0.8516, + "step": 4650 + }, + { + "epoch": 2.2335512505248007, + "grad_norm": 3.78125, + "learning_rate": 2.9416430662663432e-06, + "loss": 0.8004, + "step": 4655 + }, + { + "epoch": 2.2359503388712287, + "grad_norm": 2.78125, + "learning_rate": 2.9252999007243786e-06, + "loss": 0.8981, + "step": 4660 + }, + { + "epoch": 2.238349427217657, + "grad_norm": 2.953125, + "learning_rate": 2.908983467707856e-06, + "loss": 0.7687, + "step": 4665 + }, + { + "epoch": 2.2407485155640856, + "grad_norm": 2.984375, + "learning_rate": 2.8926939774541273e-06, + "loss": 0.7922, + "step": 4670 + }, + { + "epoch": 2.243147603910514, + "grad_norm": 2.84375, + "learning_rate": 2.876431639853396e-06, + "loss": 0.7277, + "step": 4675 + }, + { + "epoch": 2.2455466922569425, + "grad_norm": 2.640625, + "learning_rate": 2.860196664445988e-06, + "loss": 0.7337, + "step": 4680 + }, + { + "epoch": 2.247945780603371, + "grad_norm": 3.265625, + "learning_rate": 2.8439892604196834e-06, + "loss": 0.9654, + "step": 4685 + }, + { + "epoch": 2.250344868949799, + "grad_norm": 2.890625, + "learning_rate": 2.8278096366069945e-06, + "loss": 0.875, + "step": 4690 + }, + { + "epoch": 2.2527439572962273, + "grad_norm": 3.375, + "learning_rate": 2.811658001482489e-06, + "loss": 0.8906, + "step": 4695 + }, + { + "epoch": 2.2551430456426558, + "grad_norm": 3.28125, + "learning_rate": 2.7955345631600993e-06, + "loss": 0.7735, + "step": 4700 + }, + { + "epoch": 2.2551430456426558, + "eval_loss": 0.9466607570648193, + "eval_runtime": 179.2943, + "eval_samples_per_second": 41.329, + "eval_steps_per_second": 10.335, + "step": 4700 + }, + { + "epoch": 2.257542133989084, + "grad_norm": 3.03125, + "learning_rate": 2.779439529390441e-06, + "loss": 0.8957, + "step": 4705 + }, + { + "epoch": 2.2599412223355126, + "grad_norm": 3.03125, + "learning_rate": 2.7633731075581406e-06, + "loss": 0.8791, + "step": 4710 + }, + { + "epoch": 2.2623403106819406, + "grad_norm": 3.125, + "learning_rate": 2.747335504679156e-06, + "loss": 0.8097, + "step": 4715 + }, + { + "epoch": 2.264739399028369, + "grad_norm": 2.59375, + "learning_rate": 2.7313269273981135e-06, + "loss": 0.8695, + "step": 4720 + }, + { + "epoch": 2.2671384873747975, + "grad_norm": 2.984375, + "learning_rate": 2.7153475819856425e-06, + "loss": 0.8596, + "step": 4725 + }, + { + "epoch": 2.269537575721226, + "grad_norm": 4.40625, + "learning_rate": 2.6993976743357264e-06, + "loss": 0.8044, + "step": 4730 + }, + { + "epoch": 2.2719366640676544, + "grad_norm": 3.1875, + "learning_rate": 2.6834774099630323e-06, + "loss": 0.8534, + "step": 4735 + }, + { + "epoch": 2.274335752414083, + "grad_norm": 3.328125, + "learning_rate": 2.667586994000283e-06, + "loss": 0.8286, + "step": 4740 + }, + { + "epoch": 2.2767348407605112, + "grad_norm": 3.40625, + "learning_rate": 2.651726631195599e-06, + "loss": 0.871, + "step": 4745 + }, + { + "epoch": 2.2791339291069392, + "grad_norm": 2.734375, + "learning_rate": 2.635896525909868e-06, + "loss": 0.8352, + "step": 4750 + }, + { + "epoch": 2.2815330174533677, + "grad_norm": 3.03125, + "learning_rate": 2.620096882114106e-06, + "loss": 0.7521, + "step": 4755 + }, + { + "epoch": 2.283932105799796, + "grad_norm": 3.1875, + "learning_rate": 2.6043279033868407e-06, + "loss": 0.9201, + "step": 4760 + }, + { + "epoch": 2.2863311941462245, + "grad_norm": 3.125, + "learning_rate": 2.5885897929114662e-06, + "loss": 0.8577, + "step": 4765 + }, + { + "epoch": 2.288730282492653, + "grad_norm": 2.953125, + "learning_rate": 2.572882753473654e-06, + "loss": 0.8168, + "step": 4770 + }, + { + "epoch": 2.291129370839081, + "grad_norm": 2.953125, + "learning_rate": 2.5572069874587157e-06, + "loss": 0.8826, + "step": 4775 + }, + { + "epoch": 2.2935284591855094, + "grad_norm": 3.21875, + "learning_rate": 2.5415626968490075e-06, + "loss": 0.8244, + "step": 4780 + }, + { + "epoch": 2.295927547531938, + "grad_norm": 3.046875, + "learning_rate": 2.5259500832213224e-06, + "loss": 0.7957, + "step": 4785 + }, + { + "epoch": 2.2983266358783663, + "grad_norm": 2.953125, + "learning_rate": 2.510369347744303e-06, + "loss": 0.8546, + "step": 4790 + }, + { + "epoch": 2.3007257242247947, + "grad_norm": 2.8125, + "learning_rate": 2.4948206911758284e-06, + "loss": 0.8521, + "step": 4795 + }, + { + "epoch": 2.303124812571223, + "grad_norm": 3.234375, + "learning_rate": 2.4793043138604546e-06, + "loss": 0.8454, + "step": 4800 + }, + { + "epoch": 2.303124812571223, + "eval_loss": 0.94642174243927, + "eval_runtime": 193.1371, + "eval_samples_per_second": 38.367, + "eval_steps_per_second": 9.594, + "step": 4800 + }, + { + "epoch": 2.305523900917651, + "grad_norm": 3.5625, + "learning_rate": 2.46382041572681e-06, + "loss": 0.7795, + "step": 4805 + }, + { + "epoch": 2.3079229892640796, + "grad_norm": 3.125, + "learning_rate": 2.4483691962850327e-06, + "loss": 0.9321, + "step": 4810 + }, + { + "epoch": 2.310322077610508, + "grad_norm": 3.421875, + "learning_rate": 2.432950854624193e-06, + "loss": 0.8356, + "step": 4815 + }, + { + "epoch": 2.3127211659569364, + "grad_norm": 3.34375, + "learning_rate": 2.4175655894097335e-06, + "loss": 0.7706, + "step": 4820 + }, + { + "epoch": 2.315120254303365, + "grad_norm": 2.734375, + "learning_rate": 2.4022135988809025e-06, + "loss": 0.791, + "step": 4825 + }, + { + "epoch": 2.317519342649793, + "grad_norm": 3.265625, + "learning_rate": 2.3868950808482107e-06, + "loss": 0.7859, + "step": 4830 + }, + { + "epoch": 2.3199184309962213, + "grad_norm": 2.953125, + "learning_rate": 2.371610232690869e-06, + "loss": 0.8915, + "step": 4835 + }, + { + "epoch": 2.3223175193426497, + "grad_norm": 3.0625, + "learning_rate": 2.3563592513542543e-06, + "loss": 0.9431, + "step": 4840 + }, + { + "epoch": 2.324716607689078, + "grad_norm": 2.9375, + "learning_rate": 2.3411423333473683e-06, + "loss": 0.8549, + "step": 4845 + }, + { + "epoch": 2.3271156960355066, + "grad_norm": 3.328125, + "learning_rate": 2.325959674740306e-06, + "loss": 0.8688, + "step": 4850 + }, + { + "epoch": 2.329514784381935, + "grad_norm": 3.703125, + "learning_rate": 2.3108114711617335e-06, + "loss": 0.9216, + "step": 4855 + }, + { + "epoch": 2.331913872728363, + "grad_norm": 2.71875, + "learning_rate": 2.29569791779636e-06, + "loss": 0.8822, + "step": 4860 + }, + { + "epoch": 2.3343129610747915, + "grad_norm": 3.0, + "learning_rate": 2.2806192093824277e-06, + "loss": 0.837, + "step": 4865 + }, + { + "epoch": 2.33671204942122, + "grad_norm": 3.109375, + "learning_rate": 2.265575540209198e-06, + "loss": 0.8704, + "step": 4870 + }, + { + "epoch": 2.3391111377676483, + "grad_norm": 2.75, + "learning_rate": 2.250567104114461e-06, + "loss": 0.7557, + "step": 4875 + }, + { + "epoch": 2.3415102261140768, + "grad_norm": 3.015625, + "learning_rate": 2.235594094482014e-06, + "loss": 0.756, + "step": 4880 + }, + { + "epoch": 2.3439093144605048, + "grad_norm": 3.03125, + "learning_rate": 2.220656704239198e-06, + "loss": 0.887, + "step": 4885 + }, + { + "epoch": 2.346308402806933, + "grad_norm": 3.390625, + "learning_rate": 2.2057551258543893e-06, + "loss": 0.799, + "step": 4890 + }, + { + "epoch": 2.3487074911533616, + "grad_norm": 3.921875, + "learning_rate": 2.1908895513345314e-06, + "loss": 0.9533, + "step": 4895 + }, + { + "epoch": 2.35110657949979, + "grad_norm": 3.328125, + "learning_rate": 2.176060172222654e-06, + "loss": 0.8562, + "step": 4900 + }, + { + "epoch": 2.35110657949979, + "eval_loss": 0.9465652108192444, + "eval_runtime": 187.1163, + "eval_samples_per_second": 39.601, + "eval_steps_per_second": 9.903, + "step": 4900 + }, + { + "epoch": 2.3535056678462185, + "grad_norm": 3.03125, + "learning_rate": 2.1612671795954193e-06, + "loss": 0.9049, + "step": 4905 + }, + { + "epoch": 2.355904756192647, + "grad_norm": 2.75, + "learning_rate": 2.146510764060633e-06, + "loss": 0.7734, + "step": 4910 + }, + { + "epoch": 2.3583038445390754, + "grad_norm": 2.953125, + "learning_rate": 2.1317911157548187e-06, + "loss": 0.8603, + "step": 4915 + }, + { + "epoch": 2.3607029328855034, + "grad_norm": 3.203125, + "learning_rate": 2.1171084243407487e-06, + "loss": 0.877, + "step": 4920 + }, + { + "epoch": 2.363102021231932, + "grad_norm": 2.875, + "learning_rate": 2.1024628790050038e-06, + "loss": 0.9646, + "step": 4925 + }, + { + "epoch": 2.3655011095783602, + "grad_norm": 2.546875, + "learning_rate": 2.0878546684555384e-06, + "loss": 0.9592, + "step": 4930 + }, + { + "epoch": 2.3679001979247887, + "grad_norm": 3.6875, + "learning_rate": 2.073283980919254e-06, + "loss": 0.7567, + "step": 4935 + }, + { + "epoch": 2.370299286271217, + "grad_norm": 2.65625, + "learning_rate": 2.0587510041395553e-06, + "loss": 0.7229, + "step": 4940 + }, + { + "epoch": 2.372698374617645, + "grad_norm": 3.234375, + "learning_rate": 2.044255925373956e-06, + "loss": 0.8459, + "step": 4945 + }, + { + "epoch": 2.3750974629640735, + "grad_norm": 3.296875, + "learning_rate": 2.029798931391646e-06, + "loss": 0.8717, + "step": 4950 + }, + { + "epoch": 2.377496551310502, + "grad_norm": 3.125, + "learning_rate": 2.015380208471096e-06, + "loss": 0.8185, + "step": 4955 + }, + { + "epoch": 2.3798956396569304, + "grad_norm": 3.09375, + "learning_rate": 2.0009999423976527e-06, + "loss": 0.8071, + "step": 4960 + }, + { + "epoch": 2.382294728003359, + "grad_norm": 2.734375, + "learning_rate": 1.9866583184611452e-06, + "loss": 0.8988, + "step": 4965 + }, + { + "epoch": 2.3846938163497873, + "grad_norm": 2.78125, + "learning_rate": 1.9723555214534983e-06, + "loss": 0.8504, + "step": 4970 + }, + { + "epoch": 2.3870929046962153, + "grad_norm": 3.078125, + "learning_rate": 1.958091735666356e-06, + "loss": 0.8153, + "step": 4975 + }, + { + "epoch": 2.3894919930426437, + "grad_norm": 2.453125, + "learning_rate": 1.9438671448886963e-06, + "loss": 0.8636, + "step": 4980 + }, + { + "epoch": 2.391891081389072, + "grad_norm": 3.125, + "learning_rate": 1.929681932404473e-06, + "loss": 0.7966, + "step": 4985 + }, + { + "epoch": 2.3942901697355006, + "grad_norm": 3.859375, + "learning_rate": 1.915536280990249e-06, + "loss": 0.7249, + "step": 4990 + }, + { + "epoch": 2.396689258081929, + "grad_norm": 3.546875, + "learning_rate": 1.9014303729128402e-06, + "loss": 0.8304, + "step": 4995 + }, + { + "epoch": 2.399088346428357, + "grad_norm": 3.015625, + "learning_rate": 1.8873643899269761e-06, + "loss": 0.8923, + "step": 5000 + }, + { + "epoch": 2.399088346428357, + "eval_loss": 0.946355402469635, + "eval_runtime": 175.772, + "eval_samples_per_second": 42.157, + "eval_steps_per_second": 10.542, + "step": 5000 + }, + { + "epoch": 2.4014874347747854, + "grad_norm": 3.203125, + "learning_rate": 1.8733385132729453e-06, + "loss": 0.9251, + "step": 5005 + }, + { + "epoch": 2.403886523121214, + "grad_norm": 4.28125, + "learning_rate": 1.859352923674267e-06, + "loss": 0.9606, + "step": 5010 + }, + { + "epoch": 2.4062856114676423, + "grad_norm": 2.8125, + "learning_rate": 1.8454078013353604e-06, + "loss": 0.9109, + "step": 5015 + }, + { + "epoch": 2.4086846998140707, + "grad_norm": 3.265625, + "learning_rate": 1.8315033259392313e-06, + "loss": 0.7988, + "step": 5020 + }, + { + "epoch": 2.411083788160499, + "grad_norm": 3.0625, + "learning_rate": 1.8176396766451353e-06, + "loss": 0.8556, + "step": 5025 + }, + { + "epoch": 2.4134828765069276, + "grad_norm": 3.4375, + "learning_rate": 1.803817032086298e-06, + "loss": 0.8578, + "step": 5030 + }, + { + "epoch": 2.4158819648533556, + "grad_norm": 3.9375, + "learning_rate": 1.7900355703675893e-06, + "loss": 0.8618, + "step": 5035 + }, + { + "epoch": 2.418281053199784, + "grad_norm": 3.34375, + "learning_rate": 1.7762954690632416e-06, + "loss": 0.8395, + "step": 5040 + }, + { + "epoch": 2.4206801415462125, + "grad_norm": 4.09375, + "learning_rate": 1.7625969052145557e-06, + "loss": 0.885, + "step": 5045 + }, + { + "epoch": 2.423079229892641, + "grad_norm": 3.296875, + "learning_rate": 1.7489400553276281e-06, + "loss": 0.7281, + "step": 5050 + }, + { + "epoch": 2.4254783182390693, + "grad_norm": 2.71875, + "learning_rate": 1.73532509537106e-06, + "loss": 0.8529, + "step": 5055 + }, + { + "epoch": 2.4278774065854973, + "grad_norm": 3.34375, + "learning_rate": 1.7217522007737108e-06, + "loss": 0.9102, + "step": 5060 + }, + { + "epoch": 2.4302764949319258, + "grad_norm": 2.921875, + "learning_rate": 1.7082215464224228e-06, + "loss": 0.8904, + "step": 5065 + }, + { + "epoch": 2.432675583278354, + "grad_norm": 3.0625, + "learning_rate": 1.6947333066597721e-06, + "loss": 0.883, + "step": 5070 + }, + { + "epoch": 2.4350746716247826, + "grad_norm": 3.359375, + "learning_rate": 1.6812876552818236e-06, + "loss": 0.9472, + "step": 5075 + }, + { + "epoch": 2.437473759971211, + "grad_norm": 3.09375, + "learning_rate": 1.6678847655358899e-06, + "loss": 0.9375, + "step": 5080 + }, + { + "epoch": 2.4398728483176395, + "grad_norm": 2.796875, + "learning_rate": 1.6545248101182992e-06, + "loss": 0.8775, + "step": 5085 + }, + { + "epoch": 2.4422719366640675, + "grad_norm": 4.96875, + "learning_rate": 1.641207961172175e-06, + "loss": 0.8681, + "step": 5090 + }, + { + "epoch": 2.444671025010496, + "grad_norm": 2.40625, + "learning_rate": 1.627934390285207e-06, + "loss": 0.82, + "step": 5095 + }, + { + "epoch": 2.4470701133569244, + "grad_norm": 3.5625, + "learning_rate": 1.614704268487451e-06, + "loss": 0.7529, + "step": 5100 + }, + { + "epoch": 2.4470701133569244, + "eval_loss": 0.9462727904319763, + "eval_runtime": 175.7988, + "eval_samples_per_second": 42.15, + "eval_steps_per_second": 10.54, + "step": 5100 + }, + { + "epoch": 2.449469201703353, + "grad_norm": 3.4375, + "learning_rate": 1.60151776624912e-06, + "loss": 0.7723, + "step": 5105 + }, + { + "epoch": 2.4518682900497812, + "grad_norm": 2.84375, + "learning_rate": 1.5883750534783876e-06, + "loss": 0.7418, + "step": 5110 + }, + { + "epoch": 2.4542673783962092, + "grad_norm": 2.71875, + "learning_rate": 1.5752762995192e-06, + "loss": 0.823, + "step": 5115 + }, + { + "epoch": 2.4566664667426377, + "grad_norm": 3.125, + "learning_rate": 1.5622216731490975e-06, + "loss": 0.889, + "step": 5120 + }, + { + "epoch": 2.459065555089066, + "grad_norm": 3.265625, + "learning_rate": 1.549211342577031e-06, + "loss": 0.7486, + "step": 5125 + }, + { + "epoch": 2.4614646434354945, + "grad_norm": 3.140625, + "learning_rate": 1.536245475441201e-06, + "loss": 0.8351, + "step": 5130 + }, + { + "epoch": 2.463863731781923, + "grad_norm": 2.875, + "learning_rate": 1.523324238806902e-06, + "loss": 0.7765, + "step": 5135 + }, + { + "epoch": 2.4662628201283514, + "grad_norm": 4.03125, + "learning_rate": 1.5104477991643517e-06, + "loss": 0.9074, + "step": 5140 + }, + { + "epoch": 2.4686619084747794, + "grad_norm": 2.921875, + "learning_rate": 1.4976163224265728e-06, + "loss": 0.8319, + "step": 5145 + }, + { + "epoch": 2.471060996821208, + "grad_norm": 3.484375, + "learning_rate": 1.4848299739272304e-06, + "loss": 0.7772, + "step": 5150 + }, + { + "epoch": 2.4734600851676363, + "grad_norm": 2.515625, + "learning_rate": 1.4720889184185155e-06, + "loss": 0.8531, + "step": 5155 + }, + { + "epoch": 2.4758591735140647, + "grad_norm": 3.796875, + "learning_rate": 1.459393320069018e-06, + "loss": 0.7806, + "step": 5160 + }, + { + "epoch": 2.478258261860493, + "grad_norm": 3.53125, + "learning_rate": 1.4467433424616155e-06, + "loss": 1.0109, + "step": 5165 + }, + { + "epoch": 2.480657350206921, + "grad_norm": 3.0, + "learning_rate": 1.4341391485913536e-06, + "loss": 0.8122, + "step": 5170 + }, + { + "epoch": 2.4830564385533496, + "grad_norm": 2.921875, + "learning_rate": 1.4215809008633636e-06, + "loss": 0.8517, + "step": 5175 + }, + { + "epoch": 2.485455526899778, + "grad_norm": 3.15625, + "learning_rate": 1.409068761090755e-06, + "loss": 0.7956, + "step": 5180 + }, + { + "epoch": 2.4878546152462064, + "grad_norm": 3.59375, + "learning_rate": 1.3966028904925372e-06, + "loss": 0.7672, + "step": 5185 + }, + { + "epoch": 2.490253703592635, + "grad_norm": 2.609375, + "learning_rate": 1.384183449691539e-06, + "loss": 0.7573, + "step": 5190 + }, + { + "epoch": 2.4926527919390633, + "grad_norm": 3.625, + "learning_rate": 1.3718105987123482e-06, + "loss": 0.8696, + "step": 5195 + }, + { + "epoch": 2.4950518802854917, + "grad_norm": 2.703125, + "learning_rate": 1.3594844969792304e-06, + "loss": 0.8421, + "step": 5200 + }, + { + "epoch": 2.4950518802854917, + "eval_loss": 0.9463452100753784, + "eval_runtime": 175.6343, + "eval_samples_per_second": 42.19, + "eval_steps_per_second": 10.55, + "step": 5200 + }, + { + "epoch": 2.4974509686319197, + "grad_norm": 3.546875, + "learning_rate": 1.347205303314098e-06, + "loss": 0.9172, + "step": 5205 + }, + { + "epoch": 2.499850056978348, + "grad_norm": 3.28125, + "learning_rate": 1.3349731759344469e-06, + "loss": 0.8079, + "step": 5210 + }, + { + "epoch": 2.5022491453247766, + "grad_norm": 3.03125, + "learning_rate": 1.3227882724513253e-06, + "loss": 0.752, + "step": 5215 + }, + { + "epoch": 2.504648233671205, + "grad_norm": 2.96875, + "learning_rate": 1.3106507498672999e-06, + "loss": 0.8044, + "step": 5220 + }, + { + "epoch": 2.507047322017633, + "grad_norm": 3.6875, + "learning_rate": 1.2985607645744352e-06, + "loss": 0.8345, + "step": 5225 + }, + { + "epoch": 2.5094464103640615, + "grad_norm": 2.984375, + "learning_rate": 1.286518472352276e-06, + "loss": 0.7665, + "step": 5230 + }, + { + "epoch": 2.51184549871049, + "grad_norm": 2.71875, + "learning_rate": 1.2745240283658456e-06, + "loss": 0.8592, + "step": 5235 + }, + { + "epoch": 2.5142445870569183, + "grad_norm": 2.734375, + "learning_rate": 1.2625775871636376e-06, + "loss": 0.7824, + "step": 5240 + }, + { + "epoch": 2.5166436754033468, + "grad_norm": 3.59375, + "learning_rate": 1.2506793026756314e-06, + "loss": 0.827, + "step": 5245 + }, + { + "epoch": 2.519042763749775, + "grad_norm": 3.296875, + "learning_rate": 1.2388293282113067e-06, + "loss": 0.8529, + "step": 5250 + }, + { + "epoch": 2.5214418520962036, + "grad_norm": 3.53125, + "learning_rate": 1.2270278164576688e-06, + "loss": 0.8162, + "step": 5255 + }, + { + "epoch": 2.523840940442632, + "grad_norm": 2.84375, + "learning_rate": 1.2152749194772783e-06, + "loss": 0.8861, + "step": 5260 + }, + { + "epoch": 2.52624002878906, + "grad_norm": 2.6875, + "learning_rate": 1.2035707887062981e-06, + "loss": 0.7473, + "step": 5265 + }, + { + "epoch": 2.5286391171354885, + "grad_norm": 2.78125, + "learning_rate": 1.1919155749525357e-06, + "loss": 0.7434, + "step": 5270 + }, + { + "epoch": 2.531038205481917, + "grad_norm": 2.890625, + "learning_rate": 1.1803094283935002e-06, + "loss": 0.835, + "step": 5275 + }, + { + "epoch": 2.5334372938283454, + "grad_norm": 3.125, + "learning_rate": 1.1687524985744764e-06, + "loss": 0.86, + "step": 5280 + }, + { + "epoch": 2.5358363821747734, + "grad_norm": 3.078125, + "learning_rate": 1.1572449344065816e-06, + "loss": 0.84, + "step": 5285 + }, + { + "epoch": 2.538235470521202, + "grad_norm": 3.6875, + "learning_rate": 1.1457868841648656e-06, + "loss": 0.8313, + "step": 5290 + }, + { + "epoch": 2.5406345588676302, + "grad_norm": 2.75, + "learning_rate": 1.1343784954863847e-06, + "loss": 0.794, + "step": 5295 + }, + { + "epoch": 2.5430336472140587, + "grad_norm": 2.8125, + "learning_rate": 1.123019915368308e-06, + "loss": 0.8578, + "step": 5300 + }, + { + "epoch": 2.5430336472140587, + "eval_loss": 0.9463096261024475, + "eval_runtime": 175.7279, + "eval_samples_per_second": 42.167, + "eval_steps_per_second": 10.545, + "step": 5300 + }, + { + "epoch": 2.545432735560487, + "grad_norm": 2.5625, + "learning_rate": 1.1117112901660193e-06, + "loss": 0.7754, + "step": 5305 + }, + { + "epoch": 2.5478318239069155, + "grad_norm": 3.96875, + "learning_rate": 1.1004527655912383e-06, + "loss": 0.9777, + "step": 5310 + }, + { + "epoch": 2.550230912253344, + "grad_norm": 2.953125, + "learning_rate": 1.0892444867101288e-06, + "loss": 0.8786, + "step": 5315 + }, + { + "epoch": 2.552630000599772, + "grad_norm": 3.1875, + "learning_rate": 1.0780865979414463e-06, + "loss": 0.9006, + "step": 5320 + }, + { + "epoch": 2.5550290889462004, + "grad_norm": 2.9375, + "learning_rate": 1.0669792430546655e-06, + "loss": 0.7912, + "step": 5325 + }, + { + "epoch": 2.557428177292629, + "grad_norm": 2.78125, + "learning_rate": 1.0559225651681332e-06, + "loss": 0.8563, + "step": 5330 + }, + { + "epoch": 2.5598272656390573, + "grad_norm": 2.765625, + "learning_rate": 1.0449167067472205e-06, + "loss": 0.8267, + "step": 5335 + }, + { + "epoch": 2.5622263539854853, + "grad_norm": 3.140625, + "learning_rate": 1.0339618096024946e-06, + "loss": 0.7603, + "step": 5340 + }, + { + "epoch": 2.5646254423319137, + "grad_norm": 3.171875, + "learning_rate": 1.0230580148878777e-06, + "loss": 0.9592, + "step": 5345 + }, + { + "epoch": 2.567024530678342, + "grad_norm": 2.5625, + "learning_rate": 1.0122054630988454e-06, + "loss": 0.8984, + "step": 5350 + }, + { + "epoch": 2.5694236190247706, + "grad_norm": 2.828125, + "learning_rate": 1.0014042940706031e-06, + "loss": 0.8046, + "step": 5355 + }, + { + "epoch": 2.571822707371199, + "grad_norm": 2.65625, + "learning_rate": 9.9065464697629e-07, + "loss": 0.727, + "step": 5360 + }, + { + "epoch": 2.5742217957176274, + "grad_norm": 2.90625, + "learning_rate": 9.799566603251847e-07, + "loss": 0.7937, + "step": 5365 + }, + { + "epoch": 2.576620884064056, + "grad_norm": 2.59375, + "learning_rate": 9.693104719609213e-07, + "loss": 0.7619, + "step": 5370 + }, + { + "epoch": 2.579019972410484, + "grad_norm": 2.9375, + "learning_rate": 9.587162190597104e-07, + "loss": 0.8086, + "step": 5375 + }, + { + "epoch": 2.5814190607569123, + "grad_norm": 3.03125, + "learning_rate": 9.481740381285782e-07, + "loss": 0.9133, + "step": 5380 + }, + { + "epoch": 2.5838181491033407, + "grad_norm": 2.84375, + "learning_rate": 9.376840650035995e-07, + "loss": 0.8183, + "step": 5385 + }, + { + "epoch": 2.586217237449769, + "grad_norm": 3.109375, + "learning_rate": 9.272464348481513e-07, + "loss": 0.7881, + "step": 5390 + }, + { + "epoch": 2.5886163257961976, + "grad_norm": 3.328125, + "learning_rate": 9.168612821511729e-07, + "loss": 0.7716, + "step": 5395 + }, + { + "epoch": 2.5910154141426256, + "grad_norm": 3.171875, + "learning_rate": 9.065287407254292e-07, + "loss": 0.8143, + "step": 5400 + }, + { + "epoch": 2.5910154141426256, + "eval_loss": 0.9463847279548645, + "eval_runtime": 175.7856, + "eval_samples_per_second": 42.154, + "eval_steps_per_second": 10.541, + "step": 5400 + }, + { + "epoch": 2.593414502489054, + "grad_norm": 3.0, + "learning_rate": 8.962489437057892e-07, + "loss": 0.8354, + "step": 5405 + }, + { + "epoch": 2.5958135908354825, + "grad_norm": 3.03125, + "learning_rate": 8.860220235475136e-07, + "loss": 0.757, + "step": 5410 + }, + { + "epoch": 2.598212679181911, + "grad_norm": 2.78125, + "learning_rate": 8.758481120245355e-07, + "loss": 0.8845, + "step": 5415 + }, + { + "epoch": 2.6006117675283393, + "grad_norm": 3.34375, + "learning_rate": 8.6572734022778e-07, + "loss": 0.8356, + "step": 5420 + }, + { + "epoch": 2.6030108558747678, + "grad_norm": 3.40625, + "learning_rate": 8.556598385634645e-07, + "loss": 0.8755, + "step": 5425 + }, + { + "epoch": 2.605409944221196, + "grad_norm": 3.546875, + "learning_rate": 8.456457367514154e-07, + "loss": 0.8882, + "step": 5430 + }, + { + "epoch": 2.607809032567624, + "grad_norm": 3.125, + "learning_rate": 8.356851638234087e-07, + "loss": 0.8723, + "step": 5435 + }, + { + "epoch": 2.6102081209140526, + "grad_norm": 2.921875, + "learning_rate": 8.257782481214954e-07, + "loss": 0.7859, + "step": 5440 + }, + { + "epoch": 2.612607209260481, + "grad_norm": 3.21875, + "learning_rate": 8.159251172963545e-07, + "loss": 0.8549, + "step": 5445 + }, + { + "epoch": 2.6150062976069095, + "grad_norm": 3.390625, + "learning_rate": 8.061258983056452e-07, + "loss": 0.9265, + "step": 5450 + }, + { + "epoch": 2.6174053859533375, + "grad_norm": 2.859375, + "learning_rate": 7.963807174123772e-07, + "loss": 0.6679, + "step": 5455 + }, + { + "epoch": 2.619804474299766, + "grad_norm": 2.84375, + "learning_rate": 7.866897001832696e-07, + "loss": 0.6948, + "step": 5460 + }, + { + "epoch": 2.6222035626461944, + "grad_norm": 2.828125, + "learning_rate": 7.770529714871527e-07, + "loss": 0.7636, + "step": 5465 + }, + { + "epoch": 2.624602650992623, + "grad_norm": 2.96875, + "learning_rate": 7.674706554933414e-07, + "loss": 0.9437, + "step": 5470 + }, + { + "epoch": 2.6270017393390512, + "grad_norm": 3.609375, + "learning_rate": 7.579428756700463e-07, + "loss": 0.8655, + "step": 5475 + }, + { + "epoch": 2.6294008276854797, + "grad_norm": 3.4375, + "learning_rate": 7.484697547827763e-07, + "loss": 0.9146, + "step": 5480 + }, + { + "epoch": 2.631799916031908, + "grad_norm": 2.90625, + "learning_rate": 7.390514148927619e-07, + "loss": 0.9243, + "step": 5485 + }, + { + "epoch": 2.634199004378336, + "grad_norm": 3.34375, + "learning_rate": 7.296879773553784e-07, + "loss": 0.812, + "step": 5490 + }, + { + "epoch": 2.6365980927247645, + "grad_norm": 3.484375, + "learning_rate": 7.203795628185856e-07, + "loss": 0.9371, + "step": 5495 + }, + { + "epoch": 2.638997181071193, + "grad_norm": 3.109375, + "learning_rate": 7.111262912213707e-07, + "loss": 0.8117, + "step": 5500 + }, + { + "epoch": 2.638997181071193, + "eval_loss": 0.9463163614273071, + "eval_runtime": 175.7487, + "eval_samples_per_second": 42.162, + "eval_steps_per_second": 10.543, + "step": 5500 + }, + { + "epoch": 2.6413962694176214, + "grad_norm": 2.984375, + "learning_rate": 7.019282817922029e-07, + "loss": 0.7939, + "step": 5505 + }, + { + "epoch": 2.6437953577640494, + "grad_norm": 3.40625, + "learning_rate": 6.927856530474985e-07, + "loss": 0.7907, + "step": 5510 + }, + { + "epoch": 2.646194446110478, + "grad_norm": 4.09375, + "learning_rate": 6.836985227900944e-07, + "loss": 0.9429, + "step": 5515 + }, + { + "epoch": 2.6485935344569063, + "grad_norm": 3.734375, + "learning_rate": 6.746670081077266e-07, + "loss": 0.8834, + "step": 5520 + }, + { + "epoch": 2.6509926228033347, + "grad_norm": 3.4375, + "learning_rate": 6.656912253715281e-07, + "loss": 0.8487, + "step": 5525 + }, + { + "epoch": 2.653391711149763, + "grad_norm": 3.5, + "learning_rate": 6.567712902345208e-07, + "loss": 0.9643, + "step": 5530 + }, + { + "epoch": 2.6557907994961916, + "grad_norm": 3.15625, + "learning_rate": 6.479073176301332e-07, + "loss": 0.8767, + "step": 5535 + }, + { + "epoch": 2.65818988784262, + "grad_norm": 3.140625, + "learning_rate": 6.390994217707142e-07, + "loss": 0.8471, + "step": 5540 + }, + { + "epoch": 2.6605889761890484, + "grad_norm": 2.8125, + "learning_rate": 6.303477161460647e-07, + "loss": 0.8318, + "step": 5545 + }, + { + "epoch": 2.6629880645354764, + "grad_norm": 2.453125, + "learning_rate": 6.216523135219715e-07, + "loss": 0.8511, + "step": 5550 + }, + { + "epoch": 2.665387152881905, + "grad_norm": 3.578125, + "learning_rate": 6.130133259387633e-07, + "loss": 0.8708, + "step": 5555 + }, + { + "epoch": 2.6677862412283333, + "grad_norm": 3.265625, + "learning_rate": 6.044308647098512e-07, + "loss": 0.7847, + "step": 5560 + }, + { + "epoch": 2.6701853295747617, + "grad_norm": 3.375, + "learning_rate": 5.959050404203109e-07, + "loss": 0.9311, + "step": 5565 + }, + { + "epoch": 2.6725844179211897, + "grad_norm": 2.859375, + "learning_rate": 5.874359629254511e-07, + "loss": 0.8072, + "step": 5570 + }, + { + "epoch": 2.674983506267618, + "grad_norm": 2.8125, + "learning_rate": 5.79023741349391e-07, + "loss": 0.8887, + "step": 5575 + }, + { + "epoch": 2.6773825946140466, + "grad_norm": 3.171875, + "learning_rate": 5.706684840836674e-07, + "loss": 0.8971, + "step": 5580 + }, + { + "epoch": 2.679781682960475, + "grad_norm": 2.640625, + "learning_rate": 5.623702987858293e-07, + "loss": 0.7582, + "step": 5585 + }, + { + "epoch": 2.6821807713069035, + "grad_norm": 2.90625, + "learning_rate": 5.541292923780516e-07, + "loss": 0.7481, + "step": 5590 + }, + { + "epoch": 2.684579859653332, + "grad_norm": 3.125, + "learning_rate": 5.459455710457601e-07, + "loss": 0.8242, + "step": 5595 + }, + { + "epoch": 2.6869789479997603, + "grad_norm": 2.796875, + "learning_rate": 5.378192402362653e-07, + "loss": 0.861, + "step": 5600 + }, + { + "epoch": 2.6869789479997603, + "eval_loss": 0.9463757276535034, + "eval_runtime": 175.8536, + "eval_samples_per_second": 42.137, + "eval_steps_per_second": 10.537, + "step": 5600 + }, + { + "epoch": 2.6893780363461883, + "grad_norm": 2.8125, + "learning_rate": 5.29750404657392e-07, + "loss": 0.6736, + "step": 5605 + }, + { + "epoch": 2.6917771246926168, + "grad_norm": 2.890625, + "learning_rate": 5.217391682761469e-07, + "loss": 0.8122, + "step": 5610 + }, + { + "epoch": 2.694176213039045, + "grad_norm": 2.9375, + "learning_rate": 5.137856343173675e-07, + "loss": 0.8597, + "step": 5615 + }, + { + "epoch": 2.6965753013854736, + "grad_norm": 2.515625, + "learning_rate": 5.058899052623933e-07, + "loss": 0.6924, + "step": 5620 + }, + { + "epoch": 2.6989743897319016, + "grad_norm": 3.296875, + "learning_rate": 4.980520828477509e-07, + "loss": 0.9464, + "step": 5625 + }, + { + "epoch": 2.70137347807833, + "grad_norm": 2.6875, + "learning_rate": 4.902722680638356e-07, + "loss": 0.8155, + "step": 5630 + }, + { + "epoch": 2.7037725664247585, + "grad_norm": 3.140625, + "learning_rate": 4.825505611536163e-07, + "loss": 0.8895, + "step": 5635 + }, + { + "epoch": 2.706171654771187, + "grad_norm": 3.25, + "learning_rate": 4.7488706161134266e-07, + "loss": 0.9353, + "step": 5640 + }, + { + "epoch": 2.7085707431176154, + "grad_norm": 3.125, + "learning_rate": 4.672818681812591e-07, + "loss": 0.8699, + "step": 5645 + }, + { + "epoch": 2.710969831464044, + "grad_norm": 3.125, + "learning_rate": 4.597350788563376e-07, + "loss": 0.9011, + "step": 5650 + }, + { + "epoch": 2.7133689198104722, + "grad_norm": 2.65625, + "learning_rate": 4.522467908770106e-07, + "loss": 0.8674, + "step": 5655 + }, + { + "epoch": 2.7157680081569002, + "grad_norm": 2.46875, + "learning_rate": 4.448171007299229e-07, + "loss": 0.7725, + "step": 5660 + }, + { + "epoch": 2.7181670965033287, + "grad_norm": 3.234375, + "learning_rate": 4.3744610414668265e-07, + "loss": 0.8388, + "step": 5665 + }, + { + "epoch": 2.720566184849757, + "grad_norm": 2.953125, + "learning_rate": 4.3013389610263636e-07, + "loss": 0.8038, + "step": 5670 + }, + { + "epoch": 2.7229652731961855, + "grad_norm": 3.0625, + "learning_rate": 4.2288057081563247e-07, + "loss": 0.8106, + "step": 5675 + }, + { + "epoch": 2.725364361542614, + "grad_norm": 3.015625, + "learning_rate": 4.156862217448215e-07, + "loss": 0.7503, + "step": 5680 + }, + { + "epoch": 2.727763449889042, + "grad_norm": 3.296875, + "learning_rate": 4.0855094158944066e-07, + "loss": 0.8085, + "step": 5685 + }, + { + "epoch": 2.7301625382354704, + "grad_norm": 3.0, + "learning_rate": 4.014748222876258e-07, + "loss": 0.781, + "step": 5690 + }, + { + "epoch": 2.732561626581899, + "grad_norm": 2.71875, + "learning_rate": 3.9445795501522276e-07, + "loss": 0.8548, + "step": 5695 + }, + { + "epoch": 2.7349607149283273, + "grad_norm": 3.421875, + "learning_rate": 3.875004301846186e-07, + "loss": 0.8415, + "step": 5700 + }, + { + "epoch": 2.7349607149283273, + "eval_loss": 0.946337103843689, + "eval_runtime": 175.7065, + "eval_samples_per_second": 42.173, + "eval_steps_per_second": 10.546, + "step": 5700 + }, + { + "epoch": 2.7373598032747557, + "grad_norm": 2.984375, + "learning_rate": 3.8060233744356634e-07, + "loss": 0.8101, + "step": 5705 + }, + { + "epoch": 2.739758891621184, + "grad_norm": 2.9375, + "learning_rate": 3.737637656740423e-07, + "loss": 0.8063, + "step": 5710 + }, + { + "epoch": 2.7421579799676126, + "grad_norm": 3.15625, + "learning_rate": 3.6698480299109273e-07, + "loss": 0.7506, + "step": 5715 + }, + { + "epoch": 2.7445570683140406, + "grad_norm": 2.84375, + "learning_rate": 3.602655367416968e-07, + "loss": 0.8546, + "step": 5720 + }, + { + "epoch": 2.746956156660469, + "grad_norm": 3.09375, + "learning_rate": 3.5360605350365006e-07, + "loss": 0.8406, + "step": 5725 + }, + { + "epoch": 2.7493552450068974, + "grad_norm": 3.15625, + "learning_rate": 3.470064390844402e-07, + "loss": 0.8724, + "step": 5730 + }, + { + "epoch": 2.751754333353326, + "grad_norm": 2.703125, + "learning_rate": 3.404667785201454e-07, + "loss": 0.7694, + "step": 5735 + }, + { + "epoch": 2.754153421699754, + "grad_norm": 2.625, + "learning_rate": 3.3398715607433794e-07, + "loss": 0.8349, + "step": 5740 + }, + { + "epoch": 2.7565525100461823, + "grad_norm": 3.171875, + "learning_rate": 3.2756765523700165e-07, + "loss": 0.8109, + "step": 5745 + }, + { + "epoch": 2.7589515983926107, + "grad_norm": 3.296875, + "learning_rate": 3.2120835872344547e-07, + "loss": 0.7918, + "step": 5750 + }, + { + "epoch": 2.761350686739039, + "grad_norm": 2.671875, + "learning_rate": 3.1490934847325406e-07, + "loss": 0.7042, + "step": 5755 + }, + { + "epoch": 2.7637497750854676, + "grad_norm": 3.921875, + "learning_rate": 3.0867070564921665e-07, + "loss": 0.8729, + "step": 5760 + }, + { + "epoch": 2.766148863431896, + "grad_norm": 2.765625, + "learning_rate": 3.0249251063629137e-07, + "loss": 0.8672, + "step": 5765 + }, + { + "epoch": 2.7685479517783245, + "grad_norm": 2.875, + "learning_rate": 2.9637484304056387e-07, + "loss": 0.8593, + "step": 5770 + }, + { + "epoch": 2.7709470401247525, + "grad_norm": 3.578125, + "learning_rate": 2.9031778168822466e-07, + "loss": 0.7651, + "step": 5775 + }, + { + "epoch": 2.773346128471181, + "grad_norm": 3.21875, + "learning_rate": 2.843214046245507e-07, + "loss": 0.7516, + "step": 5780 + }, + { + "epoch": 2.7757452168176093, + "grad_norm": 3.328125, + "learning_rate": 2.783857891129055e-07, + "loss": 0.834, + "step": 5785 + }, + { + "epoch": 2.7781443051640378, + "grad_norm": 3.828125, + "learning_rate": 2.725110116337354e-07, + "loss": 0.842, + "step": 5790 + }, + { + "epoch": 2.7805433935104658, + "grad_norm": 2.875, + "learning_rate": 2.6669714788358946e-07, + "loss": 0.8287, + "step": 5795 + }, + { + "epoch": 2.782942481856894, + "grad_norm": 3.203125, + "learning_rate": 2.60944272774144e-07, + "loss": 0.7846, + "step": 5800 + }, + { + "epoch": 2.782942481856894, + "eval_loss": 0.946326732635498, + "eval_runtime": 175.7382, + "eval_samples_per_second": 42.165, + "eval_steps_per_second": 10.544, + "step": 5800 + }, + { + "epoch": 2.7853415702033226, + "grad_norm": 2.578125, + "learning_rate": 2.552524604312351e-07, + "loss": 0.8889, + "step": 5805 + }, + { + "epoch": 2.787740658549751, + "grad_norm": 2.96875, + "learning_rate": 2.4962178419390357e-07, + "loss": 0.8157, + "step": 5810 + }, + { + "epoch": 2.7901397468961795, + "grad_norm": 2.78125, + "learning_rate": 2.440523166134562e-07, + "loss": 0.7996, + "step": 5815 + }, + { + "epoch": 2.792538835242608, + "grad_norm": 2.59375, + "learning_rate": 2.3854412945251757e-07, + "loss": 0.8163, + "step": 5820 + }, + { + "epoch": 2.7949379235890364, + "grad_norm": 2.671875, + "learning_rate": 2.3309729368412193e-07, + "loss": 0.7702, + "step": 5825 + }, + { + "epoch": 2.7973370119354644, + "grad_norm": 3.078125, + "learning_rate": 2.2771187949078455e-07, + "loss": 0.8732, + "step": 5830 + }, + { + "epoch": 2.799736100281893, + "grad_norm": 2.78125, + "learning_rate": 2.223879562636061e-07, + "loss": 0.7786, + "step": 5835 + }, + { + "epoch": 2.8021351886283212, + "grad_norm": 4.15625, + "learning_rate": 2.1712559260137434e-07, + "loss": 0.7568, + "step": 5840 + }, + { + "epoch": 2.8045342769747497, + "grad_norm": 2.921875, + "learning_rate": 2.1192485630968374e-07, + "loss": 1.008, + "step": 5845 + }, + { + "epoch": 2.806933365321178, + "grad_norm": 3.125, + "learning_rate": 2.0678581440005617e-07, + "loss": 0.8665, + "step": 5850 + }, + { + "epoch": 2.809332453667606, + "grad_norm": 3.515625, + "learning_rate": 2.0170853308908388e-07, + "loss": 0.8216, + "step": 5855 + }, + { + "epoch": 2.8117315420140345, + "grad_norm": 3.125, + "learning_rate": 1.966930777975734e-07, + "loss": 0.7792, + "step": 5860 + }, + { + "epoch": 2.814130630360463, + "grad_norm": 3.140625, + "learning_rate": 1.9173951314970018e-07, + "loss": 0.826, + "step": 5865 + }, + { + "epoch": 2.8165297187068914, + "grad_norm": 2.875, + "learning_rate": 1.8684790297218037e-07, + "loss": 0.8359, + "step": 5870 + }, + { + "epoch": 2.81892880705332, + "grad_norm": 3.515625, + "learning_rate": 1.8201831029344585e-07, + "loss": 0.8419, + "step": 5875 + }, + { + "epoch": 2.8213278953997483, + "grad_norm": 3.28125, + "learning_rate": 1.7725079734283223e-07, + "loss": 0.8791, + "step": 5880 + }, + { + "epoch": 2.8237269837461767, + "grad_norm": 2.984375, + "learning_rate": 1.7254542554977771e-07, + "loss": 0.8924, + "step": 5885 + }, + { + "epoch": 2.8261260720926047, + "grad_norm": 2.9375, + "learning_rate": 1.679022555430304e-07, + "loss": 0.9084, + "step": 5890 + }, + { + "epoch": 2.828525160439033, + "grad_norm": 3.65625, + "learning_rate": 1.6332134714986848e-07, + "loss": 0.8278, + "step": 5895 + }, + { + "epoch": 2.8309242487854616, + "grad_norm": 2.484375, + "learning_rate": 1.5880275939533063e-07, + "loss": 0.7605, + "step": 5900 + }, + { + "epoch": 2.8309242487854616, + "eval_loss": 0.9463862776756287, + "eval_runtime": 177.234, + "eval_samples_per_second": 41.809, + "eval_steps_per_second": 10.455, + "step": 5900 + }, + { + "epoch": 2.83332333713189, + "grad_norm": 2.625, + "learning_rate": 1.5434655050145077e-07, + "loss": 0.9009, + "step": 5905 + }, + { + "epoch": 2.835722425478318, + "grad_norm": 2.8125, + "learning_rate": 1.499527778865123e-07, + "loss": 0.8512, + "step": 5910 + }, + { + "epoch": 2.8381215138247464, + "grad_norm": 3.28125, + "learning_rate": 1.4562149816430616e-07, + "loss": 0.6809, + "step": 5915 + }, + { + "epoch": 2.840520602171175, + "grad_norm": 2.625, + "learning_rate": 1.413527671434023e-07, + "loss": 0.8097, + "step": 5920 + }, + { + "epoch": 2.8429196905176033, + "grad_norm": 2.703125, + "learning_rate": 1.3714663982642984e-07, + "loss": 0.8181, + "step": 5925 + }, + { + "epoch": 2.8453187788640317, + "grad_norm": 2.859375, + "learning_rate": 1.3300317040936927e-07, + "loss": 0.8673, + "step": 5930 + }, + { + "epoch": 2.84771786721046, + "grad_norm": 3.078125, + "learning_rate": 1.2892241228085355e-07, + "loss": 0.8847, + "step": 5935 + }, + { + "epoch": 2.8501169555568886, + "grad_norm": 2.8125, + "learning_rate": 1.2490441802148036e-07, + "loss": 0.8172, + "step": 5940 + }, + { + "epoch": 2.8525160439033166, + "grad_norm": 2.78125, + "learning_rate": 1.2094923940313308e-07, + "loss": 0.8514, + "step": 5945 + }, + { + "epoch": 2.854915132249745, + "grad_norm": 3.609375, + "learning_rate": 1.1705692738831654e-07, + "loss": 0.8143, + "step": 5950 + }, + { + "epoch": 2.8573142205961735, + "grad_norm": 3.203125, + "learning_rate": 1.1322753212949844e-07, + "loss": 0.8274, + "step": 5955 + }, + { + "epoch": 2.859713308942602, + "grad_norm": 3.75, + "learning_rate": 1.0946110296846447e-07, + "loss": 0.8353, + "step": 5960 + }, + { + "epoch": 2.8621123972890303, + "grad_norm": 3.15625, + "learning_rate": 1.057576884356798e-07, + "loss": 0.8685, + "step": 5965 + }, + { + "epoch": 2.8645114856354583, + "grad_norm": 4.0, + "learning_rate": 1.0211733624966802e-07, + "loss": 0.8467, + "step": 5970 + }, + { + "epoch": 2.8669105739818868, + "grad_norm": 3.453125, + "learning_rate": 9.854009331639214e-08, + "loss": 0.8935, + "step": 5975 + }, + { + "epoch": 2.869309662328315, + "grad_norm": 3.015625, + "learning_rate": 9.502600572865284e-08, + "loss": 0.8584, + "step": 5980 + }, + { + "epoch": 2.8717087506747436, + "grad_norm": 2.796875, + "learning_rate": 9.157511876549286e-08, + "loss": 0.7601, + "step": 5985 + }, + { + "epoch": 2.874107839021172, + "grad_norm": 3.359375, + "learning_rate": 8.818747689161688e-08, + "loss": 0.8792, + "step": 5990 + }, + { + "epoch": 2.8765069273676005, + "grad_norm": 3.078125, + "learning_rate": 8.486312375681205e-08, + "loss": 0.7975, + "step": 5995 + }, + { + "epoch": 2.878906015714029, + "grad_norm": 2.625, + "learning_rate": 8.160210219539333e-08, + "loss": 0.8721, + "step": 6000 + }, + { + "epoch": 2.878906015714029, + "eval_loss": 0.9464093446731567, + "eval_runtime": 183.9687, + "eval_samples_per_second": 40.279, + "eval_steps_per_second": 10.072, + "step": 6000 + }, + { + "epoch": 2.881305104060457, + "grad_norm": 3.578125, + "learning_rate": 7.840445422564735e-08, + "loss": 0.8902, + "step": 6005 + }, + { + "epoch": 2.8837041924068854, + "grad_norm": 2.84375, + "learning_rate": 7.527022104928893e-08, + "loss": 0.8215, + "step": 6010 + }, + { + "epoch": 2.886103280753314, + "grad_norm": 2.734375, + "learning_rate": 7.219944305093596e-08, + "loss": 0.8512, + "step": 6015 + }, + { + "epoch": 2.8885023690997422, + "grad_norm": 2.875, + "learning_rate": 6.919215979758476e-08, + "loss": 0.8271, + "step": 6020 + }, + { + "epoch": 2.89090145744617, + "grad_norm": 2.796875, + "learning_rate": 6.624841003810056e-08, + "loss": 0.7854, + "step": 6025 + }, + { + "epoch": 2.8933005457925987, + "grad_norm": 3.21875, + "learning_rate": 6.336823170272011e-08, + "loss": 0.8432, + "step": 6030 + }, + { + "epoch": 2.895699634139027, + "grad_norm": 3.140625, + "learning_rate": 6.055166190256145e-08, + "loss": 0.872, + "step": 6035 + }, + { + "epoch": 2.8980987224854555, + "grad_norm": 2.953125, + "learning_rate": 5.779873692914606e-08, + "loss": 0.8188, + "step": 6040 + }, + { + "epoch": 2.900497810831884, + "grad_norm": 3.578125, + "learning_rate": 5.5109492253933025e-08, + "loss": 0.7619, + "step": 6045 + }, + { + "epoch": 2.9028968991783124, + "grad_norm": 2.59375, + "learning_rate": 5.2483962527857813e-08, + "loss": 0.704, + "step": 6050 + }, + { + "epoch": 2.905295987524741, + "grad_norm": 3.296875, + "learning_rate": 4.992218158088979e-08, + "loss": 0.7787, + "step": 6055 + }, + { + "epoch": 2.907695075871169, + "grad_norm": 3.0625, + "learning_rate": 4.7424182421594854e-08, + "loss": 0.8102, + "step": 6060 + }, + { + "epoch": 2.9100941642175973, + "grad_norm": 3.390625, + "learning_rate": 4.498999723670905e-08, + "loss": 0.8945, + "step": 6065 + }, + { + "epoch": 2.9124932525640257, + "grad_norm": 2.609375, + "learning_rate": 4.2619657390726154e-08, + "loss": 0.8516, + "step": 6070 + }, + { + "epoch": 2.914892340910454, + "grad_norm": 3.171875, + "learning_rate": 4.0313193425492446e-08, + "loss": 0.7551, + "step": 6075 + }, + { + "epoch": 2.917291429256882, + "grad_norm": 3.03125, + "learning_rate": 3.8070635059811455e-08, + "loss": 0.8393, + "step": 6080 + }, + { + "epoch": 2.9196905176033106, + "grad_norm": 3.046875, + "learning_rate": 3.589201118906427e-08, + "loss": 0.8361, + "step": 6085 + }, + { + "epoch": 2.922089605949739, + "grad_norm": 3.078125, + "learning_rate": 3.3777349884834275e-08, + "loss": 0.8572, + "step": 6090 + }, + { + "epoch": 2.9244886942961674, + "grad_norm": 3.15625, + "learning_rate": 3.1726678394547464e-08, + "loss": 0.8325, + "step": 6095 + }, + { + "epoch": 2.926887782642596, + "grad_norm": 3.0, + "learning_rate": 2.9740023141120455e-08, + "loss": 0.8566, + "step": 6100 + }, + { + "epoch": 2.926887782642596, + "eval_loss": 0.9463550448417664, + "eval_runtime": 178.3552, + "eval_samples_per_second": 41.546, + "eval_steps_per_second": 10.389, + "step": 6100 + }, + { + "epoch": 2.9292868709890243, + "grad_norm": 3.0625, + "learning_rate": 2.7817409722621368e-08, + "loss": 0.8511, + "step": 6105 + }, + { + "epoch": 2.9316859593354527, + "grad_norm": 2.6875, + "learning_rate": 2.5958862911935613e-08, + "loss": 1.0181, + "step": 6110 + }, + { + "epoch": 2.9340850476818807, + "grad_norm": 2.65625, + "learning_rate": 2.4164406656453364e-08, + "loss": 0.7093, + "step": 6115 + }, + { + "epoch": 2.936484136028309, + "grad_norm": 2.96875, + "learning_rate": 2.2434064077755945e-08, + "loss": 0.7972, + "step": 6120 + }, + { + "epoch": 2.9388832243747376, + "grad_norm": 3.34375, + "learning_rate": 2.076785747131993e-08, + "loss": 0.9193, + "step": 6125 + }, + { + "epoch": 2.941282312721166, + "grad_norm": 3.25, + "learning_rate": 1.9165808306228496e-08, + "loss": 0.7502, + "step": 6130 + }, + { + "epoch": 2.9436814010675945, + "grad_norm": 3.0625, + "learning_rate": 1.7627937224897197e-08, + "loss": 0.8204, + "step": 6135 + }, + { + "epoch": 2.9460804894140225, + "grad_norm": 3.015625, + "learning_rate": 1.615426404280529e-08, + "loss": 0.8157, + "step": 6140 + }, + { + "epoch": 2.948479577760451, + "grad_norm": 3.078125, + "learning_rate": 1.474480774824205e-08, + "loss": 0.8643, + "step": 6145 + }, + { + "epoch": 2.9508786661068793, + "grad_norm": 3.109375, + "learning_rate": 1.3399586502062523e-08, + "loss": 0.8144, + "step": 6150 + }, + { + "epoch": 2.9532777544533078, + "grad_norm": 2.625, + "learning_rate": 1.2118617637451035e-08, + "loss": 0.8754, + "step": 6155 + }, + { + "epoch": 2.955676842799736, + "grad_norm": 4.0, + "learning_rate": 1.090191765970139e-08, + "loss": 0.8617, + "step": 6160 + }, + { + "epoch": 2.9580759311461646, + "grad_norm": 3.203125, + "learning_rate": 9.749502246000352e-09, + "loss": 0.778, + "step": 6165 + }, + { + "epoch": 2.960475019492593, + "grad_norm": 3.671875, + "learning_rate": 8.661386245229498e-09, + "loss": 0.8113, + "step": 6170 + }, + { + "epoch": 2.962874107839021, + "grad_norm": 3.015625, + "learning_rate": 7.637583677771453e-09, + "loss": 0.8138, + "step": 6175 + }, + { + "epoch": 2.9652731961854495, + "grad_norm": 2.34375, + "learning_rate": 6.678107735328398e-09, + "loss": 0.7571, + "step": 6180 + }, + { + "epoch": 2.967672284531878, + "grad_norm": 2.890625, + "learning_rate": 5.782970780755515e-09, + "loss": 0.7821, + "step": 6185 + }, + { + "epoch": 2.9700713728783064, + "grad_norm": 2.90625, + "learning_rate": 4.952184347898903e-09, + "loss": 0.8735, + "step": 6190 + }, + { + "epoch": 2.9724704612247344, + "grad_norm": 2.71875, + "learning_rate": 4.1857591414468106e-09, + "loss": 0.7953, + "step": 6195 + }, + { + "epoch": 2.974869549571163, + "grad_norm": 2.640625, + "learning_rate": 3.4837050367936275e-09, + "loss": 0.7978, + "step": 6200 + }, + { + "epoch": 2.974869549571163, + "eval_loss": 0.946397602558136, + "eval_runtime": 176.2072, + "eval_samples_per_second": 42.053, + "eval_steps_per_second": 10.516, + "step": 6200 + }, + { + "epoch": 2.977268637917591, + "grad_norm": 2.84375, + "learning_rate": 2.846031079912215e-09, + "loss": 0.8122, + "step": 6205 + }, + { + "epoch": 2.9796677262640197, + "grad_norm": 3.4375, + "learning_rate": 2.2727454872351062e-09, + "loss": 0.8337, + "step": 6210 + }, + { + "epoch": 2.982066814610448, + "grad_norm": 3.296875, + "learning_rate": 1.7638556455518152e-09, + "loss": 0.8998, + "step": 6215 + }, + { + "epoch": 2.9844659029568765, + "grad_norm": 3.125, + "learning_rate": 1.3193681119116897e-09, + "loss": 0.7873, + "step": 6220 + }, + { + "epoch": 2.986864991303305, + "grad_norm": 3.09375, + "learning_rate": 9.3928861353898e-10, + "loss": 0.7989, + "step": 6225 + }, + { + "epoch": 2.989264079649733, + "grad_norm": 3.140625, + "learning_rate": 6.236220477612298e-10, + "loss": 0.9234, + "step": 6230 + }, + { + "epoch": 2.9916631679961614, + "grad_norm": 2.75, + "learning_rate": 3.723724819443275e-10, + "loss": 0.7815, + "step": 6235 + }, + { + "epoch": 2.99406225634259, + "grad_norm": 2.5625, + "learning_rate": 1.8554315344088136e-10, + "loss": 0.6956, + "step": 6240 + }, + { + "epoch": 2.9964613446890183, + "grad_norm": 3.328125, + "learning_rate": 6.313646954747565e-11, + "loss": 0.794, + "step": 6245 + }, + { + "epoch": 2.9988604330354467, + "grad_norm": 2.953125, + "learning_rate": 5.154007475249856e-12, + "loss": 0.7592, + "step": 6250 + }, + { + "epoch": 2.9998200683740177, + "step": 6252, + "total_flos": 6.498465993073754e+18, + "train_loss": 0.937529916860168, + "train_runtime": 37609.4943, + "train_samples_per_second": 5.32, + "train_steps_per_second": 0.166 } ], "logging_steps": 5, - "max_steps": 1110, + "max_steps": 6252, "num_input_tokens_seen": 0, "num_train_epochs": 3, - "save_steps": 100, + "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { @@ -3397,7 +9281,7 @@ "attributes": {} } }, - "total_flos": 4.5615607240812134e+17, + "total_flos": 6.498465993073754e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null