{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.996,
  "eval_steps": 500,
  "global_step": 83,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012,
      "grad_norm": 0.8640882968902588,
      "learning_rate": 5.555555555555555e-08,
      "logits/chosen": -1.495650291442871,
      "logits/rejected": -1.3535889387130737,
      "logps/chosen": -0.10173828899860382,
      "logps/rejected": -0.08792766183614731,
      "loss": 0.8717,
      "rewards/accuracies": 0.3333333730697632,
      "rewards/chosen": -0.20347657799720764,
      "rewards/margins": -0.027621246874332428,
      "rewards/rejected": -0.17585532367229462,
      "step": 1
    },
    {
      "epoch": 0.024,
      "grad_norm": 0.979682981967926,
      "learning_rate": 1.111111111111111e-07,
      "logits/chosen": -1.443913221359253,
      "logits/rejected": -1.3641024827957153,
      "logps/chosen": -0.10752908140420914,
      "logps/rejected": -0.08240076899528503,
      "loss": 0.8846,
      "rewards/accuracies": 0.2187500149011612,
      "rewards/chosen": -0.21505816280841827,
      "rewards/margins": -0.0502566322684288,
      "rewards/rejected": -0.16480153799057007,
      "step": 2
    },
    {
      "epoch": 0.036,
      "grad_norm": 0.9309831857681274,
      "learning_rate": 1.6666666666666665e-07,
      "logits/chosen": -1.4732969999313354,
      "logits/rejected": -1.376213550567627,
      "logps/chosen": -0.10481980443000793,
      "logps/rejected": -0.07933054119348526,
      "loss": 0.8848,
      "rewards/accuracies": 0.28125,
      "rewards/chosen": -0.20963960886001587,
      "rewards/margins": -0.050978537648916245,
      "rewards/rejected": -0.15866108238697052,
      "step": 3
    },
    {
      "epoch": 0.048,
      "grad_norm": 0.9682196974754333,
      "learning_rate": 2.222222222222222e-07,
      "logits/chosen": -1.5285106897354126,
      "logits/rejected": -1.3950246572494507,
      "logps/chosen": -0.11225953698158264,
      "logps/rejected": -0.08694332838058472,
      "loss": 0.8849,
      "rewards/accuracies": 0.3229166865348816,
      "rewards/chosen": -0.22451907396316528,
      "rewards/margins": -0.05063238739967346,
      "rewards/rejected": -0.17388665676116943,
      "step": 4
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0820338726043701,
      "learning_rate": 2.7777777777777776e-07,
      "logits/chosen": -1.494052767753601,
      "logits/rejected": -1.3356661796569824,
      "logps/chosen": -0.12428013980388641,
      "logps/rejected": -0.09185128659009933,
      "loss": 0.8968,
      "rewards/accuracies": 0.322916716337204,
      "rewards/chosen": -0.24856027960777283,
      "rewards/margins": -0.06485769897699356,
      "rewards/rejected": -0.18370257318019867,
      "step": 5
    },
    {
      "epoch": 0.072,
      "grad_norm": 1.0573477745056152,
      "learning_rate": 3.333333333333333e-07,
      "logits/chosen": -1.4825295209884644,
      "logits/rejected": -1.3897716999053955,
      "logps/chosen": -0.11380095779895782,
      "logps/rejected": -0.08133920282125473,
      "loss": 0.8938,
      "rewards/accuracies": 0.3020833432674408,
      "rewards/chosen": -0.22760191559791565,
      "rewards/margins": -0.06492353230714798,
      "rewards/rejected": -0.16267840564250946,
      "step": 6
    },
    {
      "epoch": 0.084,
      "grad_norm": 0.9294664263725281,
      "learning_rate": 3.888888888888889e-07,
      "logits/chosen": -1.5021216869354248,
      "logits/rejected": -1.3295238018035889,
      "logps/chosen": -0.099081851541996,
      "logps/rejected": -0.08119318634271622,
      "loss": 0.8761,
      "rewards/accuracies": 0.2916666567325592,
      "rewards/chosen": -0.198163703083992,
      "rewards/margins": -0.03577733412384987,
      "rewards/rejected": -0.16238637268543243,
      "step": 7
    },
    {
      "epoch": 0.096,
      "grad_norm": 1.0329580307006836,
      "learning_rate": 4.444444444444444e-07,
      "logits/chosen": -1.5048794746398926,
      "logits/rejected": -1.377795934677124,
      "logps/chosen": -0.12124787271022797,
      "logps/rejected": -0.08464659005403519,
      "loss": 0.9015,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.24249574542045593,
      "rewards/margins": -0.07320256531238556,
      "rewards/rejected": -0.16929318010807037,
      "step": 8
    },
    {
      "epoch": 0.108,
      "grad_norm": 0.9767515063285828,
      "learning_rate": 5e-07,
      "logits/chosen": -1.5186768770217896,
      "logits/rejected": -1.3877286911010742,
      "logps/chosen": -0.10737170279026031,
      "logps/rejected": -0.0823729932308197,
      "loss": 0.8847,
      "rewards/accuracies": 0.3020833432674408,
      "rewards/chosen": -0.21474340558052063,
      "rewards/margins": -0.04999742656946182,
      "rewards/rejected": -0.1647459864616394,
      "step": 9
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9653743505477905,
      "learning_rate": 4.997747415511704e-07,
      "logits/chosen": -1.4915320873260498,
      "logits/rejected": -1.3450301885604858,
      "logps/chosen": -0.10813666880130768,
      "logps/rejected": -0.08507634699344635,
      "loss": 0.8822,
      "rewards/accuracies": 0.2291666716337204,
      "rewards/chosen": -0.21627333760261536,
      "rewards/margins": -0.04612065851688385,
      "rewards/rejected": -0.1701526939868927,
      "step": 10
    },
    {
      "epoch": 0.132,
      "grad_norm": 0.9533908367156982,
      "learning_rate": 4.990993721356315e-07,
      "logits/chosen": -1.4848368167877197,
      "logits/rejected": -1.3539990186691284,
      "logps/chosen": -0.11860796809196472,
      "logps/rejected": -0.08763512969017029,
      "loss": 0.8931,
      "rewards/accuracies": 0.25,
      "rewards/chosen": -0.23721593618392944,
      "rewards/margins": -0.061945684254169464,
      "rewards/rejected": -0.17527025938034058,
      "step": 11
    },
    {
      "epoch": 0.144,
      "grad_norm": 0.9020726084709167,
      "learning_rate": 4.979751088147191e-07,
      "logits/chosen": -1.4897594451904297,
      "logits/rejected": -1.3646252155303955,
      "logps/chosen": -0.09957993775606155,
      "logps/rejected": -0.08218874782323837,
      "loss": 0.8759,
      "rewards/accuracies": 0.3958333432674408,
      "rewards/chosen": -0.1991598755121231,
      "rewards/margins": -0.034782394766807556,
      "rewards/rejected": -0.16437749564647675,
      "step": 12
    },
    {
      "epoch": 0.156,
      "grad_norm": 1.0148029327392578,
      "learning_rate": 4.964039775869271e-07,
      "logits/chosen": -1.4809292554855347,
      "logits/rejected": -1.3655236959457397,
      "logps/chosen": -0.11366236209869385,
      "logps/rejected": -0.0875682383775711,
      "loss": 0.8868,
      "rewards/accuracies": 0.3645833432674408,
      "rewards/chosen": -0.2273247241973877,
      "rewards/margins": -0.05218825116753578,
      "rewards/rejected": -0.1751364767551422,
      "step": 13
    },
    {
      "epoch": 0.168,
      "grad_norm": 1.334598422050476,
      "learning_rate": 4.943888097369216e-07,
      "logits/chosen": -1.4804601669311523,
      "logits/rejected": -1.3518431186676025,
      "logps/chosen": -0.13359910249710083,
      "logps/rejected": -0.08674132823944092,
      "loss": 0.9236,
      "rewards/accuracies": 0.2395833432674408,
      "rewards/chosen": -0.26719820499420166,
      "rewards/margins": -0.09371551126241684,
      "rewards/rejected": -0.17348265647888184,
      "step": 14
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9834568500518799,
      "learning_rate": 4.919332367333748e-07,
      "logits/chosen": -1.4974645376205444,
      "logits/rejected": -1.3400018215179443,
      "logps/chosen": -0.10907550156116486,
      "logps/rejected": -0.08122590184211731,
      "loss": 0.8879,
      "rewards/accuracies": 0.21875,
      "rewards/chosen": -0.2181510031223297,
      "rewards/margins": -0.05569921433925629,
      "rewards/rejected": -0.16245180368423462,
      "step": 15
    },
    {
      "epoch": 0.192,
      "grad_norm": 0.9361069798469543,
      "learning_rate": 4.890416836848127e-07,
      "logits/chosen": -1.4610369205474854,
      "logits/rejected": -1.369497299194336,
      "logps/chosen": -0.10335300862789154,
      "logps/rejected": -0.08525583893060684,
      "loss": 0.8765,
      "rewards/accuracies": 0.3541666865348816,
      "rewards/chosen": -0.20670601725578308,
      "rewards/margins": -0.03619435429573059,
      "rewards/rejected": -0.17051167786121368,
      "step": 16
    },
    {
      "epoch": 0.204,
      "grad_norm": 0.910740077495575,
      "learning_rate": 4.85719361365271e-07,
      "logits/chosen": -1.5087649822235107,
      "logits/rejected": -1.3652905225753784,
      "logps/chosen": -0.09960392862558365,
      "logps/rejected": -0.08507402241230011,
      "loss": 0.8719,
      "rewards/accuracies": 0.3541666865348816,
      "rewards/chosen": -0.1992078572511673,
      "rewards/margins": -0.029059793800115585,
      "rewards/rejected": -0.17014804482460022,
      "step": 17
    },
    {
      "epoch": 0.216,
      "grad_norm": 0.9233217239379883,
      "learning_rate": 4.819722568241273e-07,
      "logits/chosen": -1.4695565700531006,
      "logits/rejected": -1.336183786392212,
      "logps/chosen": -0.10343371331691742,
      "logps/rejected": -0.0857667475938797,
      "loss": 0.8769,
      "rewards/accuracies": 0.4166666865348816,
      "rewards/chosen": -0.20686742663383484,
      "rewards/margins": -0.03533393144607544,
      "rewards/rejected": -0.1715334951877594,
      "step": 18
    },
    {
      "epoch": 0.228,
      "grad_norm": 0.8909419775009155,
      "learning_rate": 4.778071225970339e-07,
      "logits/chosen": -1.4759807586669922,
      "logits/rejected": -1.3495370149612427,
      "logps/chosen": -0.10235996544361115,
      "logps/rejected": -0.0851350948214531,
      "loss": 0.8757,
      "rewards/accuracies": 0.3229166567325592,
      "rewards/chosen": -0.2047199308872223,
      "rewards/margins": -0.03444972261786461,
      "rewards/rejected": -0.1702701896429062,
      "step": 19
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9479613304138184,
      "learning_rate": 4.732314645373921e-07,
      "logits/chosen": -1.4673627614974976,
      "logits/rejected": -1.3651877641677856,
      "logps/chosen": -0.10552840679883957,
      "logps/rejected": -0.07911674678325653,
      "loss": 0.886,
      "rewards/accuracies": 0.28125,
      "rewards/chosen": -0.21105681359767914,
      "rewards/margins": -0.05282333493232727,
      "rewards/rejected": -0.15823349356651306,
      "step": 20
    },
    {
      "epoch": 0.252,
      "grad_norm": 0.854266345500946,
      "learning_rate": 4.68253528290297e-07,
      "logits/chosen": -1.4877924919128418,
      "logits/rejected": -1.324515461921692,
      "logps/chosen": -0.1002732366323471,
      "logps/rejected": -0.09079625457525253,
      "loss": 0.8664,
      "rewards/accuracies": 0.3645833432674408,
      "rewards/chosen": -0.2005464732646942,
      "rewards/margins": -0.018953965976834297,
      "rewards/rejected": -0.18159250915050507,
      "step": 21
    },
    {
      "epoch": 0.264,
      "grad_norm": 0.9627916812896729,
      "learning_rate": 4.6288228443332776e-07,
      "logits/chosen": -1.466447114944458,
      "logits/rejected": -1.3485172986984253,
      "logps/chosen": -0.10300128906965256,
      "logps/rejected": -0.08484764397144318,
      "loss": 0.8763,
      "rewards/accuracies": 0.3333333730697632,
      "rewards/chosen": -0.20600257813930511,
      "rewards/margins": -0.03630730137228966,
      "rewards/rejected": -0.16969528794288635,
      "step": 22
    },
    {
      "epoch": 0.276,
      "grad_norm": 0.8689938187599182,
      "learning_rate": 4.571274123109605e-07,
      "logits/chosen": -1.4124252796173096,
      "logits/rejected": -1.3001394271850586,
      "logps/chosen": -0.10588695108890533,
      "logps/rejected": -0.08438257873058319,
      "loss": 0.8809,
      "rewards/accuracies": 0.2395833283662796,
      "rewards/chosen": -0.21177390217781067,
      "rewards/margins": -0.043008752167224884,
      "rewards/rejected": -0.16876515746116638,
      "step": 23
    },
    {
      "epoch": 0.288,
      "grad_norm": 0.9801927208900452,
      "learning_rate": 4.5099928259173514e-07,
      "logits/chosen": -1.4522674083709717,
      "logits/rejected": -1.3461543321609497,
      "logps/chosen": -0.1134054884314537,
      "logps/rejected": -0.08870639652013779,
      "loss": 0.886,
      "rewards/accuracies": 0.2499999850988388,
      "rewards/chosen": -0.2268109768629074,
      "rewards/margins": -0.049398161470890045,
      "rewards/rejected": -0.17741279304027557,
      "step": 24
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9185568690299988,
      "learning_rate": 4.4450893857960984e-07,
      "logits/chosen": -1.4601349830627441,
      "logits/rejected": -1.273384928703308,
      "logps/chosen": -0.10715562850236893,
      "logps/rejected": -0.09089934825897217,
      "loss": 0.8746,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.21431125700473785,
      "rewards/margins": -0.03251257538795471,
      "rewards/rejected": -0.18179869651794434,
      "step": 25
    },
    {
      "epoch": 0.312,
      "grad_norm": 0.8864983320236206,
      "learning_rate": 4.3766807631318105e-07,
      "logits/chosen": -1.438947081565857,
      "logits/rejected": -1.3086212873458862,
      "logps/chosen": -0.10850708931684494,
      "logps/rejected": -0.0892128050327301,
      "loss": 0.8781,
      "rewards/accuracies": 0.3229166865348816,
      "rewards/chosen": -0.21701417863368988,
      "rewards/margins": -0.038588590919971466,
      "rewards/rejected": -0.1784256100654602,
      "step": 26
    },
    {
      "epoch": 0.324,
      "grad_norm": 0.9518214464187622,
      "learning_rate": 4.3048902348863106e-07,
      "logits/chosen": -1.4363845586776733,
      "logits/rejected": -1.3129128217697144,
      "logps/chosen": -0.10809854418039322,
      "logps/rejected": -0.08249183744192123,
      "loss": 0.885,
      "rewards/accuracies": 0.2812500298023224,
      "rewards/chosen": -0.21619708836078644,
      "rewards/margins": -0.05121342092752457,
      "rewards/rejected": -0.16498367488384247,
      "step": 27
    },
    {
      "epoch": 0.336,
      "grad_norm": 0.9291889667510986,
      "learning_rate": 4.2298471724438653e-07,
      "logits/chosen": -1.4329116344451904,
      "logits/rejected": -1.3157219886779785,
      "logps/chosen": -0.10641689598560333,
      "logps/rejected": -0.0836237370967865,
      "loss": 0.8825,
      "rewards/accuracies": 0.3541666567325592,
      "rewards/chosen": -0.21283379197120667,
      "rewards/margins": -0.04558631405234337,
      "rewards/rejected": -0.167247474193573,
      "step": 28
    },
    {
      "epoch": 0.348,
      "grad_norm": 0.8883063197135925,
      "learning_rate": 4.151686808475203e-07,
      "logits/chosen": -1.4277637004852295,
      "logits/rejected": -1.2657394409179688,
      "logps/chosen": -0.1044892817735672,
      "logps/rejected": -0.08842873573303223,
      "loss": 0.8741,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.2089785635471344,
      "rewards/margins": -0.03212107717990875,
      "rewards/rejected": -0.17685747146606445,
      "step": 29
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9316148161888123,
      "learning_rate": 4.070549993239106e-07,
      "logits/chosen": -1.3881316184997559,
      "logits/rejected": -1.2497737407684326,
      "logps/chosen": -0.11017285287380219,
      "logps/rejected": -0.0868031308054924,
      "loss": 0.8842,
      "rewards/accuracies": 0.3645833432674408,
      "rewards/chosen": -0.22034570574760437,
      "rewards/margins": -0.046739429235458374,
      "rewards/rejected": -0.1736062616109848,
      "step": 30
    },
    {
      "epoch": 0.372,
      "grad_norm": 0.8768561482429504,
      "learning_rate": 3.9865829407607166e-07,
      "logits/chosen": -1.4147083759307861,
      "logits/rejected": -1.2981321811676025,
      "logps/chosen": -0.10104362666606903,
      "logps/rejected": -0.09066756814718246,
      "loss": 0.8673,
      "rewards/accuracies": 0.3437500298023224,
      "rewards/chosen": -0.20208725333213806,
      "rewards/margins": -0.020752109587192535,
      "rewards/rejected": -0.18133513629436493,
      "step": 31
    },
    {
      "epoch": 0.384,
      "grad_norm": 1.0077239274978638,
      "learning_rate": 3.8999369653439883e-07,
      "logits/chosen": -1.4155137538909912,
      "logits/rejected": -1.2743993997573853,
      "logps/chosen": -0.10902610421180725,
      "logps/rejected": -0.0863211378455162,
      "loss": 0.8827,
      "rewards/accuracies": 0.3020833432674408,
      "rewards/chosen": -0.2180522084236145,
      "rewards/margins": -0.0454099103808403,
      "rewards/rejected": -0.1726422756910324,
      "step": 32
    },
    {
      "epoch": 0.396,
      "grad_norm": 0.9207706451416016,
      "learning_rate": 3.810768208893079e-07,
      "logits/chosen": -1.3758422136306763,
      "logits/rejected": -1.2915358543395996,
      "logps/chosen": -0.1034766435623169,
      "logps/rejected": -0.07977995276451111,
      "loss": 0.883,
      "rewards/accuracies": 0.3020833432674408,
      "rewards/chosen": -0.2069532871246338,
      "rewards/margins": -0.047393374145030975,
      "rewards/rejected": -0.15955990552902222,
      "step": 33
    },
    {
      "epoch": 0.408,
      "grad_norm": 0.9844294190406799,
      "learning_rate": 3.7192373595340865e-07,
      "logits/chosen": -1.442295789718628,
      "logits/rejected": -1.2863086462020874,
      "logps/chosen": -0.10710459202528,
      "logps/rejected": -0.08674141019582748,
      "loss": 0.8796,
      "rewards/accuracies": 0.3020833432674408,
      "rewards/chosen": -0.21420918405056,
      "rewards/margins": -0.04072638228535652,
      "rewards/rejected": -0.17348282039165497,
      "step": 34
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9682800769805908,
      "learning_rate": 3.625509362044183e-07,
      "logits/chosen": -1.3701705932617188,
      "logits/rejected": -1.2656924724578857,
      "logps/chosen": -0.11000403016805649,
      "logps/rejected": -0.08980907499790192,
      "loss": 0.8803,
      "rewards/accuracies": 0.3854166567325592,
      "rewards/chosen": -0.22000806033611298,
      "rewards/margins": -0.04038992151618004,
      "rewards/rejected": -0.17961814999580383,
      "step": 35
    },
    {
      "epoch": 0.432,
      "grad_norm": 0.9004039168357849,
      "learning_rate": 3.529753120609982e-07,
      "logits/chosen": -1.4099225997924805,
      "logits/rejected": -1.252682089805603,
      "logps/chosen": -0.09707480669021606,
      "logps/rejected": -0.08859608322381973,
      "loss": 0.8653,
      "rewards/accuracies": 0.4479166865348816,
      "rewards/chosen": -0.19414961338043213,
      "rewards/margins": -0.016957445070147514,
      "rewards/rejected": -0.17719216644763947,
      "step": 36
    },
    {
      "epoch": 0.444,
      "grad_norm": 0.9568714499473572,
      "learning_rate": 3.4321411944507714e-07,
      "logits/chosen": -1.3612836599349976,
      "logits/rejected": -1.2257184982299805,
      "logps/chosen": -0.10623904317617416,
      "logps/rejected": -0.09100136905908585,
      "loss": 0.8734,
      "rewards/accuracies": 0.4166666567325592,
      "rewards/chosen": -0.21247808635234833,
      "rewards/margins": -0.030475351959466934,
      "rewards/rejected": -0.1820027381181717,
      "step": 37
    },
    {
      "epoch": 0.456,
      "grad_norm": 0.8658465147018433,
      "learning_rate": 3.332849486855144e-07,
      "logits/chosen": -1.381141185760498,
      "logits/rejected": -1.2495661973953247,
      "logps/chosen": -0.09047922492027283,
      "logps/rejected": -0.07975338399410248,
      "loss": 0.8676,
      "rewards/accuracies": 0.3958333432674408,
      "rewards/chosen": -0.18095844984054565,
      "rewards/margins": -0.021451696753501892,
      "rewards/rejected": -0.15950676798820496,
      "step": 38
    },
    {
      "epoch": 0.468,
      "grad_norm": 0.9716626405715942,
      "learning_rate": 3.2320569281913754e-07,
      "logits/chosen": -1.3790628910064697,
      "logits/rejected": -1.251636028289795,
      "logps/chosen": -0.10486049205064774,
      "logps/rejected": -0.08277000486850739,
      "loss": 0.883,
      "rewards/accuracies": 0.3437500298023224,
      "rewards/chosen": -0.20972098410129547,
      "rewards/margins": -0.0441809706389904,
      "rewards/rejected": -0.16554000973701477,
      "step": 39
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.948078989982605,
      "learning_rate": 3.129945153462813e-07,
      "logits/chosen": -1.3937729597091675,
      "logits/rejected": -1.2695637941360474,
      "logps/chosen": -0.09631801396608353,
      "logps/rejected": -0.08585619181394577,
      "loss": 0.8673,
      "rewards/accuracies": 0.3854166865348816,
      "rewards/chosen": -0.19263602793216705,
      "rewards/margins": -0.020923633128404617,
      "rewards/rejected": -0.17171238362789154,
      "step": 40
    },
    {
      "epoch": 0.492,
      "grad_norm": 0.9445565342903137,
      "learning_rate": 3.0266981749893154e-07,
      "logits/chosen": -1.3988057374954224,
      "logits/rejected": -1.2703980207443237,
      "logps/chosen": -0.09746824949979782,
      "logps/rejected": -0.0910995602607727,
      "loss": 0.8624,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": -0.19493649899959564,
      "rewards/margins": -0.012737366370856762,
      "rewards/rejected": -0.1821991205215454,
      "step": 41
    },
    {
      "epoch": 0.504,
      "grad_norm": 1.080769419670105,
      "learning_rate": 2.922502050804623e-07,
      "logits/chosen": -1.4094312191009521,
      "logits/rejected": -1.247127890586853,
      "logps/chosen": -0.09884171932935715,
      "logps/rejected": -0.08831708878278732,
      "loss": 0.8677,
      "rewards/accuracies": 0.3958333730697632,
      "rewards/chosen": -0.1976834386587143,
      "rewards/margins": -0.02104926109313965,
      "rewards/rejected": -0.17663417756557465,
      "step": 42
    },
    {
      "epoch": 0.516,
      "grad_norm": 0.9974801540374756,
      "learning_rate": 2.8175445493671966e-07,
      "logits/chosen": -1.3815157413482666,
      "logits/rejected": -1.2270005941390991,
      "logps/chosen": -0.10922063887119293,
      "logps/rejected": -0.0900418609380722,
      "loss": 0.8809,
      "rewards/accuracies": 0.4583333730697632,
      "rewards/chosen": -0.21844127774238586,
      "rewards/margins": -0.03835754841566086,
      "rewards/rejected": -0.1800837218761444,
      "step": 43
    },
    {
      "epoch": 0.528,
      "grad_norm": 0.9864545464515686,
      "learning_rate": 2.712014811188773e-07,
      "logits/chosen": -1.3654570579528809,
      "logits/rejected": -1.2366658449172974,
      "logps/chosen": -0.1124953106045723,
      "logps/rejected": -0.0964164212346077,
      "loss": 0.8745,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.2249906212091446,
      "rewards/margins": -0.0321577824652195,
      "rewards/rejected": -0.1928328424692154,
      "step": 44
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9367190599441528,
      "learning_rate": 2.606103007990371e-07,
      "logits/chosen": -1.3880029916763306,
      "logits/rejected": -1.2659260034561157,
      "logps/chosen": -0.09874889254570007,
      "logps/rejected": -0.09255427867174149,
      "loss": 0.8628,
      "rewards/accuracies": 0.4062500298023224,
      "rewards/chosen": -0.19749778509140015,
      "rewards/margins": -0.012389198876917362,
      "rewards/rejected": -0.18510855734348297,
      "step": 45
    },
    {
      "epoch": 0.552,
      "grad_norm": 0.9902623891830444,
      "learning_rate": 2.5e-07,
      "logits/chosen": -1.3836193084716797,
      "logits/rejected": -1.2308557033538818,
      "logps/chosen": -0.09668231755495071,
      "logps/rejected": -0.08627666532993317,
      "loss": 0.867,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": -0.19336463510990143,
      "rewards/margins": -0.020811304450035095,
      "rewards/rejected": -0.17255333065986633,
      "step": 46
    },
    {
      "epoch": 0.564,
      "grad_norm": 1.0337759256362915,
      "learning_rate": 2.3938969920096296e-07,
      "logits/chosen": -1.3623703718185425,
      "logits/rejected": -1.2246556282043457,
      "logps/chosen": -0.11106079071760178,
      "logps/rejected": -0.09072640538215637,
      "loss": 0.8814,
      "rewards/accuracies": 0.4479166865348816,
      "rewards/chosen": -0.22212158143520355,
      "rewards/margins": -0.040668785572052,
      "rewards/rejected": -0.18145281076431274,
      "step": 47
    },
    {
      "epoch": 0.576,
      "grad_norm": 0.984899640083313,
      "learning_rate": 2.2879851888112278e-07,
      "logits/chosen": -1.3421802520751953,
      "logits/rejected": -1.198030710220337,
      "logps/chosen": -0.10126922279596329,
      "logps/rejected": -0.09068246185779572,
      "loss": 0.8687,
      "rewards/accuracies": 0.3958333730697632,
      "rewards/chosen": -0.20253844559192657,
      "rewards/margins": -0.021173518151044846,
      "rewards/rejected": -0.18136492371559143,
      "step": 48
    },
    {
      "epoch": 0.588,
      "grad_norm": 1.0588122606277466,
      "learning_rate": 2.182455450632803e-07,
      "logits/chosen": -1.3651273250579834,
      "logits/rejected": -1.2209078073501587,
      "logps/chosen": -0.10214084386825562,
      "logps/rejected": -0.09154266119003296,
      "loss": 0.8675,
      "rewards/accuracies": 0.4166666865348816,
      "rewards/chosen": -0.20428168773651123,
      "rewards/margins": -0.02119637280702591,
      "rewards/rejected": -0.18308532238006592,
      "step": 49
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0050568580627441,
      "learning_rate": 2.0774979491953776e-07,
      "logits/chosen": -1.3634512424468994,
      "logits/rejected": -1.1983171701431274,
      "logps/chosen": -0.09622293710708618,
      "logps/rejected": -0.09168636053800583,
      "loss": 0.8605,
      "rewards/accuracies": 0.4479166865348816,
      "rewards/chosen": -0.19244587421417236,
      "rewards/margins": -0.009073152206838131,
      "rewards/rejected": -0.18337272107601166,
      "step": 50
    },
    {
      "epoch": 0.612,
      "grad_norm": 1.003779411315918,
      "learning_rate": 1.973301825010685e-07,
      "logits/chosen": -1.377637267112732,
      "logits/rejected": -1.2013237476348877,
      "logps/chosen": -0.09089501947164536,
      "logps/rejected": -0.09312086552381516,
      "loss": 0.8524,
      "rewards/accuracies": 0.5520833730697632,
      "rewards/chosen": -0.1817900389432907,
      "rewards/margins": 0.004451685585081577,
      "rewards/rejected": -0.1862417310476303,
      "step": 51
    },
    {
      "epoch": 0.624,
      "grad_norm": 1.0647156238555908,
      "learning_rate": 1.8700548465371873e-07,
      "logits/chosen": -1.3391690254211426,
      "logits/rejected": -1.182291030883789,
      "logps/chosen": -0.10368049144744873,
      "logps/rejected": -0.09800291061401367,
      "loss": 0.8624,
      "rewards/accuracies": 0.4895833134651184,
      "rewards/chosen": -0.20736098289489746,
      "rewards/margins": -0.011355183087289333,
      "rewards/rejected": -0.19600582122802734,
      "step": 52
    },
    {
      "epoch": 0.636,
      "grad_norm": 1.2042913436889648,
      "learning_rate": 1.767943071808624e-07,
      "logits/chosen": -1.3675341606140137,
      "logits/rejected": -1.2020319700241089,
      "logps/chosen": -0.10732070356607437,
      "logps/rejected": -0.09919053316116333,
      "loss": 0.871,
      "rewards/accuracies": 0.5416666865348816,
      "rewards/chosen": -0.21464140713214874,
      "rewards/margins": -0.01626037061214447,
      "rewards/rejected": -0.19838106632232666,
      "step": 53
    },
    {
      "epoch": 0.648,
      "grad_norm": 0.9795694351196289,
      "learning_rate": 1.667150513144856e-07,
      "logits/chosen": -1.3203659057617188,
      "logits/rejected": -1.1565968990325928,
      "logps/chosen": -0.09574580192565918,
      "logps/rejected": -0.09623640775680542,
      "loss": 0.8547,
      "rewards/accuracies": 0.5520833730697632,
      "rewards/chosen": -0.19149160385131836,
      "rewards/margins": 0.000981215387582779,
      "rewards/rejected": -0.19247281551361084,
      "step": 54
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0732730627059937,
      "learning_rate": 1.5678588055492286e-07,
      "logits/chosen": -1.2979214191436768,
      "logits/rejected": -1.150040626525879,
      "logps/chosen": -0.10067766904830933,
      "logps/rejected": -0.1018737256526947,
      "loss": 0.8545,
      "rewards/accuracies": 0.5104166865348816,
      "rewards/chosen": -0.20135533809661865,
      "rewards/margins": 0.0023921187967061996,
      "rewards/rejected": -0.2037474513053894,
      "step": 55
    },
    {
      "epoch": 0.672,
      "grad_norm": 0.9587694406509399,
      "learning_rate": 1.4702468793900186e-07,
      "logits/chosen": -1.3606462478637695,
      "logits/rejected": -1.1573679447174072,
      "logps/chosen": -0.09973961114883423,
      "logps/rejected": -0.10131655633449554,
      "loss": 0.8535,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.19947922229766846,
      "rewards/margins": 0.003153874073177576,
      "rewards/rejected": -0.2026331126689911,
      "step": 56
    },
    {
      "epoch": 0.684,
      "grad_norm": 1.0398699045181274,
      "learning_rate": 1.3744906379558164e-07,
      "logits/chosen": -1.3375906944274902,
      "logits/rejected": -1.1461068391799927,
      "logps/chosen": -0.10326816886663437,
      "logps/rejected": -0.10188017785549164,
      "loss": 0.857,
      "rewards/accuracies": 0.4687500298023224,
      "rewards/chosen": -0.20653633773326874,
      "rewards/margins": -0.002775975503027439,
      "rewards/rejected": -0.20376035571098328,
      "step": 57
    },
    {
      "epoch": 0.696,
      "grad_norm": 1.0405802726745605,
      "learning_rate": 1.280762640465914e-07,
      "logits/chosen": -1.3368816375732422,
      "logits/rejected": -1.1643409729003906,
      "logps/chosen": -0.09207028895616531,
      "logps/rejected": -0.09347332268953323,
      "loss": 0.8534,
      "rewards/accuracies": 0.5208333134651184,
      "rewards/chosen": -0.18414057791233063,
      "rewards/margins": 0.002806063275784254,
      "rewards/rejected": -0.18694664537906647,
      "step": 58
    },
    {
      "epoch": 0.708,
      "grad_norm": 1.0253372192382812,
      "learning_rate": 1.189231791106921e-07,
      "logits/chosen": -1.2978026866912842,
      "logits/rejected": -1.13652765750885,
      "logps/chosen": -0.10593652725219727,
      "logps/rejected": -0.10165742039680481,
      "loss": 0.8609,
      "rewards/accuracies": 0.479166716337204,
      "rewards/chosen": -0.21187305450439453,
      "rewards/margins": -0.008558189496397972,
      "rewards/rejected": -0.20331484079360962,
      "step": 59
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2529534101486206,
      "learning_rate": 1.1000630346560116e-07,
      "logits/chosen": -1.3010480403900146,
      "logits/rejected": -1.133022665977478,
      "logps/chosen": -0.11126932501792908,
      "logps/rejected": -0.09786901623010635,
      "loss": 0.8738,
      "rewards/accuracies": 0.4895833432674408,
      "rewards/chosen": -0.22253865003585815,
      "rewards/margins": -0.026800617575645447,
      "rewards/rejected": -0.1957380324602127,
      "step": 60
    },
    {
      "epoch": 0.732,
      "grad_norm": 0.9133132696151733,
      "learning_rate": 1.0134170592392836e-07,
      "logits/chosen": -1.3394014835357666,
      "logits/rejected": -1.1518943309783936,
      "logps/chosen": -0.09960196912288666,
      "logps/rejected": -0.10244160890579224,
      "loss": 0.8521,
      "rewards/accuracies": 0.5104166865348816,
      "rewards/chosen": -0.19920393824577332,
      "rewards/margins": 0.005679287016391754,
      "rewards/rejected": -0.20488321781158447,
      "step": 61
    },
    {
      "epoch": 0.744,
      "grad_norm": 1.0934334993362427,
      "learning_rate": 9.29450006760894e-08,
      "logits/chosen": -1.3294323682785034,
      "logits/rejected": -1.1431366205215454,
      "logps/chosen": -0.10188900679349899,
      "logps/rejected": -0.09653942286968231,
      "loss": 0.8616,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.20377801358699799,
      "rewards/margins": -0.010699168778955936,
      "rewards/rejected": -0.19307884573936462,
      "step": 62
    },
    {
      "epoch": 0.756,
      "grad_norm": 0.9888376593589783,
      "learning_rate": 8.483131915247967e-08,
      "logits/chosen": -1.3347070217132568,
      "logits/rejected": -1.167306661605835,
      "logps/chosen": -0.10506478697061539,
      "logps/rejected": -0.1007775291800499,
      "loss": 0.8603,
      "rewards/accuracies": 0.5000000596046448,
      "rewards/chosen": -0.21012957394123077,
      "rewards/margins": -0.008574524894356728,
      "rewards/rejected": -0.2015550583600998,
      "step": 63
    },
    {
      "epoch": 0.768,
      "grad_norm": 0.9486240744590759,
      "learning_rate": 7.701528275561347e-08,
      "logits/chosen": -1.3588067293167114,
      "logits/rejected": -1.1604478359222412,
      "logps/chosen": -0.0977708101272583,
      "logps/rejected": -0.09753303229808807,
      "loss": 0.8555,
      "rewards/accuracies": 0.5312500596046448,
      "rewards/chosen": -0.1955416202545166,
      "rewards/margins": -0.0004755451809614897,
      "rewards/rejected": -0.19506606459617615,
      "step": 64
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.033793568611145,
      "learning_rate": 6.951097651136889e-08,
      "logits/chosen": -1.3951979875564575,
      "logits/rejected": -1.125361680984497,
      "logps/chosen": -0.10307514667510986,
      "logps/rejected": -0.1052648052573204,
      "loss": 0.8525,
      "rewards/accuracies": 0.4895833432674408,
      "rewards/chosen": -0.20615029335021973,
      "rewards/margins": 0.0043793064542114735,
      "rewards/rejected": -0.2105296105146408,
      "step": 65
    },
    {
      "epoch": 0.792,
      "grad_norm": 1.0308159589767456,
      "learning_rate": 6.233192368681889e-08,
      "logits/chosen": -1.3253390789031982,
      "logits/rejected": -1.1783477067947388,
      "logps/chosen": -0.09217476844787598,
      "logps/rejected": -0.09416632354259491,
      "loss": 0.8527,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.18434953689575195,
      "rewards/margins": 0.003983109723776579,
      "rewards/rejected": -0.18833264708518982,
      "step": 66
    },
    {
      "epoch": 0.804,
      "grad_norm": 1.1081663370132446,
      "learning_rate": 5.5491061420390174e-08,
      "logits/chosen": -1.2740880250930786,
      "logits/rejected": -1.152024269104004,
      "logps/chosen": -0.10923092067241669,
      "logps/rejected": -0.10176312923431396,
      "loss": 0.8642,
      "rewards/accuracies": 0.5104166865348816,
      "rewards/chosen": -0.21846184134483337,
      "rewards/margins": -0.01493558008223772,
      "rewards/rejected": -0.20352625846862793,
      "step": 67
    },
    {
      "epoch": 0.816,
      "grad_norm": 1.0150699615478516,
      "learning_rate": 4.900071740826489e-08,
      "logits/chosen": -1.348282814025879,
      "logits/rejected": -1.1383142471313477,
      "logps/chosen": -0.10064545273780823,
      "logps/rejected": -0.10860613733530045,
      "loss": 0.8459,
      "rewards/accuracies": 0.6041667461395264,
      "rewards/chosen": -0.20129090547561646,
      "rewards/margins": 0.01592138595879078,
      "rewards/rejected": -0.2172122746706009,
      "step": 68
    },
    {
      "epoch": 0.828,
      "grad_norm": 1.123404860496521,
      "learning_rate": 4.287258768903948e-08,
      "logits/chosen": -1.3442084789276123,
      "logits/rejected": -1.146917462348938,
      "logps/chosen": -0.10861781984567642,
      "logps/rejected": -0.10071661323308945,
      "loss": 0.8688,
      "rewards/accuracies": 0.5833333134651184,
      "rewards/chosen": -0.21723563969135284,
      "rewards/margins": -0.015802428126335144,
      "rewards/rejected": -0.2014332264661789,
      "step": 69
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.9981204867362976,
      "learning_rate": 3.7117715566672176e-08,
      "logits/chosen": -1.3524047136306763,
      "logits/rejected": -1.1452308893203735,
      "logps/chosen": -0.10508691519498825,
      "logps/rejected": -0.10403452813625336,
      "loss": 0.8568,
      "rewards/accuracies": 0.5729166865348816,
      "rewards/chosen": -0.2101738303899765,
      "rewards/margins": -0.0021047808695584536,
      "rewards/rejected": -0.2080690562725067,
      "step": 70
    },
    {
      "epoch": 0.852,
      "grad_norm": 1.0365066528320312,
      "learning_rate": 3.174647170970296e-08,
      "logits/chosen": -1.3592528104782104,
      "logits/rejected": -1.1138660907745361,
      "logps/chosen": -0.10330555588006973,
      "logps/rejected": -0.1064281240105629,
      "loss": 0.8524,
      "rewards/accuracies": 0.5729166865348816,
      "rewards/chosen": -0.20661111176013947,
      "rewards/margins": 0.006245152093470097,
      "rewards/rejected": -0.2128562480211258,
      "step": 71
    },
    {
      "epoch": 0.864,
      "grad_norm": 1.076464295387268,
      "learning_rate": 2.6768535462607905e-08,
      "logits/chosen": -1.3058511018753052,
      "logits/rejected": -1.126082420349121,
      "logps/chosen": -0.10569944232702255,
      "logps/rejected": -0.11077728122472763,
      "loss": 0.8494,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.2113988846540451,
      "rewards/margins": 0.01015565823763609,
      "rewards/rejected": -0.22155456244945526,
      "step": 72
    },
    {
      "epoch": 0.876,
      "grad_norm": 0.9264240264892578,
      "learning_rate": 2.2192877402966048e-08,
      "logits/chosen": -1.3490333557128906,
      "logits/rejected": -1.1134393215179443,
      "logps/chosen": -0.10448520630598068,
      "logps/rejected": -0.11283887922763824,
      "loss": 0.8457,
      "rewards/accuracies": 0.6145833730697632,
      "rewards/chosen": -0.20897041261196136,
      "rewards/margins": 0.016707373782992363,
      "rewards/rejected": -0.2256777584552765,
      "step": 73
    },
    {
      "epoch": 0.888,
      "grad_norm": 1.067218542098999,
      "learning_rate": 1.8027743175872662e-08,
      "logits/chosen": -1.345249891281128,
      "logits/rejected": -1.1063634157180786,
      "logps/chosen": -0.11241614818572998,
      "logps/rejected": -0.11045221984386444,
      "loss": 0.8579,
      "rewards/accuracies": 0.5104166865348816,
      "rewards/chosen": -0.22483229637145996,
      "rewards/margins": -0.003927857149392366,
      "rewards/rejected": -0.22090443968772888,
      "step": 74
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.0710501670837402,
      "learning_rate": 1.4280638634728948e-08,
      "logits/chosen": -1.3244132995605469,
      "logits/rejected": -1.1485953330993652,
      "logps/chosen": -0.11838357150554657,
      "logps/rejected": -0.10926854610443115,
      "loss": 0.8683,
      "rewards/accuracies": 0.47916674613952637,
      "rewards/chosen": -0.23676714301109314,
      "rewards/margins": -0.018230034038424492,
      "rewards/rejected": -0.2185370922088623,
      "step": 75
    },
    {
      "epoch": 0.912,
      "grad_norm": 1.1065670251846313,
      "learning_rate": 1.0958316315187289e-08,
      "logits/chosen": -1.317086100578308,
      "logits/rejected": -1.1413421630859375,
      "logps/chosen": -0.10257872194051743,
      "logps/rejected": -0.10372138023376465,
      "loss": 0.8542,
      "rewards/accuracies": 0.5416666269302368,
      "rewards/chosen": -0.20515744388103485,
      "rewards/margins": 0.0022853193804621696,
      "rewards/rejected": -0.2074427604675293,
      "step": 76
    },
    {
      "epoch": 0.924,
      "grad_norm": 1.0825769901275635,
      "learning_rate": 8.066763266625282e-09,
      "logits/chosen": -1.3573386669158936,
      "logits/rejected": -1.109717607498169,
      "logps/chosen": -0.10758916288614273,
      "logps/rejected": -0.10599493980407715,
      "loss": 0.8597,
      "rewards/accuracies": 0.6250000596046448,
      "rewards/chosen": -0.21517832577228546,
      "rewards/margins": -0.0031884238123893738,
      "rewards/rejected": -0.2119898796081543,
      "step": 77
    },
    {
      "epoch": 0.936,
      "grad_norm": 0.9156083464622498,
      "learning_rate": 5.611190263078463e-09,
      "logits/chosen": -1.309991717338562,
      "logits/rejected": -1.1210401058197021,
      "logps/chosen": -0.0973024070262909,
      "logps/rejected": -0.09729278087615967,
      "loss": 0.8551,
      "rewards/accuracies": 0.4583333432674408,
      "rewards/chosen": -0.1946048140525818,
      "rewards/margins": -1.9263941794633865e-05,
      "rewards/rejected": -0.19458556175231934,
      "step": 78
    },
    {
      "epoch": 0.948,
      "grad_norm": 0.9994163513183594,
      "learning_rate": 3.5960224130728858e-09,
      "logits/chosen": -1.3023028373718262,
      "logits/rejected": -1.1124149560928345,
      "logps/chosen": -0.09809858351945877,
      "logps/rejected": -0.1080770194530487,
      "loss": 0.8436,
      "rewards/accuracies": 0.6354166865348816,
      "rewards/chosen": -0.19619716703891754,
      "rewards/margins": 0.01995689421892166,
      "rewards/rejected": -0.2161540389060974,
      "step": 79
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.9938739538192749,
      "learning_rate": 2.0248911852807917e-09,
      "logits/chosen": -1.3484057188034058,
      "logits/rejected": -1.0871906280517578,
      "logps/chosen": -0.11684219539165497,
      "logps/rejected": -0.11286689341068268,
      "loss": 0.8601,
      "rewards/accuracies": 0.5104166865348816,
      "rewards/chosen": -0.23368439078330994,
      "rewards/margins": -0.007950600236654282,
      "rewards/rejected": -0.22573378682136536,
      "step": 80
    },
    {
      "epoch": 0.972,
      "grad_norm": 0.9481876492500305,
      "learning_rate": 9.006278643683696e-10,
      "logits/chosen": -1.3169337511062622,
      "logits/rejected": -1.1376291513442993,
      "logps/chosen": -0.09936561435461044,
      "logps/rejected": -0.10312428325414658,
      "loss": 0.8508,
      "rewards/accuracies": 0.5833333730697632,
      "rewards/chosen": -0.1987312287092209,
      "rewards/margins": 0.007517362013459206,
      "rewards/rejected": -0.20624856650829315,
      "step": 81
    },
    {
      "epoch": 0.984,
      "grad_norm": 1.0620635747909546,
      "learning_rate": 2.2525844882964606e-10,
      "logits/chosen": -1.3467659950256348,
      "logits/rejected": -1.1462361812591553,
      "logps/chosen": -0.10522940754890442,
      "logps/rejected": -0.10153805464506149,
      "loss": 0.8594,
      "rewards/accuracies": 0.4687500298023224,
      "rewards/chosen": -0.21045881509780884,
      "rewards/margins": -0.007382689975202084,
      "rewards/rejected": -0.20307610929012299,
      "step": 82
    },
    {
      "epoch": 0.996,
      "grad_norm": 1.0221400260925293,
      "learning_rate": 0.0,
      "logits/chosen": -1.313194751739502,
      "logits/rejected": -1.164141297340393,
      "logps/chosen": -0.10121379047632217,
      "logps/rejected": -0.1018705815076828,
      "loss": 0.8549,
      "rewards/accuracies": 0.5416666865348816,
      "rewards/chosen": -0.20242758095264435,
      "rewards/margins": 0.0013135506305843592,
      "rewards/rejected": -0.2037411630153656,
      "step": 83
    }
  ],
  "logging_steps": 1,
  "max_steps": 83,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 305651334512640.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|