{
"best_metric": 21.601177215576172,
"best_model_checkpoint": "./output/checkpoints/2024-05-27_09-02-19/checkpoint-600",
"epoch": 1.0,
"eval_steps": 100,
"global_step": 1271,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003933910306845004,
"grad_norm": 26.446468353271484,
"learning_rate": 9.375000000000001e-07,
"logits/chosen": -0.2329835593700409,
"logits/rejected": -0.7131723165512085,
"logps/chosen": -1.0090148448944092,
"logps/rejected": -1.6766555309295654,
"loss": 25.0031,
"rewards/accuracies": 0.1875,
"rewards/chosen": 8.527375939593185e-06,
"rewards/margins": -3.058705624425784e-05,
"rewards/rejected": 3.911443127435632e-05,
"step": 5
},
{
"epoch": 0.007867820613690008,
"grad_norm": 11.936881065368652,
"learning_rate": 2.5e-06,
"logits/chosen": -0.396948903799057,
"logits/rejected": -0.7360211610794067,
"logps/chosen": -0.8984262347221375,
"logps/rejected": -1.1693015098571777,
"loss": 24.9925,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -4.258692206349224e-05,
"rewards/margins": 7.496408943552524e-05,
"rewards/rejected": -0.00011755101149901748,
"step": 10
},
{
"epoch": 0.011801730920535013,
"grad_norm": 13.576423645019531,
"learning_rate": 4.0625000000000005e-06,
"logits/chosen": -0.3573324680328369,
"logits/rejected": -0.6578253507614136,
"logps/chosen": -0.8142125010490417,
"logps/rejected": -1.0063048601150513,
"loss": 24.98,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.00027080357540398836,
"rewards/margins": 0.00020028329163324088,
"rewards/rejected": -0.00047108688158914447,
"step": 15
},
{
"epoch": 0.015735641227380016,
"grad_norm": 34.40192794799805,
"learning_rate": 5.3125e-06,
"logits/chosen": -0.3880882263183594,
"logits/rejected": -0.7228592038154602,
"logps/chosen": -1.1428436040878296,
"logps/rejected": -1.567692756652832,
"loss": 24.8648,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.001833672053180635,
"rewards/margins": 0.0014055297942832112,
"rewards/rejected": -0.003239201847463846,
"step": 20
},
{
"epoch": 0.01966955153422502,
"grad_norm": 16.62054443359375,
"learning_rate": 6.875e-06,
"logits/chosen": -0.25890520215034485,
"logits/rejected": -0.7020931839942932,
"logps/chosen": -1.212777853012085,
"logps/rejected": -1.3589212894439697,
"loss": 24.939,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.0046936506405472755,
"rewards/margins": 0.0006289022276178002,
"rewards/rejected": -0.005322552751749754,
"step": 25
},
{
"epoch": 0.023603461841070025,
"grad_norm": 22.588180541992188,
"learning_rate": 8.4375e-06,
"logits/chosen": -0.3284154236316681,
"logits/rejected": -0.6061900854110718,
"logps/chosen": -0.9161252975463867,
"logps/rejected": -1.1756784915924072,
"loss": 24.7008,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.004226677119731903,
"rewards/margins": 0.003145116614177823,
"rewards/rejected": -0.0073717935010790825,
"step": 30
},
{
"epoch": 0.02753737214791503,
"grad_norm": 35.44740295410156,
"learning_rate": 1e-05,
"logits/chosen": -0.4888971447944641,
"logits/rejected": -0.7553955912590027,
"logps/chosen": -1.252327561378479,
"logps/rejected": -1.473224401473999,
"loss": 24.5665,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.013578332960605621,
"rewards/margins": 0.004552872385829687,
"rewards/rejected": -0.018131205812096596,
"step": 35
},
{
"epoch": 0.03147128245476003,
"grad_norm": 32.20027160644531,
"learning_rate": 1.1562500000000002e-05,
"logits/chosen": -0.4067641794681549,
"logits/rejected": -0.7352877855300903,
"logps/chosen": -1.055959939956665,
"logps/rejected": -1.4485868215560913,
"loss": 24.0967,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.016065727919340134,
"rewards/margins": 0.011377329006791115,
"rewards/rejected": -0.02744305692613125,
"step": 40
},
{
"epoch": 0.03540519276160504,
"grad_norm": NaN,
"learning_rate": 1.2812500000000001e-05,
"logits/chosen": -0.7447024583816528,
"logits/rejected": -1.0448763370513916,
"logps/chosen": -1.723064661026001,
"logps/rejected": -2.249486207962036,
"loss": 24.0293,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.041548244655132294,
"rewards/margins": 0.013970533385872841,
"rewards/rejected": -0.05551878362894058,
"step": 45
},
{
"epoch": 0.03933910306845004,
"grad_norm": 26.65926742553711,
"learning_rate": 1.4375e-05,
"logits/chosen": -0.37696924805641174,
"logits/rejected": -0.46783286333084106,
"logps/chosen": -1.015779733657837,
"logps/rejected": -1.599536418914795,
"loss": 24.0592,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.020270783454179764,
"rewards/margins": 0.019904401153326035,
"rewards/rejected": -0.0401751883327961,
"step": 50
},
{ |
|
"epoch": 0.043273013375295044, |
|
"grad_norm": 30.033103942871094, |
|
"learning_rate": 1.59375e-05, |
|
"logits/chosen": -0.7907823324203491, |
|
"logits/rejected": -0.990174412727356, |
|
"logps/chosen": -1.7764304876327515, |
|
"logps/rejected": -1.9972736835479736, |
|
"loss": 24.6893, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.05663704872131348, |
|
"rewards/margins": 0.012498116120696068, |
|
"rewards/rejected": -0.0691351667046547, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04720692368214005, |
|
"grad_norm": 155.12327575683594, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"logits/chosen": -0.5883419513702393, |
|
"logits/rejected": -0.9729728698730469, |
|
"logps/chosen": -1.749121904373169, |
|
"logps/rejected": -2.5301637649536133, |
|
"loss": 23.3501, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.06644631177186966, |
|
"rewards/margins": 0.04214775934815407, |
|
"rewards/rejected": -0.10859407484531403, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05114083398898505, |
|
"grad_norm": 106.24934387207031, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"logits/chosen": -0.8320428133010864, |
|
"logits/rejected": -1.0106093883514404, |
|
"logps/chosen": -1.1940712928771973, |
|
"logps/rejected": -2.7599964141845703, |
|
"loss": 23.1753, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03951374441385269, |
|
"rewards/margins": 0.08484560251235962, |
|
"rewards/rejected": -0.12435934692621231, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.05507474429583006, |
|
"grad_norm": 82.20561981201172, |
|
"learning_rate": 2.0312500000000002e-05, |
|
"logits/chosen": -0.9974054098129272, |
|
"logits/rejected": -1.2483726739883423, |
|
"logps/chosen": -1.6322723627090454, |
|
"logps/rejected": -2.4456088542938232, |
|
"loss": 21.933, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.07070576399564743, |
|
"rewards/margins": 0.055024802684783936, |
|
"rewards/rejected": -0.12573055922985077, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.059008654602675056, |
|
"grad_norm": 1317.8922119140625, |
|
"learning_rate": 2.1562500000000002e-05, |
|
"logits/chosen": -1.0032284259796143, |
|
"logits/rejected": -1.2543575763702393, |
|
"logps/chosen": -1.648602843284607, |
|
"logps/rejected": -3.2052788734436035, |
|
"loss": 30.9638, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08028480410575867, |
|
"rewards/margins": 0.09190882742404938, |
|
"rewards/rejected": -0.17219363152980804, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06294256490952006, |
|
"grad_norm": 135.98031616210938, |
|
"learning_rate": 2.3125000000000003e-05, |
|
"logits/chosen": -1.1862694025039673, |
|
"logits/rejected": -1.268090844154358, |
|
"logps/chosen": -1.7252533435821533, |
|
"logps/rejected": -2.730776786804199, |
|
"loss": 23.1291, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.08682449907064438, |
|
"rewards/margins": 0.07040676474571228, |
|
"rewards/rejected": -0.15723127126693726, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06687647521636507, |
|
"grad_norm": 83.77543640136719, |
|
"learning_rate": 2.46875e-05, |
|
"logits/chosen": -1.4288493394851685, |
|
"logits/rejected": -1.6324199438095093, |
|
"logps/chosen": -1.858473539352417, |
|
"logps/rejected": -2.4925625324249268, |
|
"loss": 22.47, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.10322336852550507, |
|
"rewards/margins": 0.040188394486904144, |
|
"rewards/rejected": -0.143411785364151, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.07081038552321008, |
|
"grad_norm": 112.26095581054688, |
|
"learning_rate": 2.625e-05, |
|
"logits/chosen": -1.5094571113586426, |
|
"logits/rejected": -1.6503517627716064, |
|
"logps/chosen": -2.1784815788269043, |
|
"logps/rejected": -2.8247978687286377, |
|
"loss": 26.8652, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.12195611000061035, |
|
"rewards/margins": 0.04399397224187851, |
|
"rewards/rejected": -0.16595008969306946, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07474429583005507, |
|
"grad_norm": 116.14981079101562, |
|
"learning_rate": 2.7812500000000002e-05, |
|
"logits/chosen": -1.705255150794983, |
|
"logits/rejected": -1.8774993419647217, |
|
"logps/chosen": -2.0536391735076904, |
|
"logps/rejected": -2.913367748260498, |
|
"loss": 23.1756, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.11939144134521484, |
|
"rewards/margins": 0.03494938462972641, |
|
"rewards/rejected": -0.15434083342552185, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.07867820613690008, |
|
"grad_norm": 171.99806213378906, |
|
"learning_rate": 2.9375000000000003e-05, |
|
"logits/chosen": -1.7922956943511963, |
|
"logits/rejected": -1.805193305015564, |
|
"logps/chosen": -2.2087619304656982, |
|
"logps/rejected": -2.953051805496216, |
|
"loss": 23.3214, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1307235062122345, |
|
"rewards/margins": 0.03435593843460083, |
|
"rewards/rejected": -0.16507944464683533, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07867820613690008, |
|
"eval_logits/chosen": -1.966138243675232, |
|
"eval_logits/rejected": -2.09938645362854, |
|
"eval_logps/chosen": -2.3467371463775635, |
|
"eval_logps/rejected": -2.9913861751556396, |
|
"eval_loss": 22.537601470947266, |
|
"eval_rewards/accuracies": 0.643750011920929, |
|
"eval_rewards/chosen": -0.13215361535549164, |
|
"eval_rewards/margins": 0.04243787005543709, |
|
"eval_rewards/rejected": -0.17459148168563843, |
|
"eval_runtime": 254.2532, |
|
"eval_samples_per_second": 2.517, |
|
"eval_steps_per_second": 0.157, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08261211644374508, |
|
"grad_norm": 90.37792205810547, |
|
"learning_rate": 3.09375e-05, |
|
"logits/chosen": -1.7015612125396729, |
|
"logits/rejected": -1.8363323211669922, |
|
"logps/chosen": -2.096281051635742, |
|
"logps/rejected": -3.1252198219299316, |
|
"loss": 27.6536, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.11915542930364609, |
|
"rewards/margins": 0.06005290150642395, |
|
"rewards/rejected": -0.17920835316181183, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08654602675059009, |
|
"grad_norm": 88.4887466430664, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"logits/chosen": -1.657274842262268, |
|
"logits/rejected": -1.8521808385849, |
|
"logps/chosen": -1.8666527271270752, |
|
"logps/rejected": -2.9845376014709473, |
|
"loss": 21.5034, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.10541417449712753, |
|
"rewards/margins": 0.05897489935159683, |
|
"rewards/rejected": -0.16438907384872437, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0904799370574351, |
|
"grad_norm": 134.99591064453125, |
|
"learning_rate": 3.40625e-05, |
|
"logits/chosen": -1.6925245523452759, |
|
"logits/rejected": -1.7081615924835205, |
|
"logps/chosen": -2.6438632011413574, |
|
"logps/rejected": -3.7139625549316406, |
|
"loss": 22.8601, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.14907808601856232, |
|
"rewards/margins": 0.05926816537976265, |
|
"rewards/rejected": -0.20834624767303467, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.0944138473642801, |
|
"grad_norm": 105.24943542480469, |
|
"learning_rate": 3.5625000000000005e-05, |
|
"logits/chosen": -1.591507911682129, |
|
"logits/rejected": -1.58656907081604, |
|
"logps/chosen": -1.9171836376190186, |
|
"logps/rejected": -2.5378520488739014, |
|
"loss": 23.0976, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.10242807865142822, |
|
"rewards/margins": 0.03649063780903816, |
|
"rewards/rejected": -0.1389187127351761, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0983477576711251, |
|
"grad_norm": 87.82530212402344, |
|
"learning_rate": 3.71875e-05, |
|
"logits/chosen": -1.3108810186386108, |
|
"logits/rejected": -1.4434765577316284, |
|
"logps/chosen": -2.124854564666748, |
|
"logps/rejected": -3.0788867473602295, |
|
"loss": 24.5017, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1214764267206192, |
|
"rewards/margins": 0.04934501647949219, |
|
"rewards/rejected": -0.1708214432001114, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1022816679779701, |
|
"grad_norm": 69.61011505126953, |
|
"learning_rate": 3.875e-05, |
|
"logits/chosen": -1.286228895187378, |
|
"logits/rejected": -1.5050832033157349, |
|
"logps/chosen": -2.5508532524108887, |
|
"logps/rejected": -3.24528169631958, |
|
"loss": 21.4116, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.15334758162498474, |
|
"rewards/margins": 0.04556337743997574, |
|
"rewards/rejected": -0.19891095161437988, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10621557828481511, |
|
"grad_norm": 101.8541259765625, |
|
"learning_rate": 3.999992445477636e-05, |
|
"logits/chosen": -1.3636066913604736, |
|
"logits/rejected": -1.5931237936019897, |
|
"logps/chosen": -3.0847220420837402, |
|
"logps/rejected": -3.839167356491089, |
|
"loss": 21.3367, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2012835294008255, |
|
"rewards/margins": 0.05702406167984009, |
|
"rewards/rejected": -0.2583075761795044, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.11014948859166011, |
|
"grad_norm": 701.3626098632812, |
|
"learning_rate": 3.999728043187288e-05, |
|
"logits/chosen": -1.4217129945755005, |
|
"logits/rejected": -1.4933011531829834, |
|
"logps/chosen": -3.9832420349121094, |
|
"logps/rejected": -5.435095310211182, |
|
"loss": 23.8821, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2787173390388489, |
|
"rewards/margins": 0.09217057377099991, |
|
"rewards/rejected": -0.3708879351615906, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11408339889850512, |
|
"grad_norm": 163.25831604003906, |
|
"learning_rate": 3.9990859718476166e-05, |
|
"logits/chosen": -1.4570497274398804, |
|
"logits/rejected": -1.473787784576416, |
|
"logps/chosen": -3.3077120780944824, |
|
"logps/rejected": -4.8310956954956055, |
|
"loss": 20.1222, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2514913082122803, |
|
"rewards/margins": 0.11996223777532578, |
|
"rewards/rejected": -0.37145355343818665, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.11801730920535011, |
|
"grad_norm": 274.31146240234375, |
|
"learning_rate": 3.998066352720348e-05, |
|
"logits/chosen": -1.4901472330093384, |
|
"logits/rejected": -1.5641014575958252, |
|
"logps/chosen": -4.366189956665039, |
|
"logps/rejected": -5.792475700378418, |
|
"loss": 24.8947, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3093239367008209, |
|
"rewards/margins": 0.1069142073392868, |
|
"rewards/rejected": -0.4162382185459137, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12195121951219512, |
|
"grad_norm": 204.0838623046875, |
|
"learning_rate": 3.9966693783709596e-05, |
|
"logits/chosen": -1.775489091873169, |
|
"logits/rejected": -1.6681087017059326, |
|
"logps/chosen": -3.3862712383270264, |
|
"logps/rejected": -3.850006580352783, |
|
"loss": 25.0897, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.22455720603466034, |
|
"rewards/margins": 0.03894208371639252, |
|
"rewards/rejected": -0.26349928975105286, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.12588512981904013, |
|
"grad_norm": 117.58898162841797, |
|
"learning_rate": 3.9948953126323144e-05, |
|
"logits/chosen": -1.7140939235687256, |
|
"logits/rejected": -1.8748031854629517, |
|
"logps/chosen": -2.808436632156372, |
|
"logps/rejected": -3.4621639251708984, |
|
"loss": 22.909, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.17321309447288513, |
|
"rewards/margins": 0.03799115866422653, |
|
"rewards/rejected": -0.21120426058769226, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12981904012588513, |
|
"grad_norm": 75.52522277832031, |
|
"learning_rate": 3.992744490554832e-05, |
|
"logits/chosen": -1.5584402084350586, |
|
"logits/rejected": -1.6980777978897095, |
|
"logps/chosen": -2.5916876792907715, |
|
"logps/rejected": -3.250844955444336, |
|
"loss": 22.8805, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.16778233647346497, |
|
"rewards/margins": 0.046813301742076874, |
|
"rewards/rejected": -0.21459563076496124, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.13375295043273014, |
|
"grad_norm": 210.5413818359375, |
|
"learning_rate": 3.990217318343214e-05, |
|
"logits/chosen": -1.6046726703643799, |
|
"logits/rejected": -1.785196304321289, |
|
"logps/chosen": -3.144779920578003, |
|
"logps/rejected": -4.314496040344238, |
|
"loss": 21.1638, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2154751569032669, |
|
"rewards/margins": 0.07778888940811157, |
|
"rewards/rejected": -0.2932640314102173, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.13768686073957515, |
|
"grad_norm": 137.43014526367188, |
|
"learning_rate": 3.987314273279721e-05, |
|
"logits/chosen": -1.538189172744751, |
|
"logits/rejected": -1.7611982822418213, |
|
"logps/chosen": -3.314256191253662, |
|
"logps/rejected": -4.361076354980469, |
|
"loss": 22.1568, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.24516558647155762, |
|
"rewards/margins": 0.08248710632324219, |
|
"rewards/rejected": -0.3276526927947998, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.14162077104642015, |
|
"grad_norm": 162.08778381347656, |
|
"learning_rate": 3.9840359036340424e-05, |
|
"logits/chosen": -1.5785366296768188, |
|
"logits/rejected": -1.6759214401245117, |
|
"logps/chosen": -3.9628891944885254, |
|
"logps/rejected": -4.663954257965088, |
|
"loss": 23.3996, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2786514163017273, |
|
"rewards/margins": 0.06107773259282112, |
|
"rewards/rejected": -0.3397291302680969, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14555468135326516, |
|
"grad_norm": 183.8037567138672, |
|
"learning_rate": 3.980382828559743e-05, |
|
"logits/chosen": -1.8036388158798218, |
|
"logits/rejected": -1.9109561443328857, |
|
"logps/chosen": -4.924368381500244, |
|
"logps/rejected": -5.794642448425293, |
|
"loss": 22.9192, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.357871949672699, |
|
"rewards/margins": 0.06735062599182129, |
|
"rewards/rejected": -0.42522257566452026, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.14948859166011014, |
|
"grad_norm": 137.1122283935547, |
|
"learning_rate": 3.9763557379773316e-05, |
|
"logits/chosen": -1.7101930379867554, |
|
"logits/rejected": -1.8198864459991455, |
|
"logps/chosen": -3.584138870239258, |
|
"logps/rejected": -4.54425048828125, |
|
"loss": 20.9665, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.27856582403182983, |
|
"rewards/margins": 0.0747852548956871, |
|
"rewards/rejected": -0.3533511161804199, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15342250196695514, |
|
"grad_norm": 164.39016723632812, |
|
"learning_rate": 3.971955392443965e-05, |
|
"logits/chosen": -1.697361707687378, |
|
"logits/rejected": -1.7193193435668945, |
|
"logps/chosen": -3.8646721839904785, |
|
"logps/rejected": -5.100863456726074, |
|
"loss": 21.2867, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.29381299018859863, |
|
"rewards/margins": 0.07520242035388947, |
|
"rewards/rejected": -0.3690153658390045, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.15735641227380015, |
|
"grad_norm": 125.37354278564453, |
|
"learning_rate": 3.9671826230098045e-05, |
|
"logits/chosen": -1.6001428365707397, |
|
"logits/rejected": -1.736572504043579, |
|
"logps/chosen": -3.7506911754608154, |
|
"logps/rejected": -4.7256364822387695, |
|
"loss": 21.3918, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.2859867215156555, |
|
"rewards/margins": 0.07923749834299088, |
|
"rewards/rejected": -0.3652242124080658, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15735641227380015, |
|
"eval_logits/chosen": -1.5953483581542969, |
|
"eval_logits/rejected": -1.725934624671936, |
|
"eval_logps/chosen": -4.086066246032715, |
|
"eval_logps/rejected": -5.129216194152832, |
|
"eval_loss": 23.703336715698242, |
|
"eval_rewards/accuracies": 0.6546875238418579, |
|
"eval_rewards/chosen": -0.3060864806175232, |
|
"eval_rewards/margins": 0.0822879821062088, |
|
"eval_rewards/rejected": -0.3883745074272156, |
|
"eval_runtime": 254.3055, |
|
"eval_samples_per_second": 2.517, |
|
"eval_steps_per_second": 0.157, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16129032258064516, |
|
"grad_norm": 167.259033203125, |
|
"learning_rate": 3.962038331061065e-05, |
|
"logits/chosen": -1.4170001745224, |
|
"logits/rejected": -1.6189939975738525, |
|
"logps/chosen": -3.634209156036377, |
|
"logps/rejected": -5.206550121307373, |
|
"loss": 26.5886, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2781711220741272, |
|
"rewards/margins": 0.10549378395080566, |
|
"rewards/rejected": -0.38366490602493286, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.16522423288749016, |
|
"grad_norm": 109.77017211914062, |
|
"learning_rate": 3.9565234881497835e-05, |
|
"logits/chosen": -1.5879325866699219, |
|
"logits/rejected": -1.6509323120117188, |
|
"logps/chosen": -2.8328652381896973, |
|
"logps/rejected": -3.345362901687622, |
|
"loss": 22.6272, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.20151302218437195, |
|
"rewards/margins": 0.04314180836081505, |
|
"rewards/rejected": -0.2446548491716385, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.16915814319433517, |
|
"grad_norm": 162.56524658203125, |
|
"learning_rate": 3.950639135810326e-05, |
|
"logits/chosen": -1.6067664623260498, |
|
"logits/rejected": -1.7900478839874268, |
|
"logps/chosen": -3.400160312652588, |
|
"logps/rejected": -4.594171047210693, |
|
"loss": 20.8583, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2197611778974533, |
|
"rewards/margins": 0.0824299305677414, |
|
"rewards/rejected": -0.3021911084651947, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.17309205350118018, |
|
"grad_norm": 134.93789672851562, |
|
"learning_rate": 3.944386385362683e-05, |
|
"logits/chosen": -1.7304567098617554, |
|
"logits/rejected": -1.7651231288909912, |
|
"logps/chosen": -4.1914567947387695, |
|
"logps/rejected": -5.027632713317871, |
|
"loss": 21.3514, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2970493733882904, |
|
"rewards/margins": 0.06264514476060867, |
|
"rewards/rejected": -0.3596945106983185, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17702596380802518, |
|
"grad_norm": 106.4392318725586, |
|
"learning_rate": 3.937766417702591e-05, |
|
"logits/chosen": -1.645422339439392, |
|
"logits/rejected": -1.7480659484863281, |
|
"logps/chosen": -4.8556694984436035, |
|
"logps/rejected": -5.551773548126221, |
|
"loss": 25.2991, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.39037808775901794, |
|
"rewards/margins": 0.040518540889024734, |
|
"rewards/rejected": -0.4308966100215912, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1809598741148702, |
|
"grad_norm": 75.40190124511719, |
|
"learning_rate": 3.9307804830785033e-05, |
|
"logits/chosen": -1.710780382156372, |
|
"logits/rejected": -1.759790062904358, |
|
"logps/chosen": -4.1053643226623535, |
|
"logps/rejected": -5.580018520355225, |
|
"loss": 20.5995, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.32871341705322266, |
|
"rewards/margins": 0.09709561616182327, |
|
"rewards/rejected": -0.42580899596214294, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1848937844217152, |
|
"grad_norm": 129.5159912109375, |
|
"learning_rate": 3.923429900855468e-05, |
|
"logits/chosen": -1.5250613689422607, |
|
"logits/rejected": -1.7375835180282593, |
|
"logps/chosen": -3.8208870887756348, |
|
"logps/rejected": -5.471742630004883, |
|
"loss": 19.4905, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2826174199581146, |
|
"rewards/margins": 0.10505588352680206, |
|
"rewards/rejected": -0.3876733183860779, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.1888276947285602, |
|
"grad_norm": 358.99334716796875, |
|
"learning_rate": 3.915716059265956e-05, |
|
"logits/chosen": -1.2919423580169678, |
|
"logits/rejected": -1.5058711767196655, |
|
"logps/chosen": -4.358604907989502, |
|
"logps/rejected": -5.214530944824219, |
|
"loss": 21.5458, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.30291062593460083, |
|
"rewards/margins": 0.06571656465530396, |
|
"rewards/rejected": -0.3686271905899048, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19276160503540518, |
|
"grad_norm": 83.99627685546875, |
|
"learning_rate": 3.907640415147675e-05, |
|
"logits/chosen": -1.1905521154403687, |
|
"logits/rejected": -1.4401142597198486, |
|
"logps/chosen": -3.447228193283081, |
|
"logps/rejected": -4.264595985412598, |
|
"loss": 21.469, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2503766417503357, |
|
"rewards/margins": 0.06672003120183945, |
|
"rewards/rejected": -0.31709665060043335, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.1966955153422502, |
|
"grad_norm": 97.8551025390625, |
|
"learning_rate": 3.8992044936684326e-05, |
|
"logits/chosen": -1.167415976524353, |
|
"logits/rejected": -1.3312307596206665, |
|
"logps/chosen": -3.2072510719299316, |
|
"logps/rejected": -3.7459397315979004, |
|
"loss": 24.394, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.23177361488342285, |
|
"rewards/margins": 0.04146546125411987, |
|
"rewards/rejected": -0.2732390761375427, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2006294256490952, |
|
"grad_norm": 81.79573822021484, |
|
"learning_rate": 3.8904098880380946e-05, |
|
"logits/chosen": -1.0507287979125977, |
|
"logits/rejected": -1.1515988111495972, |
|
"logps/chosen": -2.6618685722351074, |
|
"logps/rejected": -3.6504600048065186, |
|
"loss": 21.5489, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.17464396357536316, |
|
"rewards/margins": 0.08120186626911163, |
|
"rewards/rejected": -0.2558458149433136, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.2045633359559402, |
|
"grad_norm": 74.57398986816406, |
|
"learning_rate": 3.881258259207688e-05, |
|
"logits/chosen": -1.026132583618164, |
|
"logits/rejected": -1.1835418939590454, |
|
"logps/chosen": -3.0442395210266113, |
|
"logps/rejected": -3.4265129566192627, |
|
"loss": 24.2589, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.18103419244289398, |
|
"rewards/margins": 0.02801087498664856, |
|
"rewards/rejected": -0.20904505252838135, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2084972462627852, |
|
"grad_norm": 136.9661102294922, |
|
"learning_rate": 3.8717513355557156e-05, |
|
"logits/chosen": -1.0285115242004395, |
|
"logits/rejected": -1.2167742252349854, |
|
"logps/chosen": -2.512547016143799, |
|
"logps/rejected": -3.510410785675049, |
|
"loss": 22.8105, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1588296890258789, |
|
"rewards/margins": 0.07337381690740585, |
|
"rewards/rejected": -0.23220351338386536, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.21243115656963021, |
|
"grad_norm": 69.99120330810547, |
|
"learning_rate": 3.861890912561731e-05, |
|
"logits/chosen": -0.8553465604782104, |
|
"logits/rejected": -1.1523014307022095, |
|
"logps/chosen": -2.420487880706787, |
|
"logps/rejected": -3.4689393043518066, |
|
"loss": 20.5014, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.15432177484035492, |
|
"rewards/margins": 0.08016739785671234, |
|
"rewards/rejected": -0.23448920249938965, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.21636506687647522, |
|
"grad_norm": 75.06553649902344, |
|
"learning_rate": 3.85167885246725e-05, |
|
"logits/chosen": -1.0096137523651123, |
|
"logits/rejected": -0.9508267641067505, |
|
"logps/chosen": -3.4687724113464355, |
|
"logps/rejected": -4.444920539855957, |
|
"loss": 22.2787, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2557123303413391, |
|
"rewards/margins": 0.07582716643810272, |
|
"rewards/rejected": -0.3315395414829254, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.22029897718332023, |
|
"grad_norm": 80.17613983154297, |
|
"learning_rate": 3.8411170839240394e-05, |
|
"logits/chosen": -0.9037753939628601, |
|
"logits/rejected": -0.9584333300590515, |
|
"logps/chosen": -3.32795786857605, |
|
"logps/rejected": -4.637690544128418, |
|
"loss": 23.2314, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.2455664873123169, |
|
"rewards/margins": 0.09604751318693161, |
|
"rewards/rejected": -0.3416139781475067, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22423288749016523, |
|
"grad_norm": 51.01013946533203, |
|
"learning_rate": 3.8302076016298786e-05, |
|
"logits/chosen": -0.7821402549743652, |
|
"logits/rejected": -0.8784409761428833, |
|
"logps/chosen": -3.464825391769409, |
|
"logps/rejected": -4.3120927810668945, |
|
"loss": 26.3572, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.22129957377910614, |
|
"rewards/margins": 0.05804131552577019, |
|
"rewards/rejected": -0.27934086322784424, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.22816679779701024, |
|
"grad_norm": 50.36003494262695, |
|
"learning_rate": 3.818952465951836e-05, |
|
"logits/chosen": -0.8527859449386597, |
|
"logits/rejected": -1.0032708644866943, |
|
"logps/chosen": -2.805267810821533, |
|
"logps/rejected": -3.6299731731414795, |
|
"loss": 22.6246, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.19116182625293732, |
|
"rewards/margins": 0.04059046879410744, |
|
"rewards/rejected": -0.23175227642059326, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.23210070810385522, |
|
"grad_norm": 73.20655059814453, |
|
"learning_rate": 3.80735380253715e-05, |
|
"logits/chosen": -1.153649926185608, |
|
"logits/rejected": -1.3198211193084717, |
|
"logps/chosen": -3.344242811203003, |
|
"logps/rejected": -3.948678970336914, |
|
"loss": 23.0667, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.24411065876483917, |
|
"rewards/margins": 0.03483257442712784, |
|
"rewards/rejected": -0.2789432406425476, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.23603461841070023, |
|
"grad_norm": 74.69874572753906, |
|
"learning_rate": 3.7954138019117764e-05, |
|
"logits/chosen": -1.2777029275894165, |
|
"logits/rejected": -1.4231250286102295, |
|
"logps/chosen": -3.8555595874786377, |
|
"logps/rejected": -4.460744380950928, |
|
"loss": 23.7264, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2938821017742157, |
|
"rewards/margins": 0.04352904483675957, |
|
"rewards/rejected": -0.33741116523742676, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23603461841070023, |
|
"eval_logits/chosen": -1.299351453781128, |
|
"eval_logits/rejected": -1.5808088779449463, |
|
"eval_logps/chosen": -3.9815101623535156, |
|
"eval_logps/rejected": -4.813979148864746, |
|
"eval_loss": 22.46109962463379, |
|
"eval_rewards/accuracies": 0.6578124761581421, |
|
"eval_rewards/chosen": -0.29563087224960327, |
|
"eval_rewards/margins": 0.06121987849473953, |
|
"eval_rewards/rejected": -0.3568507432937622, |
|
"eval_runtime": 256.5735, |
|
"eval_samples_per_second": 2.494, |
|
"eval_steps_per_second": 0.156, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23996852871754523, |
|
"grad_norm": 100.70768737792969, |
|
"learning_rate": 3.7831347190666886e-05, |
|
"logits/chosen": -1.4278929233551025, |
|
"logits/rejected": -1.5912885665893555, |
|
"logps/chosen": -4.3169355392456055, |
|
"logps/rejected": -5.223280906677246, |
|
"loss": 21.9094, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.32976609468460083, |
|
"rewards/margins": 0.05834154412150383, |
|
"rewards/rejected": -0.38810762763023376, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 220.49977111816406, |
|
"learning_rate": 3.770518873031997e-05, |
|
"logits/chosen": -1.3963868618011475, |
|
"logits/rejected": -1.5353944301605225, |
|
"logps/chosen": -4.6622209548950195, |
|
"logps/rejected": -5.298556327819824, |
|
"loss": 25.9132, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.34076324105262756, |
|
"rewards/margins": 0.03211987391114235, |
|
"rewards/rejected": -0.3728831112384796, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.24783634933123525, |
|
"grad_norm": 45.653079986572266, |
|
"learning_rate": 3.757568646438977e-05, |
|
"logits/chosen": -1.3604671955108643, |
|
"logits/rejected": -1.4712865352630615, |
|
"logps/chosen": -4.468562126159668, |
|
"logps/rejected": -5.09032678604126, |
|
"loss": 23.4367, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.32228899002075195, |
|
"rewards/margins": 0.037079013884067535, |
|
"rewards/rejected": -0.3593679964542389, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.25177025963808025, |
|
"grad_norm": 66.92400360107422, |
|
"learning_rate": 3.744286485070085e-05, |
|
"logits/chosen": -1.082240343093872, |
|
"logits/rejected": -1.4719394445419312, |
|
"logps/chosen": -4.237619876861572, |
|
"logps/rejected": -5.15458345413208, |
|
"loss": 23.5263, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.33307066559791565, |
|
"rewards/margins": 0.0633757933974266, |
|
"rewards/rejected": -0.39644646644592285, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.25570416994492523, |
|
"grad_norm": 177.88168334960938, |
|
"learning_rate": 3.730674897397048e-05, |
|
"logits/chosen": -1.114916443824768, |
|
"logits/rejected": -1.6648147106170654, |
|
"logps/chosen": -4.053045749664307, |
|
"logps/rejected": -5.189083099365234, |
|
"loss": 22.1226, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3164909780025482, |
|
"rewards/margins": 0.07537268847227097, |
|
"rewards/rejected": -0.3918636739253998, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.25963808025177026, |
|
"grad_norm": 100.30528259277344, |
|
"learning_rate": 3.7167364541071115e-05, |
|
"logits/chosen": -0.988497257232666, |
|
"logits/rejected": -1.1937472820281982, |
|
"logps/chosen": -4.265296459197998, |
|
"logps/rejected": -4.853640556335449, |
|
"loss": 20.9283, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3129270374774933, |
|
"rewards/margins": 0.0673552006483078, |
|
"rewards/rejected": -0.3802822232246399, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.26357199055861524, |
|
"grad_norm": 83.86753845214844, |
|
"learning_rate": 3.7024737876175406e-05, |
|
"logits/chosen": -0.8350197076797485, |
|
"logits/rejected": -1.1670969724655151, |
|
"logps/chosen": -5.4864630699157715, |
|
"logps/rejected": -6.544106960296631, |
|
"loss": 20.1224, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4627310335636139, |
|
"rewards/margins": 0.0860290378332138, |
|
"rewards/rejected": -0.5487600564956665, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.2675059008654603, |
|
"grad_norm": 73.52009582519531, |
|
"learning_rate": 3.6878895915784616e-05, |
|
"logits/chosen": -0.6294984221458435, |
|
"logits/rejected": -0.6951633095741272, |
|
"logps/chosen": -6.682524681091309, |
|
"logps/rejected": -7.499720573425293, |
|
"loss": 23.3106, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.576111912727356, |
|
"rewards/margins": 0.06401752680540085, |
|
"rewards/rejected": -0.6401294469833374, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.27143981117230526, |
|
"grad_norm": 47.29294204711914, |
|
"learning_rate": 3.6729866203641346e-05, |
|
"logits/chosen": -0.30728015303611755, |
|
"logits/rejected": -0.7238900065422058, |
|
"logps/chosen": -5.2912421226501465, |
|
"logps/rejected": -6.754895210266113, |
|
"loss": 20.956, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.44258102774620056, |
|
"rewards/margins": 0.09092569351196289, |
|
"rewards/rejected": -0.5335067510604858, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2753737214791503, |
|
"grad_norm": 92.66437530517578, |
|
"learning_rate": 3.6577676885527676e-05, |
|
"logits/chosen": -0.4043883681297302, |
|
"logits/rejected": -0.6512165069580078, |
|
"logps/chosen": -4.932716369628906, |
|
"logps/rejected": -6.014449596405029, |
|
"loss": 19.8567, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.39097142219543457, |
|
"rewards/margins": 0.09061526507139206, |
|
"rewards/rejected": -0.48158663511276245, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.27930763178599527, |
|
"grad_norm": 147.38983154296875, |
|
"learning_rate": 3.6422356703949525e-05, |
|
"logits/chosen": -0.1327817142009735, |
|
"logits/rejected": -0.62315833568573, |
|
"logps/chosen": -5.30325984954834, |
|
"logps/rejected": -6.832304954528809, |
|
"loss": 21.7519, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4314250946044922, |
|
"rewards/margins": 0.1263391077518463, |
|
"rewards/rejected": -0.5577641725540161, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.2832415420928403, |
|
"grad_norm": 31.5225772857666, |
|
"learning_rate": 3.62639349927083e-05, |
|
"logits/chosen": -0.2402796745300293, |
|
"logits/rejected": -0.8563323020935059, |
|
"logps/chosen": -4.622067928314209, |
|
"logps/rejected": -6.146195411682129, |
|
"loss": 17.5716, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.37094250321388245, |
|
"rewards/margins": 0.14315392076969147, |
|
"rewards/rejected": -0.5140964388847351, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2871754523996853, |
|
"grad_norm": 62.2461051940918, |
|
"learning_rate": 3.610244167136095e-05, |
|
"logits/chosen": -0.09466689825057983, |
|
"logits/rejected": -0.4544064402580261, |
|
"logps/chosen": -5.546882629394531, |
|
"logps/rejected": -6.525860786437988, |
|
"loss": 24.1383, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.47001561522483826, |
|
"rewards/margins": 0.06077291816473007, |
|
"rewards/rejected": -0.5307885408401489, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2911093627065303, |
|
"grad_norm": 48.28466796875, |
|
"learning_rate": 3.593790723956935e-05, |
|
"logits/chosen": -0.2374683916568756, |
|
"logits/rejected": -0.373137503862381, |
|
"logps/chosen": -8.164457321166992, |
|
"logps/rejected": -8.156596183776855, |
|
"loss": 27.2267, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7038506269454956, |
|
"rewards/margins": 0.0024669456761330366, |
|
"rewards/rejected": -0.7063175439834595, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2950432730133753, |
|
"grad_norm": 51.48979568481445, |
|
"learning_rate": 3.577036277134012e-05, |
|
"logits/chosen": 0.5883103609085083, |
|
"logits/rejected": 0.345896452665329, |
|
"logps/chosen": -7.786595344543457, |
|
"logps/rejected": -8.453828811645508, |
|
"loss": 23.7109, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.6850046515464783, |
|
"rewards/margins": 0.03222974017262459, |
|
"rewards/rejected": -0.7172344326972961, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2989771833202203, |
|
"grad_norm": 47.203521728515625, |
|
"learning_rate": 3.5599839909155954e-05, |
|
"logits/chosen": 0.8187308311462402, |
|
"logits/rejected": 0.5813020467758179, |
|
"logps/chosen": -7.474339962005615, |
|
"logps/rejected": -8.487818717956543, |
|
"loss": 21.9183, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.649897038936615, |
|
"rewards/margins": 0.06481163203716278, |
|
"rewards/rejected": -0.714708685874939, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3029110936270653, |
|
"grad_norm": 66.43785095214844, |
|
"learning_rate": 3.542637085799967e-05, |
|
"logits/chosen": 0.7243896722793579, |
|
"logits/rejected": 0.633999228477478, |
|
"logps/chosen": -6.237640380859375, |
|
"logps/rejected": -7.760465145111084, |
|
"loss": 21.8449, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5326268672943115, |
|
"rewards/margins": 0.1084168553352356, |
|
"rewards/rejected": -0.6410436630249023, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3068450039339103, |
|
"grad_norm": 37.43156814575195, |
|
"learning_rate": 3.524998837927192e-05, |
|
"logits/chosen": 0.06674204766750336, |
|
"logits/rejected": -0.2028542459011078, |
|
"logps/chosen": -3.726958751678467, |
|
"logps/rejected": -4.262044906616211, |
|
"loss": 24.4969, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.28034740686416626, |
|
"rewards/margins": 0.04259229078888893, |
|
"rewards/rejected": -0.3229396939277649, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3107789142407553, |
|
"grad_norm": 43.61391067504883, |
|
"learning_rate": 3.5070725784603906e-05, |
|
"logits/chosen": -0.40263357758522034, |
|
"logits/rejected": -0.7386992573738098, |
|
"logps/chosen": -2.9942004680633545, |
|
"logps/rejected": -3.7471251487731934, |
|
"loss": 22.408, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.19999414682388306, |
|
"rewards/margins": 0.05711622163653374, |
|
"rewards/rejected": -0.2571103572845459, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3147128245476003, |
|
"grad_norm": 39.55924606323242, |
|
"learning_rate": 3.488861692956612e-05, |
|
"logits/chosen": -0.3814232647418976, |
|
"logits/rejected": -0.7326269149780273, |
|
"logps/chosen": -3.743687391281128, |
|
"logps/rejected": -4.247666835784912, |
|
"loss": 22.5808, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2401810586452484, |
|
"rewards/margins": 0.03669751435518265, |
|
"rewards/rejected": -0.27687856554985046, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3147128245476003, |
|
"eval_logits/chosen": 1.05367910861969, |
|
"eval_logits/rejected": 0.8413508534431458, |
|
"eval_logps/chosen": -3.476516008377075, |
|
"eval_logps/rejected": -4.1527628898620605, |
|
"eval_loss": 22.322988510131836, |
|
"eval_rewards/accuracies": 0.643750011920929, |
|
"eval_rewards/chosen": -0.2451314926147461, |
|
"eval_rewards/margins": 0.045597635209560394, |
|
"eval_rewards/rejected": -0.2907291054725647, |
|
"eval_runtime": 262.6324, |
|
"eval_samples_per_second": 2.437, |
|
"eval_steps_per_second": 0.152, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.31864673485444533, |
|
"grad_norm": 106.93277740478516, |
|
"learning_rate": 3.470369620727433e-05, |
|
"logits/chosen": -0.2646043300628662, |
|
"logits/rejected": -0.5908231735229492, |
|
"logps/chosen": -4.6682844161987305, |
|
"logps/rejected": -4.9077653884887695, |
|
"loss": 24.9016, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.31124863028526306, |
|
"rewards/margins": 0.024502381682395935, |
|
"rewards/rejected": -0.3357509970664978, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3225806451612903, |
|
"grad_norm": 74.05709838867188, |
|
"learning_rate": 3.451599854189419e-05, |
|
"logits/chosen": 0.022719597443938255, |
|
"logits/rejected": -0.2066432684659958, |
|
"logps/chosen": -4.9405951499938965, |
|
"logps/rejected": -5.324969291687012, |
|
"loss": 24.7214, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.37897494435310364, |
|
"rewards/margins": 0.023352503776550293, |
|
"rewards/rejected": -0.40232744812965393, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.32651455546813535, |
|
"grad_norm": 39.29796600341797, |
|
"learning_rate": 3.4325559382045344e-05, |
|
"logits/chosen": 0.5940214395523071, |
|
"logits/rejected": 0.3333088755607605, |
|
"logps/chosen": -4.753328323364258, |
|
"logps/rejected": -5.081197738647461, |
|
"loss": 24.0772, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.36598071455955505, |
|
"rewards/margins": 0.0194702185690403, |
|
"rewards/rejected": -0.38545092940330505, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3304484657749803, |
|
"grad_norm": 77.69596862792969, |
|
"learning_rate": 3.413241469410669e-05, |
|
"logits/chosen": 0.5746585726737976, |
|
"logits/rejected": 0.37664318084716797, |
|
"logps/chosen": -4.366871356964111, |
|
"logps/rejected": -4.923527717590332, |
|
"loss": 23.106, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.33017784357070923, |
|
"rewards/margins": 0.03850778192281723, |
|
"rewards/rejected": -0.36868563294410706, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.33438237608182536, |
|
"grad_norm": 37.37318801879883, |
|
"learning_rate": 3.3936600955423684e-05, |
|
"logits/chosen": 0.5197581052780151, |
|
"logits/rejected": 0.22815366089344025, |
|
"logps/chosen": -4.27942419052124, |
|
"logps/rejected": -5.002130031585693, |
|
"loss": 21.5593, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3183726668357849, |
|
"rewards/margins": 0.0549759566783905, |
|
"rewards/rejected": -0.3733486235141754, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.33831628638867034, |
|
"grad_norm": 51.467933654785156, |
|
"learning_rate": 3.373815514741928e-05, |
|
"logits/chosen": 0.5920094847679138, |
|
"logits/rejected": 0.27607864141464233, |
|
"logps/chosen": -4.812392234802246, |
|
"logps/rejected": -6.322897434234619, |
|
"loss": 20.3197, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3940238356590271, |
|
"rewards/margins": 0.08363697677850723, |
|
"rewards/rejected": -0.47766080498695374, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3422501966955153, |
|
"grad_norm": 92.83573150634766, |
|
"learning_rate": 3.353711474860957e-05, |
|
"logits/chosen": 0.2608449459075928, |
|
"logits/rejected": 0.022218376398086548, |
|
"logps/chosen": -6.094309329986572, |
|
"logps/rejected": -6.749911308288574, |
|
"loss": 22.7358, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.48861637711524963, |
|
"rewards/margins": 0.06393507868051529, |
|
"rewards/rejected": -0.5525515079498291, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.34618410700236035, |
|
"grad_norm": 117.08031463623047, |
|
"learning_rate": 3.333351772752559e-05, |
|
"logits/chosen": 0.36764952540397644, |
|
"logits/rejected": 0.11572384834289551, |
|
"logps/chosen": -5.9948601722717285, |
|
"logps/rejected": -7.810961723327637, |
|
"loss": 22.5633, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4657825827598572, |
|
"rewards/margins": 0.0877792239189148, |
|
"rewards/rejected": -0.553561806678772, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.35011801730920533, |
|
"grad_norm": 42.3343620300293, |
|
"learning_rate": 3.31274025355426e-05, |
|
"logits/chosen": -0.1417434960603714, |
|
"logits/rejected": -0.3869401216506958, |
|
"logps/chosen": -4.742400169372559, |
|
"logps/rejected": -5.590722560882568, |
|
"loss": 23.695, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.37819725275039673, |
|
"rewards/margins": 0.05572297424077988, |
|
"rewards/rejected": -0.4339202046394348, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.35405192761605037, |
|
"grad_norm": 48.5923957824707, |
|
"learning_rate": 3.2918808099618145e-05, |
|
"logits/chosen": -0.13320264220237732, |
|
"logits/rejected": -0.4407239854335785, |
|
"logps/chosen": -4.052382946014404, |
|
"logps/rejected": -5.202944755554199, |
|
"loss": 23.9333, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2898481488227844, |
|
"rewards/margins": 0.06827215105295181, |
|
"rewards/rejected": -0.35812026262283325, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.35798583792289534, |
|
"grad_norm": 40.526222229003906, |
|
"learning_rate": 3.270777381494025e-05, |
|
"logits/chosen": 0.15568742156028748, |
|
"logits/rejected": -0.1973900943994522, |
|
"logps/chosen": -3.3848178386688232, |
|
"logps/rejected": -4.244564056396484, |
|
"loss": 22.4256, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.24747803807258606, |
|
"rewards/margins": 0.0624859556555748, |
|
"rewards/rejected": -0.30996400117874146, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3619197482297404, |
|
"grad_norm": 39.41511917114258, |
|
"learning_rate": 3.2494339537487316e-05, |
|
"logits/chosen": 0.2709997296333313, |
|
"logits/rejected": -0.02324852906167507, |
|
"logps/chosen": -4.2176713943481445, |
|
"logps/rejected": -4.712052345275879, |
|
"loss": 22.1339, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.30599862337112427, |
|
"rewards/margins": 0.05780113860964775, |
|
"rewards/rejected": -0.3637998104095459, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.36585365853658536, |
|
"grad_norm": 64.34850311279297, |
|
"learning_rate": 3.227854557650086e-05, |
|
"logits/chosen": 0.43827542662620544, |
|
"logits/rejected": 0.3022671937942505, |
|
"logps/chosen": -4.146628379821777, |
|
"logps/rejected": -4.721283912658691, |
|
"loss": 24.9713, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.32393088936805725, |
|
"rewards/margins": 0.03894919902086258, |
|
"rewards/rejected": -0.36288008093833923, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3697875688434304, |
|
"grad_norm": 48.07391357421875, |
|
"learning_rate": 3.206043268687271e-05, |
|
"logits/chosen": 0.8742543458938599, |
|
"logits/rejected": 0.6306554079055786, |
|
"logps/chosen": -4.235721111297607, |
|
"logps/rejected": -4.655104160308838, |
|
"loss": 24.504, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3277904689311981, |
|
"rewards/margins": 0.03070756234228611, |
|
"rewards/rejected": -0.3584980368614197, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.37372147915027537, |
|
"grad_norm": 48.7944450378418, |
|
"learning_rate": 3.1840042061448034e-05, |
|
"logits/chosen": 0.8953019380569458, |
|
"logits/rejected": 0.6509414315223694, |
|
"logps/chosen": -3.809953212738037, |
|
"logps/rejected": -4.727410793304443, |
|
"loss": 21.5063, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.2697634696960449, |
|
"rewards/margins": 0.04955270141363144, |
|
"rewards/rejected": -0.31931617856025696, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3776553894571204, |
|
"grad_norm": 50.050758361816406, |
|
"learning_rate": 3.161741532324567e-05, |
|
"logits/chosen": 0.6901669502258301, |
|
"logits/rejected": 0.4217056632041931, |
|
"logps/chosen": -3.9408392906188965, |
|
"logps/rejected": -4.815566062927246, |
|
"loss": 20.9824, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3004041314125061, |
|
"rewards/margins": 0.06339363753795624, |
|
"rewards/rejected": -0.36379775404930115, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3815892997639654, |
|
"grad_norm": 31.20033836364746, |
|
"learning_rate": 3.139259451759715e-05, |
|
"logits/chosen": 0.13000288605690002, |
|
"logits/rejected": -0.044496648013591766, |
|
"logps/chosen": -3.5587058067321777, |
|
"logps/rejected": -4.0875444412231445, |
|
"loss": 23.3392, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.25211071968078613, |
|
"rewards/margins": 0.0408257320523262, |
|
"rewards/rejected": -0.2929364740848541, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.38552321007081036, |
|
"grad_norm": 42.778175354003906, |
|
"learning_rate": 3.116562210420604e-05, |
|
"logits/chosen": 0.001088732504285872, |
|
"logits/rejected": -0.32067522406578064, |
|
"logps/chosen": -3.792126178741455, |
|
"logps/rejected": -5.263632297515869, |
|
"loss": 19.5225, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2830085754394531, |
|
"rewards/margins": 0.11108819395303726, |
|
"rewards/rejected": -0.3940967619419098, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3894571203776554, |
|
"grad_norm": 51.158023834228516, |
|
"learning_rate": 3.093654094912901e-05, |
|
"logits/chosen": 0.14770345389842987, |
|
"logits/rejected": -0.27998632192611694, |
|
"logps/chosen": -3.2016499042510986, |
|
"logps/rejected": -4.281766414642334, |
|
"loss": 20.9501, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.23279622197151184, |
|
"rewards/margins": 0.07809507101774216, |
|
"rewards/rejected": -0.3108913004398346, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3933910306845004, |
|
"grad_norm": 56.20425033569336, |
|
"learning_rate": 3.070539431668008e-05, |
|
"logits/chosen": 0.262935608625412, |
|
"logits/rejected": 0.04751387611031532, |
|
"logps/chosen": -3.5432257652282715, |
|
"logps/rejected": -4.892638683319092, |
|
"loss": 19.8621, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2724161148071289, |
|
"rewards/margins": 0.10621275752782822, |
|
"rewards/rejected": -0.3786288797855377, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3933910306845004, |
|
"eval_logits/chosen": 0.6740007400512695, |
|
"eval_logits/rejected": 0.4591088891029358, |
|
"eval_logps/chosen": -4.5571136474609375, |
|
"eval_logps/rejected": -5.613867282867432, |
|
"eval_loss": 21.918312072753906, |
|
"eval_rewards/accuracies": 0.659375011920929, |
|
"eval_rewards/chosen": -0.35319122672080994, |
|
"eval_rewards/margins": 0.08364833891391754, |
|
"eval_rewards/rejected": -0.43683958053588867, |
|
"eval_runtime": 263.6435, |
|
"eval_samples_per_second": 2.428, |
|
"eval_steps_per_second": 0.152, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3973249409913454, |
|
"grad_norm": 58.8707275390625, |
|
"learning_rate": 3.0472225861259792e-05, |
|
"logits/chosen": 0.5369864702224731, |
|
"logits/rejected": 0.22990770637989044, |
|
"logps/chosen": -4.736105442047119, |
|
"logps/rejected": -6.215359687805176, |
|
"loss": 19.8037, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3894490599632263, |
|
"rewards/margins": 0.10317282378673553, |
|
"rewards/rejected": -0.49262189865112305, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.4012588512981904, |
|
"grad_norm": 70.14684295654297, |
|
"learning_rate": 3.023707961911056e-05, |
|
"logits/chosen": 0.8286817669868469, |
|
"logits/rejected": 0.553676187992096, |
|
"logps/chosen": -5.788529872894287, |
|
"logps/rejected": -7.536166191101074, |
|
"loss": 17.8624, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.49081355333328247, |
|
"rewards/margins": 0.14760908484458923, |
|
"rewards/rejected": -0.6384226083755493, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4051927616050354, |
|
"grad_norm": 64.01305389404297, |
|
"learning_rate": 3.0000000000000004e-05, |
|
"logits/chosen": 0.9336326718330383, |
|
"logits/rejected": 0.7778112292289734, |
|
"logps/chosen": -6.726840972900391, |
|
"logps/rejected": -7.550817966461182, |
|
"loss": 22.4924, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.57944256067276, |
|
"rewards/margins": 0.06806603819131851, |
|
"rewards/rejected": -0.6475085616111755, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.4091266719118804, |
|
"grad_norm": 78.60610961914062, |
|
"learning_rate": 2.976103177883374e-05, |
|
"logits/chosen": 1.424285650253296, |
|
"logits/rejected": 1.2528765201568604, |
|
"logps/chosen": -6.3146467208862305, |
|
"logps/rejected": -7.42047643661499, |
|
"loss": 22.4632, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5491231083869934, |
|
"rewards/margins": 0.08809302002191544, |
|
"rewards/rejected": -0.6372160911560059, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.41306058221872544, |
|
"grad_norm": 53.94759750366211, |
|
"learning_rate": 2.9520220087199142e-05, |
|
"logits/chosen": 2.070854663848877, |
|
"logits/rejected": 1.9312273263931274, |
|
"logps/chosen": -7.275876522064209, |
|
"logps/rejected": -7.790124416351318, |
|
"loss": 22.9161, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6180551052093506, |
|
"rewards/margins": 0.04511018842458725, |
|
"rewards/rejected": -0.663165271282196, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.4169944925255704, |
|
"grad_norm": 56.0859489440918, |
|
"learning_rate": 2.9277610404841792e-05, |
|
"logits/chosen": 2.1373679637908936, |
|
"logits/rejected": 1.8990551233291626, |
|
"logps/chosen": -6.323026180267334, |
|
"logps/rejected": -7.068973541259766, |
|
"loss": 21.5577, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5458236932754517, |
|
"rewards/margins": 0.058930903673172, |
|
"rewards/rejected": -0.6047546863555908, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4209284028324154, |
|
"grad_norm": 38.24552536010742, |
|
"learning_rate": 2.903324855107617e-05, |
|
"logits/chosen": 1.698553442955017, |
|
"logits/rejected": 1.4752168655395508, |
|
"logps/chosen": -6.036180019378662, |
|
"logps/rejected": -7.159039497375488, |
|
"loss": 20.6918, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4921696186065674, |
|
"rewards/margins": 0.09151138365268707, |
|
"rewards/rejected": -0.5836809873580933, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.42486231313926043, |
|
"grad_norm": 78.7889633178711, |
|
"learning_rate": 2.8787180676132222e-05, |
|
"logits/chosen": 1.3787410259246826, |
|
"logits/rejected": 1.1702592372894287, |
|
"logps/chosen": -5.483891010284424, |
|
"logps/rejected": -6.863039493560791, |
|
"loss": 21.8569, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4555833339691162, |
|
"rewards/margins": 0.10881421715021133, |
|
"rewards/rejected": -0.5643975138664246, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4287962234461054, |
|
"grad_norm": 58.13717269897461, |
|
"learning_rate": 2.8539453252439388e-05, |
|
"logits/chosen": 1.238527536392212, |
|
"logits/rejected": 1.0659363269805908, |
|
"logps/chosen": -4.524688243865967, |
|
"logps/rejected": -5.795032978057861, |
|
"loss": 19.8882, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.365116685628891, |
|
"rewards/margins": 0.0977514460682869, |
|
"rewards/rejected": -0.4628681540489197, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.43273013375295044, |
|
"grad_norm": 57.31916809082031, |
|
"learning_rate": 2.829011306584983e-05, |
|
"logits/chosen": 1.0495655536651611, |
|
"logits/rejected": 0.9189395904541016, |
|
"logps/chosen": -4.698520183563232, |
|
"logps/rejected": -5.4618024826049805, |
|
"loss": 22.771, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3808877170085907, |
|
"rewards/margins": 0.05942262336611748, |
|
"rewards/rejected": -0.4403103291988373, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4366640440597954, |
|
"grad_norm": 57.70218276977539, |
|
"learning_rate": 2.8039207206802444e-05, |
|
"logits/chosen": 1.0372337102890015, |
|
"logits/rejected": 0.7637672424316406, |
|
"logps/chosen": -5.192538261413574, |
|
"logps/rejected": -6.2920708656311035, |
|
"loss": 21.0183, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4280625283718109, |
|
"rewards/margins": 0.07608579099178314, |
|
"rewards/rejected": -0.5041483640670776, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.44059795436664045, |
|
"grad_norm": 38.57474899291992, |
|
"learning_rate": 2.778678306142936e-05, |
|
"logits/chosen": 1.3346863985061646, |
|
"logits/rejected": 1.2482701539993286, |
|
"logps/chosen": -4.537243843078613, |
|
"logps/rejected": -5.363685607910156, |
|
"loss": 21.725, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.367723673582077, |
|
"rewards/margins": 0.07422361522912979, |
|
"rewards/rejected": -0.4419472813606262, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.44453186467348543, |
|
"grad_norm": 47.30175018310547, |
|
"learning_rate": 2.753288830260655e-05, |
|
"logits/chosen": 1.081312894821167, |
|
"logits/rejected": 1.0209182500839233, |
|
"logps/chosen": -4.504608154296875, |
|
"logps/rejected": -5.094980716705322, |
|
"loss": 23.979, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.34262794256210327, |
|
"rewards/margins": 0.04645577073097229, |
|
"rewards/rejected": -0.38908374309539795, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.44846577498033047, |
|
"grad_norm": 41.14936065673828, |
|
"learning_rate": 2.727757088095037e-05, |
|
"logits/chosen": 1.2744576930999756, |
|
"logits/rejected": 0.9736446142196655, |
|
"logps/chosen": -4.100034236907959, |
|
"logps/rejected": -5.128809452056885, |
|
"loss": 20.4892, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3064883053302765, |
|
"rewards/margins": 0.08413257449865341, |
|
"rewards/rejected": -0.3906208872795105, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.45239968528717545, |
|
"grad_norm": 29.25827407836914, |
|
"learning_rate": 2.7020879015761555e-05, |
|
"logits/chosen": 1.315836787223816, |
|
"logits/rejected": 1.091429352760315, |
|
"logps/chosen": -4.02718448638916, |
|
"logps/rejected": -4.86886739730835, |
|
"loss": 21.384, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2975132465362549, |
|
"rewards/margins": 0.07079926878213882, |
|
"rewards/rejected": -0.3683125078678131, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4563335955940205, |
|
"grad_norm": 156.1727752685547, |
|
"learning_rate": 2.6762861185918532e-05, |
|
"logits/chosen": 1.271761178970337, |
|
"logits/rejected": 1.0751326084136963, |
|
"logps/chosen": -4.122300624847412, |
|
"logps/rejected": -5.07947301864624, |
|
"loss": 20.2656, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3042159378528595, |
|
"rewards/margins": 0.0826338604092598, |
|
"rewards/rejected": -0.3868497610092163, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.46026750590086546, |
|
"grad_norm": 33.745567321777344, |
|
"learning_rate": 2.6503566120721685e-05, |
|
"logits/chosen": 1.1284042596817017, |
|
"logits/rejected": 0.8111549615859985, |
|
"logps/chosen": -4.277104377746582, |
|
"logps/rejected": -5.083390235900879, |
|
"loss": 20.6651, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3438721299171448, |
|
"rewards/margins": 0.06679800897836685, |
|
"rewards/rejected": -0.41067013144493103, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.46420141620771044, |
|
"grad_norm": 51.66657638549805, |
|
"learning_rate": 2.6243042790690332e-05, |
|
"logits/chosen": 0.8191879987716675, |
|
"logits/rejected": 0.7653782367706299, |
|
"logps/chosen": -5.171725273132324, |
|
"logps/rejected": -5.932669162750244, |
|
"loss": 22.7086, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4288380742073059, |
|
"rewards/margins": 0.0465051606297493, |
|
"rewards/rejected": -0.4753432869911194, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.46813532651455547, |
|
"grad_norm": 44.73387908935547, |
|
"learning_rate": 2.5981340398314148e-05, |
|
"logits/chosen": 0.5237664580345154, |
|
"logits/rejected": 0.2646290957927704, |
|
"logps/chosen": -4.674435615539551, |
|
"logps/rejected": -6.298100471496582, |
|
"loss": 19.2422, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3852913975715637, |
|
"rewards/margins": 0.1304633468389511, |
|
"rewards/rejected": -0.5157546997070312, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.47206923682140045, |
|
"grad_norm": 45.94012451171875, |
|
"learning_rate": 2.571850836876074e-05, |
|
"logits/chosen": 0.5761805176734924, |
|
"logits/rejected": 0.38747546076774597, |
|
"logps/chosen": -5.026305198669434, |
|
"logps/rejected": -7.055234432220459, |
|
"loss": 19.6134, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4065484404563904, |
|
"rewards/margins": 0.10743912309408188, |
|
"rewards/rejected": -0.5139876008033752, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.47206923682140045, |
|
"eval_logits/chosen": 0.718625545501709, |
|
"eval_logits/rejected": 0.5276994705200195, |
|
"eval_logps/chosen": -5.122862815856934, |
|
"eval_logps/rejected": -6.157461643218994, |
|
"eval_loss": 21.601177215576172, |
|
"eval_rewards/accuracies": 0.6812499761581421, |
|
"eval_rewards/chosen": -0.40976619720458984, |
|
"eval_rewards/margins": 0.08143284171819687, |
|
"eval_rewards/rejected": -0.4911990761756897, |
|
"eval_runtime": 265.5044, |
|
"eval_samples_per_second": 2.411, |
|
"eval_steps_per_second": 0.151, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4760031471282455, |
|
"grad_norm": 186.21047973632812, |
|
"learning_rate": 2.5454596340541246e-05, |
|
"logits/chosen": 0.4267461895942688, |
|
"logits/rejected": 0.17012283205986023, |
|
"logps/chosen": -5.3673906326293945, |
|
"logps/rejected": -6.8712663650512695, |
|
"loss": 20.017, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4380454421043396, |
|
"rewards/margins": 0.10050982236862183, |
|
"rewards/rejected": -0.5385553240776062, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.47993705743509046, |
|
"grad_norm": 137.08309936523438, |
|
"learning_rate": 2.5189654156135577e-05, |
|
"logits/chosen": 0.4136212468147278, |
|
"logits/rejected": 0.08081427961587906, |
|
"logps/chosen": -4.9327521324157715, |
|
"logps/rejected": -6.341540813446045, |
|
"loss": 21.9344, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3804043233394623, |
|
"rewards/margins": 0.10515154898166656, |
|
"rewards/rejected": -0.48555582761764526, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4838709677419355, |
|
"grad_norm": 45.95155334472656, |
|
"learning_rate": 2.492373185257913e-05, |
|
"logits/chosen": 0.255919486284256, |
|
"logits/rejected": 0.06857960671186447, |
|
"logps/chosen": -4.774691104888916, |
|
"logps/rejected": -5.879635334014893, |
|
"loss": 21.4035, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.38787540793418884, |
|
"rewards/margins": 0.08788047730922699, |
|
"rewards/rejected": -0.47575584053993225, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 93.56800079345703, |
|
"learning_rate": 2.465687965201283e-05, |
|
"logits/chosen": -0.002777445362880826, |
|
"logits/rejected": -0.19223739206790924, |
|
"logps/chosen": -4.975631237030029, |
|
"logps/rejected": -6.164121150970459, |
|
"loss": 21.7253, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.40307608246803284, |
|
"rewards/margins": 0.09124691784381866, |
|
"rewards/rejected": -0.4943229556083679, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4917387883556255, |
|
"grad_norm": 48.37771987915039, |
|
"learning_rate": 2.438914795219813e-05, |
|
"logits/chosen": 0.19498832523822784, |
|
"logits/rejected": 0.00378171494230628, |
|
"logps/chosen": -4.89281702041626, |
|
"logps/rejected": -6.380954742431641, |
|
"loss": 18.3391, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4015694558620453, |
|
"rewards/margins": 0.10364029556512833, |
|
"rewards/rejected": -0.505209743976593, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4956726986624705, |
|
"grad_norm": 92.10182189941406, |
|
"learning_rate": 2.41205873169989e-05, |
|
"logits/chosen": 0.3381834626197815, |
|
"logits/rejected": 0.16358311474323273, |
|
"logps/chosen": -5.862008094787598, |
|
"logps/rejected": -7.257102966308594, |
|
"loss": 21.0954, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.49373936653137207, |
|
"rewards/margins": 0.10812152922153473, |
|
"rewards/rejected": -0.6018608808517456, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4996066089693155, |
|
"grad_norm": 71.71492767333984, |
|
"learning_rate": 2.3851248466831906e-05, |
|
"logits/chosen": 0.3445281982421875, |
|
"logits/rejected": 0.19256843626499176, |
|
"logps/chosen": -7.764035224914551, |
|
"logps/rejected": -8.462240219116211, |
|
"loss": 24.1744, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6419528126716614, |
|
"rewards/margins": 0.05703103542327881, |
|
"rewards/rejected": -0.6989837884902954, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5035405192761605, |
|
"grad_norm": 66.27214813232422, |
|
"learning_rate": 2.3581182269087756e-05, |
|
"logits/chosen": 0.6563648581504822, |
|
"logits/rejected": 0.4416646957397461, |
|
"logps/chosen": -7.931412696838379, |
|
"logps/rejected": -9.330202102661133, |
|
"loss": 20.7496, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6889309883117676, |
|
"rewards/margins": 0.10431470721960068, |
|
"rewards/rejected": -0.7932456731796265, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5074744295830055, |
|
"grad_norm": 58.666744232177734, |
|
"learning_rate": 2.331043972852408e-05, |
|
"logits/chosen": 0.7555745840072632, |
|
"logits/rejected": 0.6508604884147644, |
|
"logps/chosen": -8.447460174560547, |
|
"logps/rejected": -9.591973304748535, |
|
"loss": 19.7029, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7366517782211304, |
|
"rewards/margins": 0.09356808662414551, |
|
"rewards/rejected": -0.8302198648452759, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5114083398898505, |
|
"grad_norm": 72.17485046386719, |
|
"learning_rate": 2.303907197763275e-05, |
|
"logits/chosen": 1.018164038658142, |
|
"logits/rejected": 0.8766286969184875, |
|
"logps/chosen": -8.899210929870605, |
|
"logps/rejected": -10.095988273620605, |
|
"loss": 20.9781, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7549716830253601, |
|
"rewards/margins": 0.07867839932441711, |
|
"rewards/rejected": -0.8336501121520996, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5153422501966956, |
|
"grad_norm": 67.8235855102539, |
|
"learning_rate": 2.2767130266982972e-05, |
|
"logits/chosen": 1.225295066833496, |
|
"logits/rejected": 1.1447144746780396, |
|
"logps/chosen": -9.052233695983887, |
|
"logps/rejected": -11.032278060913086, |
|
"loss": 21.8437, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8185558319091797, |
|
"rewards/margins": 0.0942615494132042, |
|
"rewards/rejected": -0.9128173589706421, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5192761605035405, |
|
"grad_norm": 68.50457000732422, |
|
"learning_rate": 2.2494665955542128e-05, |
|
"logits/chosen": 1.5384838581085205, |
|
"logits/rejected": 1.455594778060913, |
|
"logps/chosen": -8.344881057739258, |
|
"logps/rejected": -9.565633773803711, |
|
"loss": 20.1429, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7527450323104858, |
|
"rewards/margins": 0.09417356550693512, |
|
"rewards/rejected": -0.8469184637069702, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5232100708103855, |
|
"grad_norm": 82.57818603515625, |
|
"learning_rate": 2.2221730500976095e-05, |
|
"logits/chosen": 1.622971534729004, |
|
"logits/rejected": 1.5073456764221191, |
|
"logps/chosen": -8.968372344970703, |
|
"logps/rejected": -10.068008422851562, |
|
"loss": 23.63, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.8010263442993164, |
|
"rewards/margins": 0.061060793697834015, |
|
"rewards/rejected": -0.862087070941925, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5271439811172305, |
|
"grad_norm": 68.11051177978516, |
|
"learning_rate": 2.1948375449930918e-05, |
|
"logits/chosen": 1.7366511821746826, |
|
"logits/rejected": 1.4829221963882446, |
|
"logps/chosen": -8.090542793273926, |
|
"logps/rejected": -9.549524307250977, |
|
"loss": 19.8611, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7041796445846558, |
|
"rewards/margins": 0.09810546785593033, |
|
"rewards/rejected": -0.8022850751876831, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5310778914240756, |
|
"grad_norm": 65.59056854248047, |
|
"learning_rate": 2.167465242829774e-05, |
|
"logits/chosen": 1.7103216648101807, |
|
"logits/rejected": 1.6612507104873657, |
|
"logps/chosen": -7.633252143859863, |
|
"logps/rejected": -9.149633407592773, |
|
"loss": 19.6635, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.678862988948822, |
|
"rewards/margins": 0.08954181522130966, |
|
"rewards/rejected": -0.7684048414230347, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5350118017309206, |
|
"grad_norm": 82.65924835205078, |
|
"learning_rate": 2.1400613131462697e-05, |
|
"logits/chosen": 1.8782835006713867, |
|
"logits/rejected": 1.8185780048370361, |
|
"logps/chosen": -8.152751922607422, |
|
"logps/rejected": -9.006729125976562, |
|
"loss": 23.3166, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.6963292360305786, |
|
"rewards/margins": 0.05258508399128914, |
|
"rewards/rejected": -0.7489142417907715, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5389457120377655, |
|
"grad_norm": 52.20330810546875, |
|
"learning_rate": 2.1126309314543712e-05, |
|
"logits/chosen": 2.1228325366973877, |
|
"logits/rejected": 1.9640865325927734, |
|
"logps/chosen": -7.649266242980957, |
|
"logps/rejected": -9.048944473266602, |
|
"loss": 18.8418, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6748228669166565, |
|
"rewards/margins": 0.11289025843143463, |
|
"rewards/rejected": -0.7877130508422852, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5428796223446105, |
|
"grad_norm": 71.97002410888672, |
|
"learning_rate": 2.0851792782616055e-05, |
|
"logits/chosen": 2.075559377670288, |
|
"logits/rejected": 1.9426681995391846, |
|
"logps/chosen": -7.587338924407959, |
|
"logps/rejected": -8.79751968383789, |
|
"loss": 22.4863, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.6690691113471985, |
|
"rewards/margins": 0.0750223845243454, |
|
"rewards/rejected": -0.7440915107727051, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5468135326514555, |
|
"grad_norm": 72.18091583251953, |
|
"learning_rate": 2.0577115380928366e-05, |
|
"logits/chosen": 2.057607889175415, |
|
"logits/rejected": 1.905311942100525, |
|
"logps/chosen": -7.9985480308532715, |
|
"logps/rejected": -9.41818618774414, |
|
"loss": 19.677, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6911519169807434, |
|
"rewards/margins": 0.11198244243860245, |
|
"rewards/rejected": -0.8031343221664429, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5507474429583006, |
|
"grad_norm": 58.584835052490234, |
|
"learning_rate": 2.0302328985111197e-05, |
|
"logits/chosen": 1.9190715551376343, |
|
"logits/rejected": 1.7894951105117798, |
|
"logps/chosen": -7.271109580993652, |
|
"logps/rejected": -8.35887336730957, |
|
"loss": 20.514, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6410033702850342, |
|
"rewards/margins": 0.08995092660188675, |
|
"rewards/rejected": -0.7309542894363403, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5507474429583006, |
|
"eval_logits/chosen": 1.4903769493103027, |
|
"eval_logits/rejected": 1.3117529153823853, |
|
"eval_logps/chosen": -7.017483711242676, |
|
"eval_logps/rejected": -8.199769973754883, |
|
"eval_loss": 22.447879791259766, |
|
"eval_rewards/accuracies": 0.667187511920929, |
|
"eval_rewards/chosen": -0.5992282629013062, |
|
"eval_rewards/margins": 0.09620151668787003, |
|
"eval_rewards/rejected": -0.6954299211502075, |
|
"eval_runtime": 271.3259, |
|
"eval_samples_per_second": 2.359, |
|
"eval_steps_per_second": 0.147, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5546813532651456, |
|
"grad_norm": 53.3320426940918, |
|
"learning_rate": 2.0027485491379747e-05, |
|
"logits/chosen": 1.7803634405136108, |
|
"logits/rejected": 1.7076250314712524, |
|
"logps/chosen": -7.167619228363037, |
|
"logps/rejected": -8.508960723876953, |
|
"loss": 23.2853, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.6200074553489685, |
|
"rewards/margins": 0.06713174283504486, |
|
"rewards/rejected": -0.6871392130851746, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5586152635719905, |
|
"grad_norm": 50.50627136230469, |
|
"learning_rate": 1.9752636806732742e-05, |
|
"logits/chosen": 1.3914200067520142, |
|
"logits/rejected": 1.207233190536499, |
|
"logps/chosen": -5.589415073394775, |
|
"logps/rejected": -7.128883361816406, |
|
"loss": 18.8527, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4748566746711731, |
|
"rewards/margins": 0.10387661308050156, |
|
"rewards/rejected": -0.5787333250045776, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5625491738788355, |
|
"grad_norm": 48.835845947265625, |
|
"learning_rate": 1.9477834839149278e-05, |
|
"logits/chosen": 1.0721666812896729, |
|
"logits/rejected": 0.9233131408691406, |
|
"logps/chosen": -5.5658769607543945, |
|
"logps/rejected": -7.002392768859863, |
|
"loss": 18.0462, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.46734148263931274, |
|
"rewards/margins": 0.12140637636184692, |
|
"rewards/rejected": -0.5887478590011597, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5664830841856806, |
|
"grad_norm": 90.04922485351562, |
|
"learning_rate": 1.9203131487785428e-05, |
|
"logits/chosen": 0.7360326051712036, |
|
"logits/rejected": 0.5881434679031372, |
|
"logps/chosen": -5.03780460357666, |
|
"logps/rejected": -5.854475498199463, |
|
"loss": 22.7303, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.4176352024078369, |
|
"rewards/margins": 0.06857079267501831, |
|
"rewards/rejected": -0.4862059950828552, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5704169944925256, |
|
"grad_norm": 38.25209045410156, |
|
"learning_rate": 1.8928578633172605e-05, |
|
"logits/chosen": 0.27817726135253906, |
|
"logits/rejected": 0.11374132335186005, |
|
"logps/chosen": -4.7726898193359375, |
|
"logps/rejected": -6.507106781005859, |
|
"loss": 18.5401, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3321690857410431, |
|
"rewards/margins": 0.12912718951702118, |
|
"rewards/rejected": -0.4612962603569031, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5743509047993706, |
|
"grad_norm": 63.3168830871582, |
|
"learning_rate": 1.8654228127419375e-05, |
|
"logits/chosen": 0.14177891612052917, |
|
"logits/rejected": 0.005235266871750355, |
|
"logps/chosen": -3.9255287647247314, |
|
"logps/rejected": -4.774594783782959, |
|
"loss": 22.0643, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.30579084157943726, |
|
"rewards/margins": 0.0727209746837616, |
|
"rewards/rejected": -0.37851184606552124, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5782848151062155, |
|
"grad_norm": 85.18032836914062, |
|
"learning_rate": 1.838013178441866e-05, |
|
"logits/chosen": 0.020315665751695633, |
|
"logits/rejected": -0.2219502180814743, |
|
"logps/chosen": -3.9957587718963623, |
|
"logps/rejected": -5.301746368408203, |
|
"loss": 19.3506, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3082605302333832, |
|
"rewards/margins": 0.10884448140859604, |
|
"rewards/rejected": -0.4171050190925598, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5822187254130606, |
|
"grad_norm": 51.162899017333984, |
|
"learning_rate": 1.810634137006213e-05, |
|
"logits/chosen": 0.07239419966936111, |
|
"logits/rejected": -0.12188796699047089, |
|
"logps/chosen": -3.5929577350616455, |
|
"logps/rejected": -4.78910493850708, |
|
"loss": 20.6575, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2725752294063568, |
|
"rewards/margins": 0.10044431686401367, |
|
"rewards/rejected": -0.3730195164680481, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5861526357199056, |
|
"grad_norm": 92.04205322265625, |
|
"learning_rate": 1.7832908592463733e-05, |
|
"logits/chosen": 0.04120447859168053, |
|
"logits/rejected": -0.11522980034351349, |
|
"logps/chosen": -4.338095188140869, |
|
"logps/rejected": -5.353451251983643, |
|
"loss": 22.649, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.34081414341926575, |
|
"rewards/margins": 0.08662021160125732, |
|
"rewards/rejected": -0.42743435502052307, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5900865460267506, |
|
"grad_norm": 116.44744873046875, |
|
"learning_rate": 1.755988509219406e-05, |
|
"logits/chosen": 0.23367616534233093, |
|
"logits/rejected": -0.08756458014249802, |
|
"logps/chosen": -4.6500091552734375, |
|
"logps/rejected": -5.806417465209961, |
|
"loss": 21.2043, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3509804606437683, |
|
"rewards/margins": 0.10566103458404541, |
|
"rewards/rejected": -0.4566414952278137, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5940204563335956, |
|
"grad_norm": 80.40803527832031, |
|
"learning_rate": 1.7287322432527488e-05, |
|
"logits/chosen": 0.2616182565689087, |
|
"logits/rejected": 0.0007495712488889694, |
|
"logps/chosen": -4.33780574798584, |
|
"logps/rejected": -5.812748432159424, |
|
"loss": 22.7481, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.346657931804657, |
|
"rewards/margins": 0.09951233118772507, |
|
"rewards/rejected": -0.44617027044296265, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5979543666404405, |
|
"grad_norm": 55.723270416259766, |
|
"learning_rate": 1.7015272089703957e-05, |
|
"logits/chosen": 0.28436246514320374, |
|
"logits/rejected": 0.11714988946914673, |
|
"logps/chosen": -5.426861763000488, |
|
"logps/rejected": -6.6721086502075195, |
|
"loss": 19.9111, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.43509548902511597, |
|
"rewards/margins": 0.09244825690984726, |
|
"rewards/rejected": -0.527543842792511, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6018882769472856, |
|
"grad_norm": 57.092342376708984, |
|
"learning_rate": 1.6743785443207143e-05, |
|
"logits/chosen": 0.41758331656455994, |
|
"logits/rejected": 0.11633528769016266, |
|
"logps/chosen": -4.913158416748047, |
|
"logps/rejected": -6.685948848724365, |
|
"loss": 17.5309, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.37690046429634094, |
|
"rewards/margins": 0.1555865854024887, |
|
"rewards/rejected": -0.5324870347976685, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.6058221872541306, |
|
"grad_norm": 52.56325149536133, |
|
"learning_rate": 1.6472913766060902e-05, |
|
"logits/chosen": 0.4455360770225525, |
|
"logits/rejected": 0.2686857581138611, |
|
"logps/chosen": -5.640677452087402, |
|
"logps/rejected": -6.480513572692871, |
|
"loss": 22.17, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4680722653865814, |
|
"rewards/margins": 0.06571700423955917, |
|
"rewards/rejected": -0.5337892770767212, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6097560975609756, |
|
"grad_norm": 97.6286392211914, |
|
"learning_rate": 1.6202708215145872e-05, |
|
"logits/chosen": 0.5606172680854797, |
|
"logits/rejected": 0.4327201247215271, |
|
"logps/chosen": -6.182036876678467, |
|
"logps/rejected": -7.41018533706665, |
|
"loss": 20.9249, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5261003375053406, |
|
"rewards/margins": 0.10036258399486542, |
|
"rewards/rejected": -0.6264629364013672, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6136900078678206, |
|
"grad_norm": 52.04305648803711, |
|
"learning_rate": 1.5933219821537954e-05, |
|
"logits/chosen": 0.5304365754127502, |
|
"logits/rejected": 0.35703176259994507, |
|
"logps/chosen": -6.3372802734375, |
|
"logps/rejected": -7.8030829429626465, |
|
"loss": 21.2755, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5311385989189148, |
|
"rewards/margins": 0.08466647565364838, |
|
"rewards/rejected": -0.6158050298690796, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6176239181746657, |
|
"grad_norm": 102.9618148803711, |
|
"learning_rate": 1.566449948087054e-05, |
|
"logits/chosen": 0.5141326785087585, |
|
"logits/rejected": 0.36102861166000366, |
|
"logps/chosen": -6.295104026794434, |
|
"logps/rejected": -8.318052291870117, |
|
"loss": 20.8088, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5228041410446167, |
|
"rewards/margins": 0.10621719062328339, |
|
"rewards/rejected": -0.6290213465690613, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.6215578284815106, |
|
"grad_norm": 68.3493423461914, |
|
"learning_rate": 1.5396597943722432e-05, |
|
"logits/chosen": 0.6660643815994263, |
|
"logits/rejected": 0.5413907766342163, |
|
"logps/chosen": -6.168806552886963, |
|
"logps/rejected": -7.278306484222412, |
|
"loss": 22.7281, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.49865293502807617, |
|
"rewards/margins": 0.07277169823646545, |
|
"rewards/rejected": -0.571424663066864, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6254917387883556, |
|
"grad_norm": 82.17771911621094, |
|
"learning_rate": 1.512956580603299e-05, |
|
"logits/chosen": 0.7329293489456177, |
|
"logits/rejected": 0.5695565938949585, |
|
"logps/chosen": -6.647820949554443, |
|
"logps/rejected": -7.977785587310791, |
|
"loss": 22.417, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5436100959777832, |
|
"rewards/margins": 0.09925737231969833, |
|
"rewards/rejected": -0.6428674459457397, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6294256490952006, |
|
"grad_norm": 46.18910598754883, |
|
"learning_rate": 1.4863453499546645e-05, |
|
"logits/chosen": 0.8801663517951965, |
|
"logits/rejected": 0.7344497442245483, |
|
"logps/chosen": -5.812638282775879, |
|
"logps/rejected": -7.028628349304199, |
|
"loss": 19.7149, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4914124608039856, |
|
"rewards/margins": 0.10450112819671631, |
|
"rewards/rejected": -0.5959135890007019, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6294256490952006, |
|
"eval_logits/chosen": 0.3036060333251953, |
|
"eval_logits/rejected": 0.1599578559398651, |
|
"eval_logps/chosen": -6.097789287567139, |
|
"eval_logps/rejected": -7.161267280578613, |
|
"eval_loss": 21.823678970336914, |
|
"eval_rewards/accuracies": 0.6781250238418579, |
|
"eval_rewards/chosen": -0.5072587728500366, |
|
"eval_rewards/margins": 0.08432072401046753, |
|
"eval_rewards/rejected": -0.5915795564651489, |
|
"eval_runtime": 269.2822, |
|
"eval_samples_per_second": 2.377, |
|
"eval_steps_per_second": 0.149, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6333595594020456, |
|
"grad_norm": 59.289424896240234, |
|
"learning_rate": 1.4598311282288303e-05, |
|
"logits/chosen": 0.9243080019950867, |
|
"logits/rejected": 0.7870502471923828, |
|
"logps/chosen": -6.291727542877197, |
|
"logps/rejected": -6.99709939956665, |
|
"loss": 23.6839, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.5228357315063477, |
|
"rewards/margins": 0.0496952123939991, |
|
"rewards/rejected": -0.5725310444831848, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6372934697088907, |
|
"grad_norm": 77.22916412353516, |
|
"learning_rate": 1.4334189229071616e-05, |
|
"logits/chosen": 0.9643747210502625, |
|
"logits/rejected": 0.8659998774528503, |
|
"logps/chosen": -6.07599401473999, |
|
"logps/rejected": -6.853678226470947, |
|
"loss": 22.2309, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5087305307388306, |
|
"rewards/margins": 0.06423305720090866, |
|
"rewards/rejected": -0.572963535785675, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6412273800157356, |
|
"grad_norm": 59.69563674926758, |
|
"learning_rate": 1.4071137222041853e-05, |
|
"logits/chosen": 1.0996719598770142, |
|
"logits/rejected": 1.134555459022522, |
|
"logps/chosen": -6.2107062339782715, |
|
"logps/rejected": -6.798120021820068, |
|
"loss": 24.029, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5292443037033081, |
|
"rewards/margins": 0.04952489584684372, |
|
"rewards/rejected": -0.578769326210022, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.6451612903225806, |
|
"grad_norm": 60.09773254394531, |
|
"learning_rate": 1.3809204941255145e-05, |
|
"logits/chosen": 1.1656619310379028, |
|
"logits/rejected": 1.024043083190918, |
|
"logps/chosen": -6.1883721351623535, |
|
"logps/rejected": -7.462551116943359, |
|
"loss": 19.7327, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5342612862586975, |
|
"rewards/margins": 0.09449473023414612, |
|
"rewards/rejected": -0.6287559270858765, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6490952006294256, |
|
"grad_norm": 41.9356689453125, |
|
"learning_rate": 1.3548441855295875e-05, |
|
"logits/chosen": 1.4191844463348389, |
|
"logits/rejected": 1.1811176538467407, |
|
"logps/chosen": -6.1646833419799805, |
|
"logps/rejected": -7.954948425292969, |
|
"loss": 19.8497, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5303637385368347, |
|
"rewards/margins": 0.10388074070215225, |
|
"rewards/rejected": -0.634244441986084, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.6530291109362707, |
|
"grad_norm": 48.11675262451172, |
|
"learning_rate": 1.3288897211934068e-05, |
|
"logits/chosen": 1.2117464542388916, |
|
"logits/rejected": 1.0153982639312744, |
|
"logps/chosen": -6.489108085632324, |
|
"logps/rejected": -7.619096279144287, |
|
"loss": 21.6481, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5611577033996582, |
|
"rewards/margins": 0.08501636981964111, |
|
"rewards/rejected": -0.6461740732192993, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6569630212431157, |
|
"grad_norm": 49.94951248168945, |
|
"learning_rate": 1.3030620028824426e-05, |
|
"logits/chosen": 1.3170408010482788, |
|
"logits/rejected": 1.1979198455810547, |
|
"logps/chosen": -6.271700382232666, |
|
"logps/rejected": -7.738451957702637, |
|
"loss": 21.4144, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5391649603843689, |
|
"rewards/margins": 0.10710246860980988, |
|
"rewards/rejected": -0.6462674736976624, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6608969315499607, |
|
"grad_norm": 71.95092010498047, |
|
"learning_rate": 1.2773659084248847e-05, |
|
"logits/chosen": 1.2662818431854248, |
|
"logits/rejected": 1.0763803720474243, |
|
"logps/chosen": -6.253358364105225, |
|
"logps/rejected": -7.545995235443115, |
|
"loss": 20.9379, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5418421030044556, |
|
"rewards/margins": 0.0893731564283371, |
|
"rewards/rejected": -0.6312152147293091, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6648308418568056, |
|
"grad_norm": 68.2335205078125, |
|
"learning_rate": 1.2518062907904139e-05, |
|
"logits/chosen": 1.0089311599731445, |
|
"logits/rejected": 0.9394065141677856, |
|
"logps/chosen": -6.143620014190674, |
|
"logps/rejected": -6.944084167480469, |
|
"loss": 22.3766, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5299480557441711, |
|
"rewards/margins": 0.06761987507343292, |
|
"rewards/rejected": -0.5975678563117981, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6687647521636507, |
|
"grad_norm": 57.40378189086914, |
|
"learning_rate": 1.2263879771736715e-05, |
|
"logits/chosen": 1.062596321105957, |
|
"logits/rejected": 0.9756869077682495, |
|
"logps/chosen": -6.35455322265625, |
|
"logps/rejected": -7.452897071838379, |
|
"loss": 21.0019, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5414422750473022, |
|
"rewards/margins": 0.0917770117521286, |
|
"rewards/rejected": -0.6332192420959473, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6726986624704957, |
|
"grad_norm": 126.5318374633789, |
|
"learning_rate": 1.2011157680825928e-05, |
|
"logits/chosen": 1.1028703451156616, |
|
"logits/rejected": 0.9885491132736206, |
|
"logps/chosen": -6.448549747467041, |
|
"logps/rejected": -7.387596130371094, |
|
"loss": 23.4204, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.552506148815155, |
|
"rewards/margins": 0.06099040061235428, |
|
"rewards/rejected": -0.6134966015815735, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6766325727773407, |
|
"grad_norm": 47.99146270751953, |
|
"learning_rate": 1.1759944364317813e-05, |
|
"logits/chosen": 1.2233279943466187, |
|
"logits/rejected": 1.0970909595489502, |
|
"logps/chosen": -6.800168037414551, |
|
"logps/rejected": -7.861006259918213, |
|
"loss": 20.2209, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5629354119300842, |
|
"rewards/margins": 0.08497332781553268, |
|
"rewards/rejected": -0.6479086875915527, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6805664830841857, |
|
"grad_norm": 45.71125793457031, |
|
"learning_rate": 1.151028726641097e-05, |
|
"logits/chosen": 1.211646318435669, |
|
"logits/rejected": 1.0901798009872437, |
|
"logps/chosen": -6.566044807434082, |
|
"logps/rejected": -7.516194820404053, |
|
"loss": 23.8428, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5404679775238037, |
|
"rewards/margins": 0.05825378745794296, |
|
"rewards/rejected": -0.5987217426300049, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6845003933910306, |
|
"grad_norm": 197.55665588378906, |
|
"learning_rate": 1.126223353739623e-05, |
|
"logits/chosen": 1.3044805526733398, |
|
"logits/rejected": 1.134115219116211, |
|
"logps/chosen": -5.714540958404541, |
|
"logps/rejected": -6.580451965332031, |
|
"loss": 20.3569, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4907086491584778, |
|
"rewards/margins": 0.07508612424135208, |
|
"rewards/rejected": -0.5657947659492493, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6884343036978757, |
|
"grad_norm": 57.10478210449219, |
|
"learning_rate": 1.1015830024751855e-05, |
|
"logits/chosen": 1.2777565717697144, |
|
"logits/rejected": 1.1199305057525635, |
|
"logps/chosen": -5.988950252532959, |
|
"logps/rejected": -7.0798020362854, |
|
"loss": 21.1749, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5039869546890259, |
|
"rewards/margins": 0.08334285765886307, |
|
"rewards/rejected": -0.5873297452926636, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6923682140047207, |
|
"grad_norm": 81.65518188476562, |
|
"learning_rate": 1.0771123264295898e-05, |
|
"logits/chosen": 1.3681554794311523, |
|
"logits/rejected": 1.1427236795425415, |
|
"logps/chosen": -5.568511486053467, |
|
"logps/rejected": -6.888806343078613, |
|
"loss": 21.5826, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4716271460056305, |
|
"rewards/margins": 0.09705589711666107, |
|
"rewards/rejected": -0.5686829686164856, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6963021243115657, |
|
"grad_norm": 51.81953811645508, |
|
"learning_rate": 1.0528159471397425e-05, |
|
"logits/chosen": 1.4481117725372314, |
|
"logits/rejected": 1.2932265996932983, |
|
"logps/chosen": -5.606583595275879, |
|
"logps/rejected": -6.986734867095947, |
|
"loss": 20.0762, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.46911725401878357, |
|
"rewards/margins": 0.10834591090679169, |
|
"rewards/rejected": -0.5774631500244141, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.7002360346184107, |
|
"grad_norm": 63.05445098876953, |
|
"learning_rate": 1.0286984532248327e-05, |
|
"logits/chosen": 1.2815773487091064, |
|
"logits/rejected": 1.1918110847473145, |
|
"logps/chosen": -6.13564920425415, |
|
"logps/rejected": -7.042695045471191, |
|
"loss": 23.5487, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5119949579238892, |
|
"rewards/margins": 0.050577979534864426, |
|
"rewards/rejected": -0.5625730156898499, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7041699449252558, |
|
"grad_norm": 95.1924057006836, |
|
"learning_rate": 1.004764399519718e-05, |
|
"logits/chosen": 1.4892219305038452, |
|
"logits/rejected": 1.1956894397735596, |
|
"logps/chosen": -5.164662837982178, |
|
"logps/rejected": -6.872800350189209, |
|
"loss": 18.8206, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4294440746307373, |
|
"rewards/margins": 0.1322946548461914, |
|
"rewards/rejected": -0.5617387294769287, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.7081038552321007, |
|
"grad_norm": 71.83849334716797, |
|
"learning_rate": 9.81018306214702e-06, |
|
"logits/chosen": 1.2133238315582275, |
|
"logits/rejected": 1.130614995956421, |
|
"logps/chosen": -5.295372009277344, |
|
"logps/rejected": -6.396082878112793, |
|
"loss": 21.4856, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4392309784889221, |
|
"rewards/margins": 0.06895321607589722, |
|
"rewards/rejected": -0.5081842541694641, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7081038552321007, |
|
"eval_logits/chosen": 0.8757205009460449, |
|
"eval_logits/rejected": 0.7193918824195862, |
|
"eval_logps/chosen": -5.373657703399658, |
|
"eval_logps/rejected": -6.414725303649902, |
|
"eval_loss": 21.844945907592773, |
|
"eval_rewards/accuracies": 0.682812511920929, |
|
"eval_rewards/chosen": -0.43484562635421753, |
|
"eval_rewards/margins": 0.08207974582910538, |
|
"eval_rewards/rejected": -0.5169254541397095, |
|
"eval_runtime": 270.5677, |
|
"eval_samples_per_second": 2.365, |
|
"eval_steps_per_second": 0.148, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7120377655389457, |
|
"grad_norm": 56.13790512084961, |
|
"learning_rate": 9.574646580018483e-06, |
|
"logits/chosen": 1.3937456607818604, |
|
"logits/rejected": 1.1991198062896729, |
|
"logps/chosen": -5.508763313293457, |
|
"logps/rejected": -7.044470310211182, |
|
"loss": 20.5192, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.439894437789917, |
|
"rewards/margins": 0.10662362724542618, |
|
"rewards/rejected": -0.5465181469917297, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.7159716758457907, |
|
"grad_norm": 58.68037796020508, |
|
"learning_rate": 9.341079032279987e-06, |
|
"logits/chosen": 1.3311818838119507, |
|
"logits/rejected": 1.1792255640029907, |
|
"logps/chosen": -4.972262382507324, |
|
"logps/rejected": -6.138205528259277, |
|
"loss": 20.7723, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3926599621772766, |
|
"rewards/margins": 0.0938471108675003, |
|
"rewards/rejected": -0.4865070879459381, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7199055861526357, |
|
"grad_norm": 60.50275802612305, |
|
"learning_rate": 9.109524530546622e-06, |
|
"logits/chosen": 1.4016880989074707, |
|
"logits/rejected": 1.1274712085723877, |
|
"logps/chosen": -5.319563865661621, |
|
"logps/rejected": -6.878640174865723, |
|
"loss": 18.247, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.442908376455307, |
|
"rewards/margins": 0.12117187678813934, |
|
"rewards/rejected": -0.5640802979469299, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.7238394964594808, |
|
"grad_norm": 51.57741165161133, |
|
"learning_rate": 8.880026806249194e-06, |
|
"logits/chosen": 1.2216346263885498, |
|
"logits/rejected": 1.1520761251449585, |
|
"logps/chosen": -5.210920333862305, |
|
"logps/rejected": -6.013674736022949, |
|
"loss": 22.637, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4350380003452301, |
|
"rewards/margins": 0.06822977215051651, |
|
"rewards/rejected": -0.503267765045166, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7277734067663257, |
|
"grad_norm": 113.00042724609375, |
|
"learning_rate": 8.652629202375075e-06, |
|
"logits/chosen": 1.1651921272277832, |
|
"logits/rejected": 1.0577062368392944, |
|
"logps/chosen": -5.723333835601807, |
|
"logps/rejected": -7.094163417816162, |
|
"loss": 21.495, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4707266390323639, |
|
"rewards/margins": 0.0914454534649849, |
|
"rewards/rejected": -0.5621720552444458, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 48.148902893066406, |
|
"learning_rate": 8.427374665282488e-06, |
|
"logits/chosen": 1.2118332386016846, |
|
"logits/rejected": 1.0361649990081787, |
|
"logps/chosen": -5.214700222015381, |
|
"logps/rejected": -6.316381931304932, |
|
"loss": 21.8246, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4387596547603607, |
|
"rewards/margins": 0.07641210407018661, |
|
"rewards/rejected": -0.5151717066764832, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7356412273800157, |
|
"grad_norm": 55.0938835144043, |
|
"learning_rate": 8.204305736589613e-06, |
|
"logits/chosen": 1.1594150066375732, |
|
"logits/rejected": 0.9741013646125793, |
|
"logps/chosen": -6.014735221862793, |
|
"logps/rejected": -7.042943000793457, |
|
"loss": 24.2135, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5000636577606201, |
|
"rewards/margins": 0.07266019284725189, |
|
"rewards/rejected": -0.5727238655090332, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.7395751376868608, |
|
"grad_norm": 54.356346130371094, |
|
"learning_rate": 7.98346454514018e-06, |
|
"logits/chosen": 1.2832156419754028, |
|
"logits/rejected": 1.0834671258926392, |
|
"logps/chosen": -5.47364616394043, |
|
"logps/rejected": -6.59670352935791, |
|
"loss": 20.1251, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4491247236728668, |
|
"rewards/margins": 0.0896775871515274, |
|
"rewards/rejected": -0.5388022661209106, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7435090479937058, |
|
"grad_norm": 45.92910385131836, |
|
"learning_rate": 7.764892799047005e-06, |
|
"logits/chosen": 1.2785322666168213, |
|
"logits/rejected": 1.157806158065796, |
|
"logps/chosen": -5.310911178588867, |
|
"logps/rejected": -6.4970197677612305, |
|
"loss": 22.7858, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.44885730743408203, |
|
"rewards/margins": 0.09017422050237656, |
|
"rewards/rejected": -0.5390315055847168, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.7474429583005507, |
|
"grad_norm": 59.7811393737793, |
|
"learning_rate": 7.548631777814996e-06, |
|
"logits/chosen": 1.3047298192977905, |
|
"logits/rejected": 1.089996099472046, |
|
"logps/chosen": -5.754390239715576, |
|
"logps/rejected": -6.797402858734131, |
|
"loss": 20.7907, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4849552512168884, |
|
"rewards/margins": 0.07941243052482605, |
|
"rewards/rejected": -0.5643676519393921, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7513768686073957, |
|
"grad_norm": 40.247764587402344, |
|
"learning_rate": 7.334722324545065e-06, |
|
"logits/chosen": 1.2972372770309448, |
|
"logits/rejected": 1.0613635778427124, |
|
"logps/chosen": -5.219810485839844, |
|
"logps/rejected": -6.806240081787109, |
|
"loss": 22.302, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.4275331497192383, |
|
"rewards/margins": 0.08785694092512131, |
|
"rewards/rejected": -0.5153900980949402, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.7553107789142408, |
|
"grad_norm": 46.78326416015625, |
|
"learning_rate": 7.123204838220534e-06, |
|
"logits/chosen": 1.3515903949737549, |
|
"logits/rejected": 1.223388910293579, |
|
"logps/chosen": -5.440323829650879, |
|
"logps/rejected": -6.528807163238525, |
|
"loss": 20.2834, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.45285138487815857, |
|
"rewards/margins": 0.08332471549510956, |
|
"rewards/rejected": -0.5361760854721069, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7592446892210858, |
|
"grad_norm": 40.53936767578125, |
|
"learning_rate": 6.914119266077355e-06, |
|
"logits/chosen": 1.3715136051177979, |
|
"logits/rejected": 1.172968864440918, |
|
"logps/chosen": -5.467780590057373, |
|
"logps/rejected": -6.763506889343262, |
|
"loss": 20.8753, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4584290385246277, |
|
"rewards/margins": 0.08680377900600433, |
|
"rewards/rejected": -0.5452328324317932, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.7631785995279308, |
|
"grad_norm": 49.12062454223633, |
|
"learning_rate": 6.707505096059663e-06, |
|
"logits/chosen": 1.3483985662460327, |
|
"logits/rejected": 1.2560744285583496, |
|
"logps/chosen": -5.542896747589111, |
|
"logps/rejected": -6.603106498718262, |
|
"loss": 21.623, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.43749743700027466, |
|
"rewards/margins": 0.08478393405675888, |
|
"rewards/rejected": -0.5222813487052917, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7671125098347757, |
|
"grad_norm": 88.77938079833984, |
|
"learning_rate": 6.503401349362084e-06, |
|
"logits/chosen": 1.2470347881317139, |
|
"logits/rejected": 1.128948450088501, |
|
"logps/chosen": -5.131396293640137, |
|
"logps/rejected": -6.444796085357666, |
|
"loss": 22.2456, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4255082607269287, |
|
"rewards/margins": 0.08464544266462326, |
|
"rewards/rejected": -0.5101537704467773, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7710464201416207, |
|
"grad_norm": 43.77740478515625, |
|
"learning_rate": 6.301846573060177e-06, |
|
"logits/chosen": 1.3094112873077393, |
|
"logits/rejected": 1.1933720111846924, |
|
"logps/chosen": -4.36563777923584, |
|
"logps/rejected": -5.267557621002197, |
|
"loss": 20.8653, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.35090750455856323, |
|
"rewards/margins": 0.0695296972990036, |
|
"rewards/rejected": -0.4204372465610504, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7749803304484658, |
|
"grad_norm": 51.79399871826172, |
|
"learning_rate": 6.102878832830432e-06, |
|
"logits/chosen": 1.196919560432434, |
|
"logits/rejected": 1.0146095752716064, |
|
"logps/chosen": -5.188233852386475, |
|
"logps/rejected": -6.542181491851807, |
|
"loss": 19.3124, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.4062345027923584, |
|
"rewards/margins": 0.10461841523647308, |
|
"rewards/rejected": -0.5108529925346375, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7789142407553108, |
|
"grad_norm": 55.651023864746094, |
|
"learning_rate": 5.90653570576116e-06, |
|
"logits/chosen": 1.1126521825790405, |
|
"logits/rejected": 0.9780498743057251, |
|
"logps/chosen": -5.8701982498168945, |
|
"logps/rejected": -6.6015424728393555, |
|
"loss": 21.2432, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4567411541938782, |
|
"rewards/margins": 0.07785584777593613, |
|
"rewards/rejected": -0.5345970392227173, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7828481510621558, |
|
"grad_norm": 41.18940353393555, |
|
"learning_rate": 5.712854273255708e-06, |
|
"logits/chosen": 1.3726425170898438, |
|
"logits/rejected": 1.0831149816513062, |
|
"logps/chosen": -4.694300651550293, |
|
"logps/rejected": -6.523576259613037, |
|
"loss": 18.0565, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.38610920310020447, |
|
"rewards/margins": 0.1167839914560318, |
|
"rewards/rejected": -0.5028932094573975, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7867820613690008, |
|
"grad_norm": 55.95176696777344, |
|
"learning_rate": 5.521871114029233e-06, |
|
"logits/chosen": 1.1702280044555664, |
|
"logits/rejected": 0.8772756457328796, |
|
"logps/chosen": -4.847783088684082, |
|
"logps/rejected": -6.067271709442139, |
|
"loss": 20.8844, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3914787769317627, |
|
"rewards/margins": 0.09186731278896332, |
|
"rewards/rejected": -0.4833460748195648, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7867820613690008, |
|
"eval_logits/chosen": 0.7199884653091431, |
|
"eval_logits/rejected": 0.5619722604751587, |
|
"eval_logps/chosen": -5.075138092041016, |
|
"eval_logps/rejected": -6.114271640777588, |
|
"eval_loss": 21.61347198486328, |
|
"eval_rewards/accuracies": 0.676562488079071, |
|
"eval_rewards/chosen": -0.4049936830997467, |
|
"eval_rewards/margins": 0.08188632875680923, |
|
"eval_rewards/rejected": -0.4868800640106201, |
|
"eval_runtime": 272.6784, |
|
"eval_samples_per_second": 2.347, |
|
"eval_steps_per_second": 0.147, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7907159716758458, |
|
"grad_norm": 50.42679214477539, |
|
"learning_rate": 5.3336222972004494e-06, |
|
"logits/chosen": 1.0932950973510742, |
|
"logits/rejected": 1.0363296270370483, |
|
"logps/chosen": -5.089923858642578, |
|
"logps/rejected": -5.958308219909668, |
|
"loss": 22.5933, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4146384596824646, |
|
"rewards/margins": 0.0728868916630745, |
|
"rewards/rejected": -0.4875253140926361, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.7946498819826908, |
|
"grad_norm": 50.056846618652344, |
|
"learning_rate": 5.148143375479602e-06, |
|
"logits/chosen": 1.0465943813323975, |
|
"logits/rejected": 0.7968215346336365, |
|
"logps/chosen": -5.255601406097412, |
|
"logps/rejected": -6.39734411239624, |
|
"loss": 19.5322, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.43599051237106323, |
|
"rewards/margins": 0.08602426201105118, |
|
"rewards/rejected": -0.5220147967338562, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7985837922895358, |
|
"grad_norm": 63.85953140258789, |
|
"learning_rate": 4.96546937845398e-06, |
|
"logits/chosen": 0.9397533535957336, |
|
"logits/rejected": 0.6560163497924805, |
|
"logps/chosen": -5.794540882110596, |
|
"logps/rejected": -7.656335353851318, |
|
"loss": 18.5173, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4594026207923889, |
|
"rewards/margins": 0.11265122890472412, |
|
"rewards/rejected": -0.5720537900924683, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.8025177025963808, |
|
"grad_norm": 42.84744644165039, |
|
"learning_rate": 4.785634805972201e-06, |
|
"logits/chosen": 0.8836447596549988, |
|
"logits/rejected": 0.6324716806411743, |
|
"logps/chosen": -5.117056369781494, |
|
"logps/rejected": -6.833028316497803, |
|
"loss": 17.5394, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4234439730644226, |
|
"rewards/margins": 0.13116177916526794, |
|
"rewards/rejected": -0.5546057224273682, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8064516129032258, |
|
"grad_norm": 47.17292404174805, |
|
"learning_rate": 4.60867362162861e-06, |
|
"logits/chosen": 0.8913745880126953, |
|
"logits/rejected": 0.7286295294761658, |
|
"logps/chosen": -5.609414577484131, |
|
"logps/rejected": -6.968409061431885, |
|
"loss": 18.6049, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.46564167737960815, |
|
"rewards/margins": 0.107306107878685, |
|
"rewards/rejected": -0.5729478001594543, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8103855232100708, |
|
"grad_norm": 72.6835708618164, |
|
"learning_rate": 4.434619246348843e-06, |
|
"logits/chosen": 1.0678660869598389, |
|
"logits/rejected": 0.8571092486381531, |
|
"logps/chosen": -5.482309818267822, |
|
"logps/rejected": -6.8230743408203125, |
|
"loss": 21.0093, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4621756076812744, |
|
"rewards/margins": 0.09944047778844833, |
|
"rewards/rejected": -0.561616063117981, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8143194335169158, |
|
"grad_norm": 63.443233489990234, |
|
"learning_rate": 4.263504552078004e-06, |
|
"logits/chosen": 0.8602091073989868, |
|
"logits/rejected": 0.7144955396652222, |
|
"logps/chosen": -5.040487289428711, |
|
"logps/rejected": -6.352028846740723, |
|
"loss": 21.8699, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.42594534158706665, |
|
"rewards/margins": 0.08123664557933807, |
|
"rewards/rejected": -0.5071820020675659, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.8182533438237608, |
|
"grad_norm": 100.11986541748047, |
|
"learning_rate": 4.095361855572431e-06, |
|
"logits/chosen": 0.6691738367080688, |
|
"logits/rejected": 0.5654880404472351, |
|
"logps/chosen": -5.479603290557861, |
|
"logps/rejected": -6.441510200500488, |
|
"loss": 23.1216, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.45797911286354065, |
|
"rewards/margins": 0.0781431570649147, |
|
"rewards/rejected": -0.53612220287323, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8221872541306058, |
|
"grad_norm": 60.643890380859375, |
|
"learning_rate": 3.9302229122963465e-06, |
|
"logits/chosen": 0.8846467733383179, |
|
"logits/rejected": 0.6783546209335327, |
|
"logps/chosen": -6.16463565826416, |
|
"logps/rejected": -6.873899936676025, |
|
"loss": 25.3631, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4791037440299988, |
|
"rewards/margins": 0.06751880794763565, |
|
"rewards/rejected": -0.5466224551200867, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.8261211644374509, |
|
"grad_norm": 43.71245574951172, |
|
"learning_rate": 3.768118910424532e-06, |
|
"logits/chosen": 0.9008040428161621, |
|
"logits/rejected": 0.6831108331680298, |
|
"logps/chosen": -5.510120868682861, |
|
"logps/rejected": -6.556514739990234, |
|
"loss": 22.7596, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4446142315864563, |
|
"rewards/margins": 0.0855989158153534, |
|
"rewards/rejected": -0.5302131175994873, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8300550747442959, |
|
"grad_norm": 62.271480560302734, |
|
"learning_rate": 3.6090804649521037e-06, |
|
"logits/chosen": 0.7602671384811401, |
|
"logits/rejected": 0.7593709826469421, |
|
"logps/chosen": -5.140334606170654, |
|
"logps/rejected": -5.595780372619629, |
|
"loss": 26.1903, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.42375484108924866, |
|
"rewards/margins": 0.03734329715371132, |
|
"rewards/rejected": -0.4610981345176697, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.8339889850511408, |
|
"grad_norm": 55.21813201904297, |
|
"learning_rate": 3.4531376119125605e-06, |
|
"logits/chosen": 0.9315360188484192, |
|
"logits/rejected": 0.9035407900810242, |
|
"logps/chosen": -4.700660705566406, |
|
"logps/rejected": -5.618612766265869, |
|
"loss": 22.3302, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3840516209602356, |
|
"rewards/margins": 0.06768553704023361, |
|
"rewards/rejected": -0.45173710584640503, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8379228953579858, |
|
"grad_norm": 82.28722381591797, |
|
"learning_rate": 3.3003198027051897e-06, |
|
"logits/chosen": 0.8791500329971313, |
|
"logits/rejected": 0.7902938723564148, |
|
"logps/chosen": -5.500949382781982, |
|
"logps/rejected": -6.393127918243408, |
|
"loss": 22.1454, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.424252986907959, |
|
"rewards/margins": 0.07516753673553467, |
|
"rewards/rejected": -0.49942055344581604, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.8418568056648308, |
|
"grad_norm": 58.98760223388672, |
|
"learning_rate": 3.150655898532853e-06, |
|
"logits/chosen": 0.9076460003852844, |
|
"logits/rejected": 0.7676557302474976, |
|
"logps/chosen": -5.2781171798706055, |
|
"logps/rejected": -6.154453754425049, |
|
"loss": 21.2363, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4085540771484375, |
|
"rewards/margins": 0.07775183767080307, |
|
"rewards/rejected": -0.4863058924674988, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8457907159716759, |
|
"grad_norm": 50.36076354980469, |
|
"learning_rate": 3.004174164951259e-06, |
|
"logits/chosen": 0.8667048215866089, |
|
"logits/rejected": 0.7412594556808472, |
|
"logps/chosen": -5.391347408294678, |
|
"logps/rejected": -6.3621954917907715, |
|
"loss": 23.4492, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4194518029689789, |
|
"rewards/margins": 0.05420858785510063, |
|
"rewards/rejected": -0.4736603796482086, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.8497246262785209, |
|
"grad_norm": 44.32093048095703, |
|
"learning_rate": 2.860902266530723e-06, |
|
"logits/chosen": 0.846762478351593, |
|
"logits/rejected": 0.6929168701171875, |
|
"logps/chosen": -5.092324256896973, |
|
"logps/rejected": -5.855961799621582, |
|
"loss": 23.0306, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.41406020522117615, |
|
"rewards/margins": 0.05717035382986069, |
|
"rewards/rejected": -0.47123056650161743, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8536585365853658, |
|
"grad_norm": 57.81728744506836, |
|
"learning_rate": 2.7208672616314345e-06, |
|
"logits/chosen": 0.8338634371757507, |
|
"logits/rejected": 0.731313169002533, |
|
"logps/chosen": -5.652237415313721, |
|
"logps/rejected": -6.4818902015686035, |
|
"loss": 23.1995, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4492163062095642, |
|
"rewards/margins": 0.0547863133251667, |
|
"rewards/rejected": -0.5040026903152466, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8575924468922108, |
|
"grad_norm": 60.14872360229492, |
|
"learning_rate": 2.58409559729321e-06, |
|
"logits/chosen": 0.8530842065811157, |
|
"logits/rejected": 0.546228289604187, |
|
"logps/chosen": -5.260161399841309, |
|
"logps/rejected": -6.7824811935424805, |
|
"loss": 18.7626, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4305374026298523, |
|
"rewards/margins": 0.10589683055877686, |
|
"rewards/rejected": -0.5364342331886292, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8615263571990559, |
|
"grad_norm": 63.61619186401367, |
|
"learning_rate": 2.4506131042406844e-06, |
|
"logits/chosen": 0.8177973628044128, |
|
"logits/rejected": 0.547761082649231, |
|
"logps/chosen": -5.860104560852051, |
|
"logps/rejected": -6.826254367828369, |
|
"loss": 21.7568, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.44510722160339355, |
|
"rewards/margins": 0.07002703845500946, |
|
"rewards/rejected": -0.5151342153549194, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.8654602675059009, |
|
"grad_norm": 63.3447151184082, |
|
"learning_rate": 2.3204449920049378e-06, |
|
"logits/chosen": 0.9175206422805786, |
|
"logits/rejected": 0.7014715671539307, |
|
"logps/chosen": -4.718405723571777, |
|
"logps/rejected": -6.031472682952881, |
|
"loss": 21.5778, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3723874092102051, |
|
"rewards/margins": 0.08356385678052902, |
|
"rewards/rejected": -0.4559513032436371, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8654602675059009, |
|
"eval_logits/chosen": 0.5780611038208008, |
|
"eval_logits/rejected": 0.40807875990867615, |
|
"eval_logps/chosen": -4.8285369873046875, |
|
"eval_logps/rejected": -5.842423439025879, |
|
"eval_loss": 21.6467342376709, |
|
"eval_rewards/accuracies": 0.668749988079071, |
|
"eval_rewards/chosen": -0.3803336024284363, |
|
"eval_rewards/margins": 0.07936159521341324, |
|
"eval_rewards/rejected": -0.4596951901912689, |
|
"eval_runtime": 276.4407, |
|
"eval_samples_per_second": 2.315, |
|
"eval_steps_per_second": 0.145, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8693941778127459, |
|
"grad_norm": 54.00641632080078, |
|
"learning_rate": 2.1936158441624113e-06, |
|
"logits/chosen": 0.7385736107826233, |
|
"logits/rejected": 0.6566623449325562, |
|
"logps/chosen": -5.136672019958496, |
|
"logps/rejected": -5.858384609222412, |
|
"loss": 23.5285, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4080820679664612, |
|
"rewards/margins": 0.05175456404685974, |
|
"rewards/rejected": -0.4598366320133209, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.8733280881195908, |
|
"grad_norm": 63.94253921508789, |
|
"learning_rate": 2.070149613692032e-06, |
|
"logits/chosen": 0.8581029772758484, |
|
"logits/rejected": 0.6364805102348328, |
|
"logps/chosen": -5.164943695068359, |
|
"logps/rejected": -5.946603298187256, |
|
"loss": 23.1104, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4210405945777893, |
|
"rewards/margins": 0.06227627396583557, |
|
"rewards/rejected": -0.4833168387413025, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8772619984264359, |
|
"grad_norm": 44.03200149536133, |
|
"learning_rate": 1.9500696184514735e-06, |
|
"logits/chosen": 0.813498318195343, |
|
"logits/rejected": 0.7083032131195068, |
|
"logps/chosen": -4.999638557434082, |
|
"logps/rejected": -5.514034271240234, |
|
"loss": 23.8753, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3863885700702667, |
|
"rewards/margins": 0.060605116188526154, |
|
"rewards/rejected": -0.4469936788082123, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.8811959087332809, |
|
"grad_norm": 91.94346618652344, |
|
"learning_rate": 1.8333985367733208e-06, |
|
"logits/chosen": 0.6693506240844727, |
|
"logits/rejected": 0.6530565023422241, |
|
"logps/chosen": -4.876978874206543, |
|
"logps/rejected": -5.703049659729004, |
|
"loss": 25.4636, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3906908929347992, |
|
"rewards/margins": 0.038804419338703156, |
|
"rewards/rejected": -0.42949533462524414, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8851298190401259, |
|
"grad_norm": 43.88142395019531, |
|
"learning_rate": 1.7201584031820418e-06, |
|
"logits/chosen": 0.7986466884613037, |
|
"logits/rejected": 0.637027382850647, |
|
"logps/chosen": -4.8898234367370605, |
|
"logps/rejected": -6.180974960327148, |
|
"loss": 20.0693, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3792203366756439, |
|
"rewards/margins": 0.09974167495965958, |
|
"rewards/rejected": -0.4789620339870453, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8890637293469709, |
|
"grad_norm": 137.21253967285156, |
|
"learning_rate": 1.610370604232543e-06, |
|
"logits/chosen": 0.9146040678024292, |
|
"logits/rejected": 0.7076241970062256, |
|
"logps/chosen": -4.883869647979736, |
|
"logps/rejected": -5.884066104888916, |
|
"loss": 21.7767, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3782455325126648, |
|
"rewards/margins": 0.05688385292887688, |
|
"rewards/rejected": -0.4351293444633484, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8929976396538158, |
|
"grad_norm": 64.58161163330078, |
|
"learning_rate": 1.5040558744711087e-06, |
|
"logits/chosen": 0.7862271070480347, |
|
"logits/rejected": 0.6925013065338135, |
|
"logps/chosen": -4.76452112197876, |
|
"logps/rejected": -5.642160892486572, |
|
"loss": 22.3309, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3690032362937927, |
|
"rewards/margins": 0.06053303927183151, |
|
"rewards/rejected": -0.42953628301620483, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.8969315499606609, |
|
"grad_norm": 61.42625045776367, |
|
"learning_rate": 1.4012342925194532e-06, |
|
"logits/chosen": 0.9055964350700378, |
|
"logits/rejected": 0.7526835799217224, |
|
"logps/chosen": -4.5060014724731445, |
|
"logps/rejected": -5.722702980041504, |
|
"loss": 20.1537, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3619564175605774, |
|
"rewards/margins": 0.09387658536434174, |
|
"rewards/rejected": -0.4558330178260803, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9008654602675059, |
|
"grad_norm": 55.103858947753906, |
|
"learning_rate": 1.3019252772826874e-06, |
|
"logits/chosen": 0.8664646148681641, |
|
"logits/rejected": 0.6308975219726562, |
|
"logps/chosen": -4.155823230743408, |
|
"logps/rejected": -5.087547302246094, |
|
"loss": 21.2004, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3356073200702667, |
|
"rewards/margins": 0.06966021656990051, |
|
"rewards/rejected": -0.40526753664016724, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.9047993705743509, |
|
"grad_norm": 36.31183624267578, |
|
"learning_rate": 1.2061475842818337e-06, |
|
"logits/chosen": 0.8728748559951782, |
|
"logits/rejected": 0.8325087428092957, |
|
"logps/chosen": -4.736401557922363, |
|
"logps/rejected": -5.618078708648682, |
|
"loss": 22.6537, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.37614908814430237, |
|
"rewards/margins": 0.07038898766040802, |
|
"rewards/rejected": -0.4465380609035492, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9087332808811959, |
|
"grad_norm": 66.35440826416016, |
|
"learning_rate": 1.1139193021116878e-06, |
|
"logits/chosen": 0.9341157674789429, |
|
"logits/rejected": 0.9482291340827942, |
|
"logps/chosen": -4.348454475402832, |
|
"logps/rejected": -4.784420967102051, |
|
"loss": 25.3501, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.34729093313217163, |
|
"rewards/margins": 0.020721841603517532, |
|
"rewards/rejected": -0.36801275610923767, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.912667191188041, |
|
"grad_norm": 89.98934173583984, |
|
"learning_rate": 1.0252578490245812e-06, |
|
"logits/chosen": 0.9573014974594116, |
|
"logits/rejected": 0.7914093136787415, |
|
"logps/chosen": -4.558495044708252, |
|
"logps/rejected": -5.923836708068848, |
|
"loss": 21.659, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3659791052341461, |
|
"rewards/margins": 0.07632460445165634, |
|
"rewards/rejected": -0.44230371713638306, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9166011014948859, |
|
"grad_norm": 65.70526885986328, |
|
"learning_rate": 9.401799696407643e-07, |
|
"logits/chosen": 0.8820127248764038, |
|
"logits/rejected": 0.5988653898239136, |
|
"logps/chosen": -4.5979132652282715, |
|
"logps/rejected": -5.598001003265381, |
|
"loss": 20.0052, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3443650007247925, |
|
"rewards/margins": 0.09784060716629028, |
|
"rewards/rejected": -0.4422055780887604, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.9205350118017309, |
|
"grad_norm": 47.08201599121094, |
|
"learning_rate": 8.587017317860291e-07, |
|
"logits/chosen": 0.9893172979354858, |
|
"logits/rejected": 0.7521982192993164, |
|
"logps/chosen": -3.797804355621338, |
|
"logps/rejected": -5.014548301696777, |
|
"loss": 19.899, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.30206161737442017, |
|
"rewards/margins": 0.09998033195734024, |
|
"rewards/rejected": -0.4020419120788574, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9244689221085759, |
|
"grad_norm": 44.627437591552734, |
|
"learning_rate": 7.808385234571303e-07, |
|
"logits/chosen": 0.9069494009017944, |
|
"logits/rejected": 0.6808138489723206, |
|
"logps/chosen": -4.21389627456665, |
|
"logps/rejected": -5.213305473327637, |
|
"loss": 20.0447, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.33665183186531067, |
|
"rewards/margins": 0.0863773375749588, |
|
"rewards/rejected": -0.4230291247367859, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.9284028324154209, |
|
"grad_norm": 223.38873291015625, |
|
"learning_rate": 7.066050499155941e-07, |
|
"logits/chosen": 0.866726279258728, |
|
"logits/rejected": 0.7506811022758484, |
|
"logps/chosen": -4.992801189422607, |
|
"logps/rejected": -5.7411932945251465, |
|
"loss": 21.8626, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.38969671726226807, |
|
"rewards/margins": 0.07754883170127869, |
|
"rewards/rejected": -0.46724551916122437, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.932336742722266, |
|
"grad_norm": 39.60309600830078, |
|
"learning_rate": 6.360153309104999e-07, |
|
"logits/chosen": 0.8703472018241882, |
|
"logits/rejected": 0.6217884421348572, |
|
"logps/chosen": -4.574381351470947, |
|
"logps/rejected": -6.075875759124756, |
|
"loss": 17.0961, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.36664003133773804, |
|
"rewards/margins": 0.116389200091362, |
|
"rewards/rejected": -0.48302921652793884, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.9362706530291109, |
|
"grad_norm": 44.12582015991211, |
|
"learning_rate": 5.690826980306851e-07, |
|
"logits/chosen": 0.8729127645492554, |
|
"logits/rejected": 0.6510919332504272, |
|
"logps/chosen": -4.34609842300415, |
|
"logps/rejected": -5.6441850662231445, |
|
"loss": 21.2825, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3452923893928528, |
|
"rewards/margins": 0.08857759088277817, |
|
"rewards/rejected": -0.43387001752853394, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9402045633359559, |
|
"grad_norm": 53.13776779174805, |
|
"learning_rate": 5.058197921869568e-07, |
|
"logits/chosen": 0.9216381311416626, |
|
"logits/rejected": 0.6996389031410217, |
|
"logps/chosen": -5.07051420211792, |
|
"logps/rejected": -6.018683433532715, |
|
"loss": 21.8654, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.38140612840652466, |
|
"rewards/margins": 0.07850027084350586, |
|
"rewards/rejected": -0.4599063992500305, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.9441384736428009, |
|
"grad_norm": 41.1729736328125, |
|
"learning_rate": 4.4623856122471665e-07, |
|
"logits/chosen": 0.7972432971000671, |
|
"logits/rejected": 0.6558005213737488, |
|
"logps/chosen": -4.346493721008301, |
|
"logps/rejected": -5.412367343902588, |
|
"loss": 20.3207, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3416510224342346, |
|
"rewards/margins": 0.08416789770126343, |
|
"rewards/rejected": -0.42581886053085327, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9441384736428009, |
|
"eval_logits/chosen": 0.6238431334495544, |
|
"eval_logits/rejected": 0.4534227252006531, |
|
"eval_logps/chosen": -4.653068542480469, |
|
"eval_logps/rejected": -5.671896934509277, |
|
"eval_loss": 21.683839797973633, |
|
"eval_rewards/accuracies": 0.671875, |
|
"eval_rewards/chosen": -0.3627867102622986, |
|
"eval_rewards/margins": 0.07985583692789078, |
|
"eval_rewards/rejected": -0.44264253973960876, |
|
"eval_runtime": 271.153, |
|
"eval_samples_per_second": 2.36, |
|
"eval_steps_per_second": 0.148, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.948072383949646, |
|
"grad_norm": 89.22261810302734, |
|
"learning_rate": 3.9035025766749333e-07, |
|
"logits/chosen": 1.065003752708435, |
|
"logits/rejected": 0.8652560114860535, |
|
"logps/chosen": -4.475849151611328, |
|
"logps/rejected": -5.141944885253906, |
|
"loss": 23.3289, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.355093777179718, |
|
"rewards/margins": 0.06265915930271149, |
|
"rewards/rejected": -0.4177529215812683, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.952006294256491, |
|
"grad_norm": 54.871742248535156, |
|
"learning_rate": 3.381654365917864e-07, |
|
"logits/chosen": 0.9329290390014648, |
|
"logits/rejected": 0.725328803062439, |
|
"logps/chosen": -4.456474304199219, |
|
"logps/rejected": -5.94174337387085, |
|
"loss": 21.236, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.35311684012413025, |
|
"rewards/margins": 0.08815713971853256, |
|
"rewards/rejected": -0.4412739872932434, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.955940204563336, |
|
"grad_norm": 50.6146354675293, |
|
"learning_rate": 2.896939536336296e-07, |
|
"logits/chosen": 0.9757564663887024, |
|
"logits/rejected": 0.7466751933097839, |
|
"logps/chosen": -3.781454563140869, |
|
"logps/rejected": -4.780596733093262, |
|
"loss": 20.805, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.30329400300979614, |
|
"rewards/margins": 0.0837683156132698, |
|
"rewards/rejected": -0.38706234097480774, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9598741148701809, |
|
"grad_norm": 60.844093322753906, |
|
"learning_rate": 2.449449631272605e-07, |
|
"logits/chosen": 0.8028494715690613, |
|
"logits/rejected": 0.6643998622894287, |
|
"logps/chosen": -5.040999412536621, |
|
"logps/rejected": -6.319550037384033, |
|
"loss": 20.5501, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.38902026414871216, |
|
"rewards/margins": 0.09444370120763779, |
|
"rewards/rejected": -0.48346394300460815, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.963808025177026, |
|
"grad_norm": 48.02137756347656, |
|
"learning_rate": 2.0392691637622698e-07, |
|
"logits/chosen": 0.9626764059066772, |
|
"logits/rejected": 0.7285498380661011, |
|
"logps/chosen": -4.898202896118164, |
|
"logps/rejected": -5.569176197052002, |
|
"loss": 23.1671, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.37996894121170044, |
|
"rewards/margins": 0.05454573780298233, |
|
"rewards/rejected": -0.43451470136642456, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.967741935483871, |
|
"grad_norm": 78.87560272216797, |
|
"learning_rate": 1.666475600572648e-07, |
|
"logits/chosen": 0.9195537567138672, |
|
"logits/rejected": 0.7668181657791138, |
|
"logps/chosen": -4.843678951263428, |
|
"logps/rejected": -6.038217067718506, |
|
"loss": 21.6436, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3704824447631836, |
|
"rewards/margins": 0.08607770502567291, |
|
"rewards/rejected": -0.4565601348876953, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.971675845790716, |
|
"grad_norm": 42.323211669921875, |
|
"learning_rate": 1.331139347572763e-07, |
|
"logits/chosen": 0.8599262237548828, |
|
"logits/rejected": 0.6428951025009155, |
|
"logps/chosen": -5.1760969161987305, |
|
"logps/rejected": -6.792412757873535, |
|
"loss": 19.5088, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3994407057762146, |
|
"rewards/margins": 0.11559662967920303, |
|
"rewards/rejected": -0.5150374174118042, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 45.8914909362793, |
|
"learning_rate": 1.033323736436298e-07, |
|
"logits/chosen": 0.9099301099777222, |
|
"logits/rejected": 0.8088103532791138, |
|
"logps/chosen": -4.615214824676514, |
|
"logps/rejected": -5.559386253356934, |
|
"loss": 21.5362, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.37754908204078674, |
|
"rewards/margins": 0.07683036476373672, |
|
"rewards/rejected": -0.4543794095516205, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9795436664044059, |
|
"grad_norm": 46.62201690673828, |
|
"learning_rate": 7.730850126807854e-08, |
|
"logits/chosen": 0.9434062838554382, |
|
"logits/rejected": 0.7752779126167297, |
|
"logps/chosen": -4.551201820373535, |
|
"logps/rejected": -5.316429615020752, |
|
"loss": 22.0776, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3628130853176117, |
|
"rewards/margins": 0.0656280368566513, |
|
"rewards/rejected": -0.4284411370754242, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.983477576711251, |
|
"grad_norm": 48.4689826965332, |
|
"learning_rate": 5.5047232504505943e-08, |
|
"logits/chosen": 0.8963427543640137, |
|
"logits/rejected": 0.684687077999115, |
|
"logps/chosen": -4.659503936767578, |
|
"logps/rejected": -6.174535751342773, |
|
"loss": 19.0895, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.37634724378585815, |
|
"rewards/margins": 0.10511218011379242, |
|
"rewards/rejected": -0.48145943880081177, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.987411487018096, |
|
"grad_norm": 55.87615203857422, |
|
"learning_rate": 3.655277162071258e-08, |
|
"logits/chosen": 0.7668878436088562, |
|
"logits/rejected": 0.6688386797904968, |
|
"logps/chosen": -5.46780252456665, |
|
"logps/rejected": -6.352745056152344, |
|
"loss": 24.3925, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.40356582403182983, |
|
"rewards/margins": 0.07050136476755142, |
|
"rewards/rejected": -0.47406721115112305, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.991345397324941, |
|
"grad_norm": 76.68724822998047, |
|
"learning_rate": 2.1828611484377983e-08, |
|
"logits/chosen": 0.9311445355415344, |
|
"logits/rejected": 0.7878313064575195, |
|
"logps/chosen": -4.927813529968262, |
|
"logps/rejected": -6.390625476837158, |
|
"loss": 21.7847, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.40384235978126526, |
|
"rewards/margins": 0.09595279395580292, |
|
"rewards/rejected": -0.499795138835907, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.995279307631786, |
|
"grad_norm": 47.24720764160156, |
|
"learning_rate": 1.0877532903414979e-08, |
|
"logits/chosen": 0.7036594152450562, |
|
"logits/rejected": 0.573765754699707, |
|
"logps/chosen": -5.013037204742432, |
|
"logps/rejected": -6.5157151222229, |
|
"loss": 21.7697, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4053846001625061, |
|
"rewards/margins": 0.09475921839475632, |
|
"rewards/rejected": -0.500143826007843, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.999213217938631, |
|
"grad_norm": 195.9378662109375, |
|
"learning_rate": 3.7016041007742474e-09, |
|
"logits/chosen": 0.9408377408981323, |
|
"logits/rejected": 0.719150185585022, |
|
"logps/chosen": -4.878444194793701, |
|
"logps/rejected": -6.0785698890686035, |
|
"loss": 20.698, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3925797641277313, |
|
"rewards/margins": 0.07607986032962799, |
|
"rewards/rejected": -0.4686596393585205, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1271, |
|
"total_flos": 0.0, |
|
"train_loss": 21.99049959550403, |
|
"train_runtime": 22427.3619, |
|
"train_samples_per_second": 0.906, |
|
"train_steps_per_second": 0.057 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1271, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
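A minimal sketch of reading this state back programmatically, assuming the file is the original trainer_state.json written by the Trainer (without the trailing `|` markers of this rendering) and saved locally under that name; the key names mirror the entries logged above, while the path and the printed summary are illustrative assumptions only:

import json

# Load the trainer state; the filename is an assumed local path.
with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]
train_logs = [e for e in history if "loss" in e]        # per-step training records (logged every 5 steps)
eval_logs  = [e for e in history if "eval_loss" in e]   # periodic evaluation records (every 100 steps)

print("best checkpoint:", state.get("best_model_checkpoint"))
print("best metric    :", state.get("best_metric"))

# Trajectory of the evaluation loss, reward margin, and accuracy over training.
for e in eval_logs:
    print(f"step {e['step']:>5}  eval_loss={e['eval_loss']:.3f}  "
          f"margin={e['eval_rewards/margins']:.4f}  "
          f"acc={e['eval_rewards/accuracies']:.3f}")

# Reward margins from the training records, in logging order.
steps   = [e["step"] for e in train_logs]
margins = [e["rewards/margins"] for e in train_logs]
print("final logged train margin:", margins[-1] if margins else None)

The same lists could be plotted (for example with matplotlib) instead of printed; only the standard library is required for the sketch as written.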
|