|
{ |
|
"best_metric": 0.6840614080429077, |
|
"best_model_checkpoint": "./output/checkpoints/2024-05-27_09-01-43/checkpoint-100", |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1271, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003933910306845004, |
|
"grad_norm": 33.997314453125, |
|
"learning_rate": 1.25e-06, |
|
"logits/chosen": -0.2329253852367401, |
|
"logits/rejected": -0.7133080363273621, |
|
"logps/chosen": -206.9918670654297, |
|
"logps/rejected": -177.71676635742188, |
|
"loss": 0.6965, |
|
"rewards/accuracies": 0.16249999403953552, |
|
"rewards/chosen": -0.0024444584269076586, |
|
"rewards/margins": -0.0065179443918168545, |
|
"rewards/rejected": 0.004073486663401127, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.007867820613690008, |
|
"grad_norm": 33.46182632446289, |
|
"learning_rate": 2.8125e-06, |
|
"logits/chosen": -0.39770540595054626, |
|
"logits/rejected": -0.7366135120391846, |
|
"logps/chosen": -200.96145629882812, |
|
"logps/rejected": -177.04241943359375, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.028690528124570847, |
|
"rewards/margins": 0.01567094773054123, |
|
"rewards/rejected": 0.013019581325352192, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.011801730920535013, |
|
"grad_norm": 36.08251953125, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"logits/chosen": -0.35487601161003113, |
|
"logits/rejected": -0.6588561534881592, |
|
"logps/chosen": -217.158203125, |
|
"logps/rejected": -193.87928771972656, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.1420166790485382, |
|
"rewards/margins": 0.021750029176473618, |
|
"rewards/rejected": 0.12026665359735489, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.015735641227380016, |
|
"grad_norm": 31.359071731567383, |
|
"learning_rate": 5.9375e-06, |
|
"logits/chosen": -0.38902169466018677, |
|
"logits/rejected": -0.7328646779060364, |
|
"logps/chosen": -208.9059600830078, |
|
"logps/rejected": -179.52752685546875, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.23876996338367462, |
|
"rewards/margins": 0.07814005762338638, |
|
"rewards/rejected": 0.16062989830970764, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01966955153422502, |
|
"grad_norm": 37.41947555541992, |
|
"learning_rate": 7.500000000000001e-06, |
|
"logits/chosen": -0.24571442604064941, |
|
"logits/rejected": -0.7017894983291626, |
|
"logps/chosen": -195.68539428710938, |
|
"logps/rejected": -178.2770233154297, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.38804444670677185, |
|
"rewards/margins": 0.09177973121404648, |
|
"rewards/rejected": 0.2962647080421448, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.023603461841070025, |
|
"grad_norm": 36.168155670166016, |
|
"learning_rate": 9.0625e-06, |
|
"logits/chosen": -0.3279009461402893, |
|
"logits/rejected": -0.62000572681427, |
|
"logps/chosen": -207.6711883544922, |
|
"logps/rejected": -183.57412719726562, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.5336718559265137, |
|
"rewards/margins": 0.08061937242746353, |
|
"rewards/rejected": 0.45305246114730835, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02753737214791503, |
|
"grad_norm": 33.33737564086914, |
|
"learning_rate": 1.0625e-05, |
|
"logits/chosen": -0.4809085428714752, |
|
"logits/rejected": -0.758043646812439, |
|
"logps/chosen": -220.7525634765625, |
|
"logps/rejected": -198.58531188964844, |
|
"loss": 0.6564, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8875558972358704, |
|
"rewards/margins": 0.20188398659229279, |
|
"rewards/rejected": 0.6856719255447388, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03147128245476003, |
|
"grad_norm": 32.628173828125, |
|
"learning_rate": 1.2187500000000001e-05, |
|
"logits/chosen": -0.34948527812957764, |
|
"logits/rejected": -0.7154799699783325, |
|
"logps/chosen": -195.3294677734375, |
|
"logps/rejected": -178.76821899414062, |
|
"loss": 0.6995, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.8487609624862671, |
|
"rewards/margins": 0.11841963231563568, |
|
"rewards/rejected": 0.7303413152694702, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03540519276160504, |
|
"grad_norm": 34.517208099365234, |
|
"learning_rate": 1.3125e-05, |
|
"logits/chosen": -0.6799963712692261, |
|
"logits/rejected": -1.0354492664337158, |
|
"logps/chosen": -196.0514373779297, |
|
"logps/rejected": -165.4368438720703, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.6915052533149719, |
|
"rewards/margins": 0.18386030197143555, |
|
"rewards/rejected": 0.5076450109481812, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03933910306845004, |
|
"grad_norm": 39.48639678955078, |
|
"learning_rate": 1.4687500000000001e-05, |
|
"logits/chosen": -0.22616450488567352, |
|
"logits/rejected": -0.3378121256828308, |
|
"logps/chosen": -210.38534545898438, |
|
"logps/rejected": -205.64596557617188, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6935254335403442, |
|
"rewards/margins": 0.17534935474395752, |
|
"rewards/rejected": 0.5181760191917419, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.043273013375295044, |
|
"grad_norm": 54.90278244018555, |
|
"learning_rate": 1.6250000000000002e-05, |
|
"logits/chosen": -0.5758259892463684, |
|
"logits/rejected": -0.7866605520248413, |
|
"logps/chosen": -194.25332641601562, |
|
"logps/rejected": -179.38743591308594, |
|
"loss": 0.7132, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.7601491212844849, |
|
"rewards/margins": 0.13087329268455505, |
|
"rewards/rejected": 0.629275918006897, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04720692368214005, |
|
"grad_norm": 31.500804901123047, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"logits/chosen": -0.3127598762512207, |
|
"logits/rejected": -0.7857316136360168, |
|
"logps/chosen": -214.54507446289062, |
|
"logps/rejected": -167.3846435546875, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.8511220216751099, |
|
"rewards/margins": 0.3019554316997528, |
|
"rewards/rejected": 0.5491665601730347, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05114083398898505, |
|
"grad_norm": 39.18369674682617, |
|
"learning_rate": 1.9062500000000003e-05, |
|
"logits/chosen": -0.4257170557975769, |
|
"logits/rejected": -0.7206593751907349, |
|
"logps/chosen": -212.1534881591797, |
|
"logps/rejected": -198.5198516845703, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 1.0609362125396729, |
|
"rewards/margins": 0.4154728055000305, |
|
"rewards/rejected": 0.6454635858535767, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.05507474429583006, |
|
"grad_norm": 23.921350479125977, |
|
"learning_rate": 2.0625000000000003e-05, |
|
"logits/chosen": -0.4161883294582367, |
|
"logits/rejected": -0.7778038382530212, |
|
"logps/chosen": -211.8742218017578, |
|
"logps/rejected": -173.7216033935547, |
|
"loss": 0.6306, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.1546392440795898, |
|
"rewards/margins": 0.4130212366580963, |
|
"rewards/rejected": 0.7416179180145264, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.059008654602675056, |
|
"grad_norm": 33.804962158203125, |
|
"learning_rate": 2.21875e-05, |
|
"logits/chosen": -0.11207835376262665, |
|
"logits/rejected": -0.5466287136077881, |
|
"logps/chosen": -206.02218627929688, |
|
"logps/rejected": -185.80941772460938, |
|
"loss": 0.719, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.1464534997940063, |
|
"rewards/margins": 0.2297494113445282, |
|
"rewards/rejected": 0.9167040586471558, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06294256490952006, |
|
"grad_norm": 42.39493942260742, |
|
"learning_rate": 2.375e-05, |
|
"logits/chosen": -0.2344416081905365, |
|
"logits/rejected": -0.49077630043029785, |
|
"logps/chosen": -209.4600067138672, |
|
"logps/rejected": -198.47911071777344, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.7387064099311829, |
|
"rewards/margins": 0.3134633004665375, |
|
"rewards/rejected": 0.4252430498600006, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06687647521636507, |
|
"grad_norm": 35.38875961303711, |
|
"learning_rate": 2.5312500000000002e-05, |
|
"logits/chosen": -0.26731210947036743, |
|
"logits/rejected": -0.7659153342247009, |
|
"logps/chosen": -206.7689666748047, |
|
"logps/rejected": -178.71986389160156, |
|
"loss": 0.7082, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.22556963562965393, |
|
"rewards/margins": 0.251675546169281, |
|
"rewards/rejected": -0.026105916127562523, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.07081038552321008, |
|
"grad_norm": 30.606552124023438, |
|
"learning_rate": 2.6875000000000003e-05, |
|
"logits/chosen": -0.34513598680496216, |
|
"logits/rejected": -0.5674473643302917, |
|
"logps/chosen": -208.49002075195312, |
|
"logps/rejected": -184.6549072265625, |
|
"loss": 0.7702, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.2557719945907593, |
|
"rewards/margins": 0.22060665488243103, |
|
"rewards/rejected": 0.03516533225774765, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07474429583005507, |
|
"grad_norm": 25.326486587524414, |
|
"learning_rate": 2.8437500000000003e-05, |
|
"logits/chosen": -0.21977896988391876, |
|
"logits/rejected": -0.6439090371131897, |
|
"logps/chosen": -201.21401977539062, |
|
"logps/rejected": -183.35049438476562, |
|
"loss": 0.713, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6074048280715942, |
|
"rewards/margins": 0.23941688239574432, |
|
"rewards/rejected": 0.3679879307746887, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.07867820613690008, |
|
"grad_norm": 34.922325134277344, |
|
"learning_rate": 3.0000000000000004e-05, |
|
"logits/chosen": -0.4304371774196625, |
|
"logits/rejected": -0.7793359756469727, |
|
"logps/chosen": -207.411376953125, |
|
"logps/rejected": -178.08729553222656, |
|
"loss": 0.7514, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.6943355798721313, |
|
"rewards/margins": 0.1873859167098999, |
|
"rewards/rejected": 0.5069497227668762, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07867820613690008, |
|
"eval_logits/chosen": 1.2959624528884888, |
|
"eval_logits/rejected": 1.0611025094985962, |
|
"eval_logps/chosen": -205.19613647460938, |
|
"eval_logps/rejected": -178.5214385986328, |
|
"eval_loss": 0.6840614080429077, |
|
"eval_rewards/accuracies": 0.628125011920929, |
|
"eval_rewards/chosen": 1.1096659898757935, |
|
"eval_rewards/margins": 0.36510738730430603, |
|
"eval_rewards/rejected": 0.7445584535598755, |
|
"eval_runtime": 248.7873, |
|
"eval_samples_per_second": 2.572, |
|
"eval_steps_per_second": 0.161, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08261211644374508, |
|
"grad_norm": 27.505342483520508, |
|
"learning_rate": 3.15625e-05, |
|
"logits/chosen": -0.19241487979888916, |
|
"logits/rejected": -0.49540799856185913, |
|
"logps/chosen": -200.0576629638672, |
|
"logps/rejected": -175.6849365234375, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.2072901725769043, |
|
"rewards/margins": 0.31199443340301514, |
|
"rewards/rejected": 0.8952957391738892, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08654602675059009, |
|
"grad_norm": 34.53056335449219, |
|
"learning_rate": 3.3125000000000006e-05, |
|
"logits/chosen": -0.4692454934120178, |
|
"logits/rejected": -0.9546957015991211, |
|
"logps/chosen": -194.42092895507812, |
|
"logps/rejected": -168.50645446777344, |
|
"loss": 0.6457, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.2674047946929932, |
|
"rewards/margins": 0.4998513162136078, |
|
"rewards/rejected": 0.767553448677063, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0904799370574351, |
|
"grad_norm": 35.058509826660156, |
|
"learning_rate": 3.46875e-05, |
|
"logits/chosen": -0.6155041456222534, |
|
"logits/rejected": -0.9042506217956543, |
|
"logps/chosen": -198.44775390625, |
|
"logps/rejected": -174.51780700683594, |
|
"loss": 0.8259, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7558386921882629, |
|
"rewards/margins": 0.17122629284858704, |
|
"rewards/rejected": 0.5846124291419983, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.0944138473642801, |
|
"grad_norm": 25.631980895996094, |
|
"learning_rate": 3.625e-05, |
|
"logits/chosen": -0.546207070350647, |
|
"logits/rejected": -0.7782138586044312, |
|
"logps/chosen": -193.2004852294922, |
|
"logps/rejected": -179.29165649414062, |
|
"loss": 0.7377, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.7277377247810364, |
|
"rewards/margins": 0.21684861183166504, |
|
"rewards/rejected": 0.5108891725540161, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0983477576711251, |
|
"grad_norm": 33.057899475097656, |
|
"learning_rate": 3.7812500000000004e-05, |
|
"logits/chosen": -0.4421041011810303, |
|
"logits/rejected": -0.6752287745475769, |
|
"logps/chosen": -186.85108947753906, |
|
"logps/rejected": -176.91525268554688, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4784621298313141, |
|
"rewards/margins": 0.6664348840713501, |
|
"rewards/rejected": -0.18797270953655243, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1022816679779701, |
|
"grad_norm": 42.00132751464844, |
|
"learning_rate": 3.9375e-05, |
|
"logits/chosen": -0.3549075722694397, |
|
"logits/rejected": -0.7709188461303711, |
|
"logps/chosen": -202.1655731201172, |
|
"logps/rejected": -174.2749786376953, |
|
"loss": 0.8219, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.28374719619750977, |
|
"rewards/margins": 0.11102640628814697, |
|
"rewards/rejected": -0.3947736322879791, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10621557828481511, |
|
"grad_norm": 27.35382843017578, |
|
"learning_rate": 3.9999320096411495e-05, |
|
"logits/chosen": -0.427701473236084, |
|
"logits/rejected": -0.8705617189407349, |
|
"logps/chosen": -200.99496459960938, |
|
"logps/rejected": -165.5473175048828, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.44559532403945923, |
|
"rewards/margins": 0.4697811007499695, |
|
"rewards/rejected": -0.024185750633478165, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.11014948859166011, |
|
"grad_norm": 35.097049713134766, |
|
"learning_rate": 3.999516529744215e-05, |
|
"logits/chosen": -0.27036961913108826, |
|
"logits/rejected": -0.6928902864456177, |
|
"logps/chosen": -196.0111083984375, |
|
"logps/rejected": -173.6074981689453, |
|
"loss": 0.6374, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.8343067169189453, |
|
"rewards/margins": 0.6672177314758301, |
|
"rewards/rejected": 1.1670891046524048, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11408339889850512, |
|
"grad_norm": 25.420122146606445, |
|
"learning_rate": 3.99872342074443e-05, |
|
"logits/chosen": -0.21814267337322235, |
|
"logits/rejected": -0.6547086834907532, |
|
"logps/chosen": -200.3138427734375, |
|
"logps/rejected": -175.4734344482422, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 2.1070187091827393, |
|
"rewards/margins": 0.7536414861679077, |
|
"rewards/rejected": 1.353377103805542, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.11801730920535011, |
|
"grad_norm": 28.114505767822266, |
|
"learning_rate": 3.997552832428523e-05, |
|
"logits/chosen": -0.36429575085639954, |
|
"logits/rejected": -0.547380805015564, |
|
"logps/chosen": -190.30709838867188, |
|
"logps/rejected": -177.975830078125, |
|
"loss": 0.8049, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.508959174156189, |
|
"rewards/margins": 0.5520061254501343, |
|
"rewards/rejected": 0.9569530487060547, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12195121951219512, |
|
"grad_norm": 28.442045211791992, |
|
"learning_rate": 3.9960049858740445e-05, |
|
"logits/chosen": -0.5695027709007263, |
|
"logits/rejected": -0.7303559184074402, |
|
"logps/chosen": -196.6102294921875, |
|
"logps/rejected": -179.8834991455078, |
|
"loss": 0.8199, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.8943918943405151, |
|
"rewards/margins": 0.4739474356174469, |
|
"rewards/rejected": 0.4204444885253906, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.12588512981904013, |
|
"grad_norm": 32.67585754394531, |
|
"learning_rate": 3.994080173407612e-05, |
|
"logits/chosen": -0.3450685143470764, |
|
"logits/rejected": -0.8491863012313843, |
|
"logps/chosen": -191.87008666992188, |
|
"logps/rejected": -165.56228637695312, |
|
"loss": 0.7242, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7810487747192383, |
|
"rewards/margins": 0.4842549264431, |
|
"rewards/rejected": 0.2967938780784607, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12981904012588513, |
|
"grad_norm": 23.827922821044922, |
|
"learning_rate": 3.9917787585497054e-05, |
|
"logits/chosen": -0.1322830617427826, |
|
"logits/rejected": -0.6103258728981018, |
|
"logps/chosen": -212.47152709960938, |
|
"logps/rejected": -184.1310272216797, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.2753359079360962, |
|
"rewards/margins": 0.585142970085144, |
|
"rewards/rejected": 0.6901928186416626, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.13375295043273014, |
|
"grad_norm": 26.69400405883789, |
|
"learning_rate": 3.9891011759460066e-05, |
|
"logits/chosen": -0.2469785213470459, |
|
"logits/rejected": -0.737481415271759, |
|
"logps/chosen": -202.321044921875, |
|
"logps/rejected": -170.80929565429688, |
|
"loss": 0.7278, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 2.2151436805725098, |
|
"rewards/margins": 0.6948539018630981, |
|
"rewards/rejected": 1.5202900171279907, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.13768686073957515, |
|
"grad_norm": 37.72239303588867, |
|
"learning_rate": 3.986047931285316e-05, |
|
"logits/chosen": -0.25732505321502686, |
|
"logits/rejected": -0.6134179830551147, |
|
"logps/chosen": -215.3037109375, |
|
"logps/rejected": -183.12831115722656, |
|
"loss": 1.0019, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 3.008638620376587, |
|
"rewards/margins": 0.3702520728111267, |
|
"rewards/rejected": 2.6383864879608154, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.14162077104642015, |
|
"grad_norm": 19.040857315063477, |
|
"learning_rate": 3.982619601204049e-05, |
|
"logits/chosen": -0.0029738754965364933, |
|
"logits/rejected": -0.3499515652656555, |
|
"logps/chosen": -224.81015014648438, |
|
"logps/rejected": -198.2837371826172, |
|
"loss": 0.9635, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 2.651772975921631, |
|
"rewards/margins": 0.27305322885513306, |
|
"rewards/rejected": 2.3787198066711426, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14555468135326516, |
|
"grad_norm": 27.078737258911133, |
|
"learning_rate": 3.97881683317733e-05, |
|
"logits/chosen": -0.062339358031749725, |
|
"logits/rejected": -0.5432006120681763, |
|
"logps/chosen": -198.5349578857422, |
|
"logps/rejected": -165.4753875732422, |
|
"loss": 0.7487, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 2.0952491760253906, |
|
"rewards/margins": 0.6280597448348999, |
|
"rewards/rejected": 1.4671893119812012, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.14948859166011014, |
|
"grad_norm": 21.261680603027344, |
|
"learning_rate": 3.974640345396709e-05, |
|
"logits/chosen": -0.04105158522725105, |
|
"logits/rejected": -0.4839680790901184, |
|
"logps/chosen": -202.52001953125, |
|
"logps/rejected": -180.2872314453125, |
|
"loss": 0.7641, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 1.8239288330078125, |
|
"rewards/margins": 0.5607953667640686, |
|
"rewards/rejected": 1.2631335258483887, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15342250196695514, |
|
"grad_norm": 24.164844512939453, |
|
"learning_rate": 3.9700909266345264e-05, |
|
"logits/chosen": -0.09652426838874817, |
|
"logits/rejected": -0.45777568221092224, |
|
"logps/chosen": -194.02333068847656, |
|
"logps/rejected": -163.8848876953125, |
|
"loss": 0.7631, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7619401216506958, |
|
"rewards/margins": 0.377643883228302, |
|
"rewards/rejected": 0.3842962384223938, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.15735641227380015, |
|
"grad_norm": 23.005035400390625, |
|
"learning_rate": 3.965169436094947e-05, |
|
"logits/chosen": -0.07853743433952332, |
|
"logits/rejected": -0.3859809637069702, |
|
"logps/chosen": -202.27133178710938, |
|
"logps/rejected": -177.63619995117188, |
|
"loss": 0.699, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7786092162132263, |
|
"rewards/margins": 0.5468564629554749, |
|
"rewards/rejected": 0.23175275325775146, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15735641227380015, |
|
"eval_logits/chosen": 1.3732103109359741, |
|
"eval_logits/rejected": 1.1526342630386353, |
|
"eval_logps/chosen": -205.0836944580078, |
|
"eval_logps/rejected": -178.6134033203125, |
|
"eval_loss": 0.7704851627349854, |
|
"eval_rewards/accuracies": 0.609375, |
|
"eval_rewards/chosen": 1.1546350717544556, |
|
"eval_rewards/margins": 0.44686493277549744, |
|
"eval_rewards/rejected": 0.7077701687812805, |
|
"eval_runtime": 249.1371, |
|
"eval_samples_per_second": 2.569, |
|
"eval_steps_per_second": 0.161, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16129032258064516, |
|
"grad_norm": 17.80652618408203, |
|
"learning_rate": 3.959876803251684e-05, |
|
"logits/chosen": -0.01523427665233612, |
|
"logits/rejected": -0.35804808139801025, |
|
"logps/chosen": -195.39199829101562, |
|
"logps/rejected": -177.60208129882812, |
|
"loss": 0.7971, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 1.1401153802871704, |
|
"rewards/margins": 0.43174582719802856, |
|
"rewards/rejected": 0.7083694338798523, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.16522423288749016, |
|
"grad_norm": 33.28690719604492, |
|
"learning_rate": 3.954214027672465e-05, |
|
"logits/chosen": -0.0755038633942604, |
|
"logits/rejected": -0.3410184979438782, |
|
"logps/chosen": -213.752685546875, |
|
"logps/rejected": -188.33914184570312, |
|
"loss": 0.9044, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 1.4741712808609009, |
|
"rewards/margins": 0.14965666830539703, |
|
"rewards/rejected": 1.3245145082473755, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.16915814319433517, |
|
"grad_norm": 26.59926986694336, |
|
"learning_rate": 3.948182178830249e-05, |
|
"logits/chosen": 0.1018071174621582, |
|
"logits/rejected": -0.31446847319602966, |
|
"logps/chosen": -207.9669647216797, |
|
"logps/rejected": -177.61013793945312, |
|
"loss": 0.7243, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8158692121505737, |
|
"rewards/margins": 0.5528920292854309, |
|
"rewards/rejected": 0.2629771828651428, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.17309205350118018, |
|
"grad_norm": 38.70561981201172, |
|
"learning_rate": 3.9417823959012495e-05, |
|
"logits/chosen": 0.043111931532621384, |
|
"logits/rejected": -0.27513837814331055, |
|
"logps/chosen": -207.97213745117188, |
|
"logps/rejected": -176.2123565673828, |
|
"loss": 0.8073, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 1.1955599784851074, |
|
"rewards/margins": 0.5336702466011047, |
|
"rewards/rejected": 0.6618898510932922, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17702596380802518, |
|
"grad_norm": 24.805295944213867, |
|
"learning_rate": 3.935015887549786e-05, |
|
"logits/chosen": -0.08715621381998062, |
|
"logits/rejected": -0.451333612203598, |
|
"logps/chosen": -216.1869659423828, |
|
"logps/rejected": -184.5681915283203, |
|
"loss": 0.7354, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 1.5138676166534424, |
|
"rewards/margins": 0.6545284986495972, |
|
"rewards/rejected": 0.8593391180038452, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1809598741148702, |
|
"grad_norm": 32.434017181396484, |
|
"learning_rate": 3.9278839317000164e-05, |
|
"logits/chosen": -0.22302675247192383, |
|
"logits/rejected": -0.6187790632247925, |
|
"logps/chosen": -205.05984497070312, |
|
"logps/rejected": -178.08538818359375, |
|
"loss": 0.7056, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.2798973321914673, |
|
"rewards/margins": 0.8485145568847656, |
|
"rewards/rejected": 0.43138280510902405, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1848937844217152, |
|
"grad_norm": 25.297792434692383, |
|
"learning_rate": 3.920387875294588e-05, |
|
"logits/chosen": -0.14879488945007324, |
|
"logits/rejected": -0.7074145078659058, |
|
"logps/chosen": -198.54901123046875, |
|
"logps/rejected": -169.9258575439453, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7288762331008911, |
|
"rewards/margins": 0.656203031539917, |
|
"rewards/rejected": 0.07267318665981293, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.1888276947285602, |
|
"grad_norm": 24.259140014648438, |
|
"learning_rate": 3.9125291340402557e-05, |
|
"logits/chosen": 0.05777154117822647, |
|
"logits/rejected": -0.37030404806137085, |
|
"logps/chosen": -195.25747680664062, |
|
"logps/rejected": -167.06149291992188, |
|
"loss": 0.6126, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.6011714935302734, |
|
"rewards/margins": 0.7886489629745483, |
|
"rewards/rejected": 0.8125225305557251, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19276160503540518, |
|
"grad_norm": 26.536211013793945, |
|
"learning_rate": 3.904309192140507e-05, |
|
"logits/chosen": -0.27970609068870544, |
|
"logits/rejected": -0.5693933367729187, |
|
"logps/chosen": -204.64598083496094, |
|
"logps/rejected": -196.45191955566406, |
|
"loss": 0.6255, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.9763765335083008, |
|
"rewards/margins": 0.9035207033157349, |
|
"rewards/rejected": 1.072855830192566, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.1966955153422502, |
|
"grad_norm": 23.359954833984375, |
|
"learning_rate": 3.89572960201526e-05, |
|
"logits/chosen": -0.007777350954711437, |
|
"logits/rejected": -0.2527710497379303, |
|
"logps/chosen": -201.5716094970703, |
|
"logps/rejected": -187.55841064453125, |
|
"loss": 0.7931, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.4763085842132568, |
|
"rewards/margins": 0.5630350708961487, |
|
"rewards/rejected": 0.9132736325263977, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2006294256490952, |
|
"grad_norm": 20.35169792175293, |
|
"learning_rate": 3.886791984007669e-05, |
|
"logits/chosen": -0.13616272807121277, |
|
"logits/rejected": -0.46024090051651, |
|
"logps/chosen": -218.16427612304688, |
|
"logps/rejected": -192.9780731201172, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.0198808908462524, |
|
"rewards/margins": 0.9105831384658813, |
|
"rewards/rejected": 0.1092977523803711, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.2045633359559402, |
|
"grad_norm": 24.485294342041016, |
|
"learning_rate": 3.877498026078108e-05, |
|
"logits/chosen": -0.41903620958328247, |
|
"logits/rejected": -0.6284810304641724, |
|
"logps/chosen": -197.4494171142578, |
|
"logps/rejected": -182.7037811279297, |
|
"loss": 0.7597, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.005517137236893177, |
|
"rewards/margins": 0.6833702325820923, |
|
"rewards/rejected": -0.6778531074523926, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2084972462627852, |
|
"grad_norm": 19.309831619262695, |
|
"learning_rate": 3.867849483485383e-05, |
|
"logits/chosen": -0.4443703591823578, |
|
"logits/rejected": -0.8326670527458191, |
|
"logps/chosen": -199.72280883789062, |
|
"logps/rejected": -168.3777313232422, |
|
"loss": 0.6804, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.8990485072135925, |
|
"rewards/margins": 0.7167578935623169, |
|
"rewards/rejected": 0.18229058384895325, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.21243115656963021, |
|
"grad_norm": 20.631038665771484, |
|
"learning_rate": 3.857848178455231e-05, |
|
"logits/chosen": -0.16152219474315643, |
|
"logits/rejected": -0.6074092984199524, |
|
"logps/chosen": -212.79232788085938, |
|
"logps/rejected": -169.90603637695312, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 2.5287070274353027, |
|
"rewards/margins": 1.119511365890503, |
|
"rewards/rejected": 1.4091956615447998, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.21636506687647522, |
|
"grad_norm": 23.65202522277832, |
|
"learning_rate": 3.8474959998361754e-05, |
|
"logits/chosen": -0.3141329884529114, |
|
"logits/rejected": -0.6160004734992981, |
|
"logps/chosen": -187.09658813476562, |
|
"logps/rejected": -175.7742919921875, |
|
"loss": 0.9405, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 2.6424171924591064, |
|
"rewards/margins": 0.35450419783592224, |
|
"rewards/rejected": 2.2879130840301514, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.22029897718332023, |
|
"grad_norm": 22.0035343170166, |
|
"learning_rate": 3.8367949027427994e-05, |
|
"logits/chosen": -0.03003566339612007, |
|
"logits/rejected": -0.3478749394416809, |
|
"logps/chosen": -195.31460571289062, |
|
"logps/rejected": -176.53146362304688, |
|
"loss": 0.7537, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 3.3984057903289795, |
|
"rewards/margins": 0.9712641835212708, |
|
"rewards/rejected": 2.4271416664123535, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22423288749016523, |
|
"grad_norm": 27.25768280029297, |
|
"learning_rate": 3.825746908186499e-05, |
|
"logits/chosen": -0.33755970001220703, |
|
"logits/rejected": -0.5320338010787964, |
|
"logps/chosen": -194.16282653808594, |
|
"logps/rejected": -177.7003631591797, |
|
"loss": 0.9776, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 2.651137351989746, |
|
"rewards/margins": 0.3439365029335022, |
|
"rewards/rejected": 2.3072009086608887, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.22816679779701024, |
|
"grad_norm": 28.454999923706055, |
|
"learning_rate": 3.8143541026937976e-05, |
|
"logits/chosen": -0.22913965582847595, |
|
"logits/rejected": -0.4301827549934387, |
|
"logps/chosen": -191.88259887695312, |
|
"logps/rejected": -186.07785034179688, |
|
"loss": 0.7314, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.7435264587402344, |
|
"rewards/margins": 0.5647959113121033, |
|
"rewards/rejected": 1.1787304878234863, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.23210070810385522, |
|
"grad_norm": 17.058061599731445, |
|
"learning_rate": 3.802618637912282e-05, |
|
"logits/chosen": -0.4652739465236664, |
|
"logits/rejected": -0.7998835444450378, |
|
"logps/chosen": -193.7137908935547, |
|
"logps/rejected": -167.9613800048828, |
|
"loss": 0.8402, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.0257607698440552, |
|
"rewards/margins": 0.507733166217804, |
|
"rewards/rejected": 0.5180276036262512, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.23603461841070023, |
|
"grad_norm": 19.008708953857422, |
|
"learning_rate": 3.7905427302042454e-05, |
|
"logits/chosen": -0.18881992995738983, |
|
"logits/rejected": -0.5596441030502319, |
|
"logps/chosen": -198.87579345703125, |
|
"logps/rejected": -176.9473876953125, |
|
"loss": 0.7099, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.2355034351348877, |
|
"rewards/margins": 0.7009516358375549, |
|
"rewards/rejected": 0.5345517992973328, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23603461841070023, |
|
"eval_logits/chosen": 1.17328941822052, |
|
"eval_logits/rejected": 0.9396275281906128, |
|
"eval_logps/chosen": -205.6018524169922, |
|
"eval_logps/rejected": -179.09677124023438, |
|
"eval_loss": 0.8744672536849976, |
|
"eval_rewards/accuracies": 0.5703125, |
|
"eval_rewards/chosen": 0.9473776817321777, |
|
"eval_rewards/margins": 0.43294864892959595, |
|
"eval_rewards/rejected": 0.5144290924072266, |
|
"eval_runtime": 248.0407, |
|
"eval_samples_per_second": 2.58, |
|
"eval_steps_per_second": 0.161, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23996852871754523, |
|
"grad_norm": 32.76321029663086, |
|
"learning_rate": 3.778128660228097e-05, |
|
"logits/chosen": -0.5831426382064819, |
|
"logits/rejected": -0.8806711435317993, |
|
"logps/chosen": -196.5940704345703, |
|
"logps/rejected": -165.324462890625, |
|
"loss": 0.7365, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.9338918924331665, |
|
"rewards/margins": 0.5459949374198914, |
|
"rewards/rejected": 0.3878970444202423, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 25.2067813873291, |
|
"learning_rate": 3.7653787725076466e-05, |
|
"logits/chosen": -0.31566348671913147, |
|
"logits/rejected": -0.571528434753418, |
|
"logps/chosen": -206.60708618164062, |
|
"logps/rejected": -191.65855407714844, |
|
"loss": 1.0555, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 1.3235902786254883, |
|
"rewards/margins": 0.047803860157728195, |
|
"rewards/rejected": 1.2757863998413086, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.24783634933123525, |
|
"grad_norm": 21.43438148498535, |
|
"learning_rate": 3.7522954749893095e-05, |
|
"logits/chosen": -0.18173213303089142, |
|
"logits/rejected": -0.5252254605293274, |
|
"logps/chosen": -209.19760131835938, |
|
"logps/rejected": -184.69784545898438, |
|
"loss": 0.9501, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.7247976064682007, |
|
"rewards/margins": 0.3406931459903717, |
|
"rewards/rejected": 1.3841044902801514, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.25177025963808025, |
|
"grad_norm": 24.763315200805664, |
|
"learning_rate": 3.738881238587344e-05, |
|
"logits/chosen": -0.06764630228281021, |
|
"logits/rejected": -0.5149588584899902, |
|
"logps/chosen": -213.1123504638672, |
|
"logps/rejected": -178.50633239746094, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.758510947227478, |
|
"rewards/margins": 0.698074460029602, |
|
"rewards/rejected": 1.060436487197876, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.25570416994492523, |
|
"grad_norm": 29.972768783569336, |
|
"learning_rate": 3.725138596717195e-05, |
|
"logits/chosen": -0.37508150935173035, |
|
"logits/rejected": -0.9795374870300293, |
|
"logps/chosen": -210.3339385986328, |
|
"logps/rejected": -166.22647094726562, |
|
"loss": 0.8229, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.5866848230361938, |
|
"rewards/margins": 0.4931170344352722, |
|
"rewards/rejected": 1.0935678482055664, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.25963808025177026, |
|
"grad_norm": 30.266515731811523, |
|
"learning_rate": 3.711070144817032e-05, |
|
"logits/chosen": -0.5352300405502319, |
|
"logits/rejected": -0.7394737005233765, |
|
"logps/chosen": -208.7587890625, |
|
"logps/rejected": -191.28744506835938, |
|
"loss": 0.8837, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 2.188896656036377, |
|
"rewards/margins": 0.5729233026504517, |
|
"rewards/rejected": 1.6159734725952148, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.26357199055861524, |
|
"grad_norm": 20.564146041870117, |
|
"learning_rate": 3.6966785398575716e-05, |
|
"logits/chosen": -0.11968457698822021, |
|
"logits/rejected": -0.622562050819397, |
|
"logps/chosen": -217.9984130859375, |
|
"logps/rejected": -174.47152709960938, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 2.778174877166748, |
|
"rewards/margins": 1.0597846508026123, |
|
"rewards/rejected": 1.7183904647827148, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.2675059008654603, |
|
"grad_norm": 28.211387634277344, |
|
"learning_rate": 3.681966499840286e-05, |
|
"logits/chosen": -0.3878430426120758, |
|
"logits/rejected": -0.6015113592147827, |
|
"logps/chosen": -192.3523406982422, |
|
"logps/rejected": -186.26373291015625, |
|
"loss": 0.7849, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 2.3973546028137207, |
|
"rewards/margins": 0.8487040400505066, |
|
"rewards/rejected": 1.5486505031585693, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.27143981117230526, |
|
"grad_norm": 25.53026580810547, |
|
"learning_rate": 3.6669368032840766e-05, |
|
"logits/chosen": -0.14762678742408752, |
|
"logits/rejected": -0.5588310956954956, |
|
"logps/chosen": -228.94393920898438, |
|
"logps/rejected": -188.67474365234375, |
|
"loss": 0.7247, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 2.564577579498291, |
|
"rewards/margins": 0.7130967974662781, |
|
"rewards/rejected": 1.8514807224273682, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2753737214791503, |
|
"grad_norm": 18.148210525512695, |
|
"learning_rate": 3.651592288700525e-05, |
|
"logits/chosen": -0.3907206058502197, |
|
"logits/rejected": -0.7235937714576721, |
|
"logps/chosen": -188.78823852539062, |
|
"logps/rejected": -171.70339965820312, |
|
"loss": 0.8293, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.7909164428710938, |
|
"rewards/margins": 0.5921815037727356, |
|
"rewards/rejected": 1.198734998703003, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.27930763178599527, |
|
"grad_norm": 24.096284866333008, |
|
"learning_rate": 3.6359358540578095e-05, |
|
"logits/chosen": -0.30726513266563416, |
|
"logits/rejected": -0.8273059725761414, |
|
"logps/chosen": -199.73843383789062, |
|
"logps/rejected": -171.33981323242188, |
|
"loss": 0.7125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.799203634262085, |
|
"rewards/margins": 0.8719380497932434, |
|
"rewards/rejected": 0.9272655248641968, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.2832415420928403, |
|
"grad_norm": 24.70514678955078, |
|
"learning_rate": 3.619970456233395e-05, |
|
"logits/chosen": -0.2640475332736969, |
|
"logits/rejected": -0.8327733278274536, |
|
"logps/chosen": -206.46035766601562, |
|
"logps/rejected": -165.4632110595703, |
|
"loss": 0.6377, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 2.429255723953247, |
|
"rewards/margins": 1.0934853553771973, |
|
"rewards/rejected": 1.3357700109481812, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2871754523996853, |
|
"grad_norm": 23.147350311279297, |
|
"learning_rate": 3.6036991104555974e-05, |
|
"logits/chosen": -0.008176600560545921, |
|
"logits/rejected": -0.34810671210289, |
|
"logps/chosen": -206.794921875, |
|
"logps/rejected": -186.4044189453125, |
|
"loss": 0.7023, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.2398250102996826, |
|
"rewards/margins": 0.7909523248672485, |
|
"rewards/rejected": 1.4488725662231445, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2911093627065303, |
|
"grad_norm": 27.7244815826416, |
|
"learning_rate": 3.587124889734125e-05, |
|
"logits/chosen": -0.48228806257247925, |
|
"logits/rejected": -0.49171242117881775, |
|
"logps/chosen": -178.5115509033203, |
|
"logps/rejected": -172.22119140625, |
|
"loss": 0.9182, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.4874086380004883, |
|
"rewards/margins": 0.3070749342441559, |
|
"rewards/rejected": 1.1803337335586548, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2950432730133753, |
|
"grad_norm": 26.767961502075195, |
|
"learning_rate": 3.57025092427971e-05, |
|
"logits/chosen": -0.20643901824951172, |
|
"logits/rejected": -0.5273423790931702, |
|
"logps/chosen": -195.8766632080078, |
|
"logps/rejected": -173.40370178222656, |
|
"loss": 0.9323, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 2.199556827545166, |
|
"rewards/margins": 0.6209264993667603, |
|
"rewards/rejected": 1.5786300897598267, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2989771833202203, |
|
"grad_norm": 18.721200942993164, |
|
"learning_rate": 3.5530804009129375e-05, |
|
"logits/chosen": -0.25294172763824463, |
|
"logits/rejected": -0.60113525390625, |
|
"logps/chosen": -201.85067749023438, |
|
"logps/rejected": -173.7031707763672, |
|
"loss": 0.7695, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 2.1115119457244873, |
|
"rewards/margins": 0.6209571361541748, |
|
"rewards/rejected": 1.4905551671981812, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3029110936270653, |
|
"grad_norm": 23.3524227142334, |
|
"learning_rate": 3.53561656246238e-05, |
|
"logits/chosen": -0.282802015542984, |
|
"logits/rejected": -0.5362482666969299, |
|
"logps/chosen": -202.23257446289062, |
|
"logps/rejected": -186.3799591064453, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 2.020700454711914, |
|
"rewards/margins": 0.8641992807388306, |
|
"rewards/rejected": 1.156501293182373, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3068450039339103, |
|
"grad_norm": 19.006500244140625, |
|
"learning_rate": 3.5178627071521574e-05, |
|
"logits/chosen": -0.40107831358909607, |
|
"logits/rejected": -0.5502349138259888, |
|
"logps/chosen": -202.1154022216797, |
|
"logps/rejected": -185.59539794921875, |
|
"loss": 0.8749, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.338216781616211, |
|
"rewards/margins": 0.3761267364025116, |
|
"rewards/rejected": 0.9620901346206665, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3107789142407553, |
|
"grad_norm": 16.3823299407959, |
|
"learning_rate": 3.4998221879790324e-05, |
|
"logits/chosen": -0.31552472710609436, |
|
"logits/rejected": -0.7142918705940247, |
|
"logps/chosen": -197.4582977294922, |
|
"logps/rejected": -184.94174194335938, |
|
"loss": 0.901, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.363880455493927, |
|
"rewards/margins": 0.18122999370098114, |
|
"rewards/rejected": 0.18265047669410706, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3147128245476003, |
|
"grad_norm": 20.992412567138672, |
|
"learning_rate": 3.481498412079167e-05, |
|
"logits/chosen": -0.3988286852836609, |
|
"logits/rejected": -0.7025038003921509, |
|
"logps/chosen": -196.63803100585938, |
|
"logps/rejected": -178.83221435546875, |
|
"loss": 0.7963, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.26729291677474976, |
|
"rewards/margins": 0.3433241546154022, |
|
"rewards/rejected": -0.07603128254413605, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3147128245476003, |
|
"eval_logits/chosen": 1.2825068235397339, |
|
"eval_logits/rejected": 1.0665369033813477, |
|
"eval_logps/chosen": -207.7469940185547, |
|
"eval_logps/rejected": -180.9813232421875, |
|
"eval_loss": 0.8834741711616516, |
|
"eval_rewards/accuracies": 0.589062511920929, |
|
"eval_rewards/chosen": 0.08931777626276016, |
|
"eval_rewards/margins": 0.3287123441696167, |
|
"eval_rewards/rejected": -0.23939454555511475, |
|
"eval_runtime": 251.0719, |
|
"eval_samples_per_second": 2.549, |
|
"eval_steps_per_second": 0.159, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.31864673485444533, |
|
"grad_norm": 26.44162368774414, |
|
"learning_rate": 3.462894840084642e-05, |
|
"logits/chosen": -0.38823994994163513, |
|
"logits/rejected": -0.6459166407585144, |
|
"logps/chosen": -190.995361328125, |
|
"logps/rejected": -173.5061492919922, |
|
"loss": 1.0993, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.19754914939403534, |
|
"rewards/margins": -0.06637996435165405, |
|
"rewards/rejected": -0.1311691701412201, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3225806451612903, |
|
"grad_norm": 23.88953971862793, |
|
"learning_rate": 3.4440149854698904e-05, |
|
"logits/chosen": -0.38585788011550903, |
|
"logits/rejected": -0.7214224338531494, |
|
"logps/chosen": -205.794189453125, |
|
"logps/rejected": -175.1046600341797, |
|
"loss": 0.7178, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.010714983567595482, |
|
"rewards/margins": 0.5465508699417114, |
|
"rewards/rejected": -0.535835862159729, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.32651455546813535, |
|
"grad_norm": 22.045515060424805, |
|
"learning_rate": 3.424862413888134e-05, |
|
"logits/chosen": -0.30153313279151917, |
|
"logits/rejected": -0.49861517548561096, |
|
"logps/chosen": -193.66433715820312, |
|
"logps/rejected": -185.2410430908203, |
|
"loss": 0.7397, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7127935290336609, |
|
"rewards/margins": 0.6260470151901245, |
|
"rewards/rejected": 0.08674647659063339, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3304484657749803, |
|
"grad_norm": 27.552871704101562, |
|
"learning_rate": 3.4054407424979804e-05, |
|
"logits/chosen": -0.24517476558685303, |
|
"logits/rejected": -0.6450390815734863, |
|
"logps/chosen": -207.15200805664062, |
|
"logps/rejected": -176.4978485107422, |
|
"loss": 0.8156, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.253557562828064, |
|
"rewards/margins": 0.6033787131309509, |
|
"rewards/rejected": 0.6501787900924683, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.33438237608182536, |
|
"grad_norm": 17.781063079833984, |
|
"learning_rate": 3.3857536392802806e-05, |
|
"logits/chosen": -0.20200546085834503, |
|
"logits/rejected": -0.5061112642288208, |
|
"logps/chosen": -211.17922973632812, |
|
"logps/rejected": -192.77493286132812, |
|
"loss": 0.7883, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.2221243381500244, |
|
"rewards/margins": 0.416652113199234, |
|
"rewards/rejected": 0.805472195148468, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.33831628638867034, |
|
"grad_norm": 31.43796730041504, |
|
"learning_rate": 3.365804822345396e-05, |
|
"logits/chosen": -0.30611589550971985, |
|
"logits/rejected": -0.7340467572212219, |
|
"logps/chosen": -188.98428344726562, |
|
"logps/rejected": -158.50927734375, |
|
"loss": 0.7486, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 1.536189079284668, |
|
"rewards/margins": 0.589336633682251, |
|
"rewards/rejected": 0.9468523859977722, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3422501966955153, |
|
"grad_norm": 21.21323585510254, |
|
"learning_rate": 3.3455980592309925e-05, |
|
"logits/chosen": -0.445300817489624, |
|
"logits/rejected": -0.6968305110931396, |
|
"logps/chosen": -191.3743896484375, |
|
"logps/rejected": -173.6061248779297, |
|
"loss": 0.8092, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 2.1057379245758057, |
|
"rewards/margins": 0.6257677674293518, |
|
"rewards/rejected": 1.4799703359603882, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.34618410700236035, |
|
"grad_norm": 18.975866317749023, |
|
"learning_rate": 3.3251371661905066e-05, |
|
"logits/chosen": -0.05589728429913521, |
|
"logits/rejected": -0.5498273968696594, |
|
"logps/chosen": -201.5423583984375, |
|
"logps/rejected": -165.8180694580078, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.7923845052719116, |
|
"rewards/margins": 0.8942405581474304, |
|
"rewards/rejected": 0.8981439471244812, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.35011801730920533, |
|
"grad_norm": 25.26014518737793, |
|
"learning_rate": 3.3044260074724036e-05, |
|
"logits/chosen": -0.47154492139816284, |
|
"logits/rejected": -0.7864343523979187, |
|
"logps/chosen": -182.36541748046875, |
|
"logps/rejected": -167.81796264648438, |
|
"loss": 0.791, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.5660803318023682, |
|
"rewards/margins": 0.4582035541534424, |
|
"rewards/rejected": 1.1078766584396362, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.35405192761605037, |
|
"grad_norm": 20.178096771240234, |
|
"learning_rate": 3.283468494590378e-05, |
|
"logits/chosen": -0.5151780247688293, |
|
"logits/rejected": -0.8873647451400757, |
|
"logps/chosen": -186.1438751220703, |
|
"logps/rejected": -172.07485961914062, |
|
"loss": 0.7255, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.5227115154266357, |
|
"rewards/margins": 0.7071123123168945, |
|
"rewards/rejected": 0.815599262714386, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.35798583792289534, |
|
"grad_norm": 25.483036041259766, |
|
"learning_rate": 3.26226858558462e-05, |
|
"logits/chosen": -0.4173513948917389, |
|
"logits/rejected": -0.8169075846672058, |
|
"logps/chosen": -220.79647827148438, |
|
"logps/rejected": -183.74241638183594, |
|
"loss": 0.9569, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.048793911933899, |
|
"rewards/margins": 0.1925489604473114, |
|
"rewards/rejected": 0.8562449216842651, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3619197482297404, |
|
"grad_norm": 24.004615783691406, |
|
"learning_rate": 3.240830284274301e-05, |
|
"logits/chosen": -0.5872361063957214, |
|
"logits/rejected": -0.9254258275032043, |
|
"logps/chosen": -199.5272674560547, |
|
"logps/rejected": -171.4918212890625, |
|
"loss": 0.8168, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.2763752043247223, |
|
"rewards/margins": 0.33582815527915955, |
|
"rewards/rejected": -0.05945297330617905, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.36585365853658536, |
|
"grad_norm": 23.19999885559082, |
|
"learning_rate": 3.219157639501416e-05, |
|
"logits/chosen": -0.2660069167613983, |
|
"logits/rejected": -0.4420255720615387, |
|
"logps/chosen": -187.30184936523438, |
|
"logps/rejected": -184.6049041748047, |
|
"loss": 0.871, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.41878828406333923, |
|
"rewards/margins": 0.461493581533432, |
|
"rewards/rejected": -0.04270533099770546, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3697875688434304, |
|
"grad_norm": 26.874906539916992, |
|
"learning_rate": 3.1972547443661114e-05, |
|
"logits/chosen": -0.19552254676818848, |
|
"logits/rejected": -0.5767315030097961, |
|
"logps/chosen": -205.4563446044922, |
|
"logps/rejected": -186.74722290039062, |
|
"loss": 0.8292, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7297551035881042, |
|
"rewards/margins": 0.3652102053165436, |
|
"rewards/rejected": 0.3645448684692383, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.37372147915027537, |
|
"grad_norm": 25.91361427307129, |
|
"learning_rate": 3.175125735453664e-05, |
|
"logits/chosen": -0.20148754119873047, |
|
"logits/rejected": -0.5481426119804382, |
|
"logps/chosen": -221.0868682861328, |
|
"logps/rejected": -197.3042449951172, |
|
"loss": 0.776, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.2072087526321411, |
|
"rewards/margins": 0.3937892019748688, |
|
"rewards/rejected": 0.8134196400642395, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3776553894571204, |
|
"grad_norm": 20.111371994018555, |
|
"learning_rate": 3.152774792053247e-05, |
|
"logits/chosen": -0.3326271176338196, |
|
"logits/rejected": -0.5712831616401672, |
|
"logps/chosen": -203.29025268554688, |
|
"logps/rejected": -187.23904418945312, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.6326103210449219, |
|
"rewards/margins": 0.8585765957832336, |
|
"rewards/rejected": 0.7740335464477539, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3815892997639654, |
|
"grad_norm": 24.087158203125, |
|
"learning_rate": 3.1302061353686264e-05, |
|
"logits/chosen": -0.21493315696716309, |
|
"logits/rejected": -0.42921167612075806, |
|
"logps/chosen": -213.1800079345703, |
|
"logps/rejected": -195.15737915039062, |
|
"loss": 1.0061, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 2.751986026763916, |
|
"rewards/margins": 0.45154237747192383, |
|
"rewards/rejected": 2.3004441261291504, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.38552321007081036, |
|
"grad_norm": 37.55143356323242, |
|
"learning_rate": 3.107424027720941e-05, |
|
"logits/chosen": -0.5423256158828735, |
|
"logits/rejected": -1.0729395151138306, |
|
"logps/chosen": -193.28945922851562, |
|
"logps/rejected": -157.5052947998047, |
|
"loss": 0.7334, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 2.7436954975128174, |
|
"rewards/margins": 0.9733211398124695, |
|
"rewards/rejected": 1.7703745365142822, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3894571203776554, |
|
"grad_norm": 24.617494583129883, |
|
"learning_rate": 3.0844327717437265e-05, |
|
"logits/chosen": -0.006182658486068249, |
|
"logits/rejected": -0.4748276174068451, |
|
"logps/chosen": -206.80899047851562, |
|
"logps/rejected": -172.13172912597656, |
|
"loss": 0.7678, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.979702949523926, |
|
"rewards/margins": 0.7632544636726379, |
|
"rewards/rejected": 2.2164485454559326, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3933910306845004, |
|
"grad_norm": 17.355024337768555, |
|
"learning_rate": 3.0612367095703124e-05, |
|
"logits/chosen": -0.045905523002147675, |
|
"logits/rejected": -0.5579321384429932, |
|
"logps/chosen": -204.173583984375, |
|
"logps/rejected": -175.21615600585938, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.6483314037323, |
|
"rewards/margins": 0.7989780306816101, |
|
"rewards/rejected": 1.8493534326553345, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3933910306845004, |
|
"eval_logits/chosen": 1.3562368154525757, |
|
"eval_logits/rejected": 1.1292431354522705, |
|
"eval_logps/chosen": -202.31715393066406, |
|
"eval_logps/rejected": -176.03109741210938, |
|
"eval_loss": 0.8593710660934448, |
|
"eval_rewards/accuracies": 0.614062488079071, |
|
"eval_rewards/chosen": 2.261263370513916, |
|
"eval_rewards/margins": 0.520569920539856, |
|
"eval_rewards/rejected": 1.7406933307647705, |
|
"eval_runtime": 250.7548, |
|
"eval_samples_per_second": 2.552, |
|
"eval_steps_per_second": 0.16, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3973249409913454, |
|
"grad_norm": 21.848352432250977, |
|
"learning_rate": 3.0378402220137696e-05, |
|
"logits/chosen": 0.116512730717659, |
|
"logits/rejected": -0.3560892641544342, |
|
"logps/chosen": -202.67498779296875, |
|
"logps/rejected": -178.8372802734375, |
|
"loss": 0.7315, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 2.147618532180786, |
|
"rewards/margins": 0.7310143709182739, |
|
"rewards/rejected": 1.416603922843933, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.4012588512981904, |
|
"grad_norm": 27.533050537109375, |
|
"learning_rate": 3.0142477277395462e-05, |
|
"logits/chosen": 0.0075813233852386475, |
|
"logits/rejected": -0.6258127689361572, |
|
"logps/chosen": -209.9758758544922, |
|
"logps/rejected": -166.99766540527344, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.3229306936264038, |
|
"rewards/margins": 0.7913904190063477, |
|
"rewards/rejected": 0.5315402746200562, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4051927616050354, |
|
"grad_norm": 16.1999454498291, |
|
"learning_rate": 2.990463682430963e-05, |
|
"logits/chosen": -0.35700559616088867, |
|
"logits/rejected": -0.5182300209999084, |
|
"logps/chosen": -200.20985412597656, |
|
"logps/rejected": -179.12539672851562, |
|
"loss": 0.7671, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.9399553537368774, |
|
"rewards/margins": 0.48222360014915466, |
|
"rewards/rejected": 0.4577317237854004, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.4091266719118804, |
|
"grad_norm": 32.54240798950195, |
|
"learning_rate": 2.966492577947704e-05, |
|
"logits/chosen": -0.12386194616556168, |
|
"logits/rejected": -0.4458809494972229, |
|
"logps/chosen": -189.1624755859375, |
|
"logps/rejected": -168.4569091796875, |
|
"loss": 0.9158, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 1.4050761461257935, |
|
"rewards/margins": 0.6978380084037781, |
|
"rewards/rejected": 0.7072380781173706, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.41306058221872544, |
|
"grad_norm": 21.36524200439453, |
|
"learning_rate": 2.9423389414774918e-05, |
|
"logits/chosen": -0.18413856625556946, |
|
"logits/rejected": -0.5436551570892334, |
|
"logps/chosen": -206.7151336669922, |
|
"logps/rejected": -181.02978515625, |
|
"loss": 0.8962, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 1.010108232498169, |
|
"rewards/margins": 0.30978208780288696, |
|
"rewards/rejected": 0.700326144695282, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.4169944925255704, |
|
"grad_norm": 57.26128005981445, |
|
"learning_rate": 2.918007334681074e-05, |
|
"logits/chosen": -0.3969922661781311, |
|
"logits/rejected": -0.7256218194961548, |
|
"logps/chosen": -199.84579467773438, |
|
"logps/rejected": -165.75674438476562, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 2.075115442276001, |
|
"rewards/margins": 0.9931079745292664, |
|
"rewards/rejected": 1.0820075273513794, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4209284028324154, |
|
"grad_norm": 19.898866653442383, |
|
"learning_rate": 2.8935023528307124e-05, |
|
"logits/chosen": -0.1375611126422882, |
|
"logits/rejected": -0.4858548641204834, |
|
"logps/chosen": -201.7903289794922, |
|
"logps/rejected": -179.10194396972656, |
|
"loss": 0.7802, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 2.139580488204956, |
|
"rewards/margins": 0.8191660046577454, |
|
"rewards/rejected": 1.3204143047332764, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.42486231313926043, |
|
"grad_norm": 21.983154296875, |
|
"learning_rate": 2.868828623942317e-05, |
|
"logits/chosen": -0.16572633385658264, |
|
"logits/rejected": -0.5750501155853271, |
|
"logps/chosen": -197.57725524902344, |
|
"logps/rejected": -168.39517211914062, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 2.660771608352661, |
|
"rewards/margins": 1.5578687191009521, |
|
"rewards/rejected": 1.1029030084609985, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4287962234461054, |
|
"grad_norm": 25.60312843322754, |
|
"learning_rate": 2.8439908079013975e-05, |
|
"logits/chosen": -0.039677783846855164, |
|
"logits/rejected": -0.45520296692848206, |
|
"logps/chosen": -201.81983947753906, |
|
"logps/rejected": -175.03994750976562, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 2.3861243724823, |
|
"rewards/margins": 0.9951289892196655, |
|
"rewards/rejected": 1.3909955024719238, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.43273013375295044, |
|
"grad_norm": 18.380178451538086, |
|
"learning_rate": 2.8189935955829977e-05, |
|
"logits/chosen": -0.1264878809452057, |
|
"logits/rejected": -0.4807268977165222, |
|
"logps/chosen": -212.05575561523438, |
|
"logps/rejected": -190.66098022460938, |
|
"loss": 1.1466, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 1.8014596700668335, |
|
"rewards/margins": 0.04773964732885361, |
|
"rewards/rejected": 1.7537200450897217, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4366640440597954, |
|
"grad_norm": 23.785415649414062, |
|
"learning_rate": 2.7938417079657746e-05, |
|
"logits/chosen": -0.5862978100776672, |
|
"logits/rejected": -0.9251095056533813, |
|
"logps/chosen": -208.53695678710938, |
|
"logps/rejected": -180.97097778320312, |
|
"loss": 0.7145, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 2.073394775390625, |
|
"rewards/margins": 0.8073825836181641, |
|
"rewards/rejected": 1.2660123109817505, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.44059795436664045, |
|
"grad_norm": 24.911800384521484, |
|
"learning_rate": 2.7685398952403946e-05, |
|
"logits/chosen": -0.22417886555194855, |
|
"logits/rejected": -0.524285078048706, |
|
"logps/chosen": -207.4349365234375, |
|
"logps/rejected": -192.63758850097656, |
|
"loss": 0.9911, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 2.434218645095825, |
|
"rewards/margins": 0.4671500623226166, |
|
"rewards/rejected": 1.9670684337615967, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.44453186467348543, |
|
"grad_norm": 21.26664161682129, |
|
"learning_rate": 2.743092935912409e-05, |
|
"logits/chosen": -0.168512225151062, |
|
"logits/rejected": -0.4610822796821594, |
|
"logps/chosen": -207.57046508789062, |
|
"logps/rejected": -194.7887420654297, |
|
"loss": 0.9337, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 2.3753578662872314, |
|
"rewards/margins": 0.32185056805610657, |
|
"rewards/rejected": 2.0535073280334473, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.44846577498033047, |
|
"grad_norm": 23.40607452392578, |
|
"learning_rate": 2.7175056358997892e-05, |
|
"logits/chosen": -0.2319369614124298, |
|
"logits/rejected": -0.6572431325912476, |
|
"logps/chosen": -204.3990936279297, |
|
"logps/rejected": -180.0780487060547, |
|
"loss": 0.655, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 2.375079393386841, |
|
"rewards/margins": 0.9229291081428528, |
|
"rewards/rejected": 1.4521501064300537, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.45239968528717545, |
|
"grad_norm": 27.157663345336914, |
|
"learning_rate": 2.6917828276252747e-05, |
|
"logits/chosen": -0.1399884819984436, |
|
"logits/rejected": -0.46016645431518555, |
|
"logps/chosen": -218.91256713867188, |
|
"logps/rejected": -200.45068359375, |
|
"loss": 0.8026, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.7319839000701904, |
|
"rewards/margins": 0.786080539226532, |
|
"rewards/rejected": 0.9459033012390137, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4563335955940205, |
|
"grad_norm": 20.441112518310547, |
|
"learning_rate": 2.6659293691037246e-05, |
|
"logits/chosen": -0.29733794927597046, |
|
"logits/rejected": -0.6675035953521729, |
|
"logps/chosen": -202.4237518310547, |
|
"logps/rejected": -176.8135223388672, |
|
"loss": 0.7622, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.0021812915802002, |
|
"rewards/margins": 0.6122254133224487, |
|
"rewards/rejected": 0.3899558186531067, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.46026750590086546, |
|
"grad_norm": 22.287860870361328, |
|
"learning_rate": 2.639950143024629e-05, |
|
"logits/chosen": -0.3219448924064636, |
|
"logits/rejected": -0.6755141019821167, |
|
"logps/chosen": -213.97811889648438, |
|
"logps/rejected": -184.30332946777344, |
|
"loss": 0.7878, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.9022668600082397, |
|
"rewards/margins": 0.5562304258346558, |
|
"rewards/rejected": 0.34603649377822876, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.46420141620771044, |
|
"grad_norm": 26.395917892456055, |
|
"learning_rate": 2.613850055829967e-05, |
|
"logits/chosen": -0.2554970383644104, |
|
"logits/rejected": -0.41837620735168457, |
|
"logps/chosen": -191.0037078857422, |
|
"logps/rejected": -185.1075897216797, |
|
"loss": 0.726, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.7684756517410278, |
|
"rewards/margins": 0.5839057564735413, |
|
"rewards/rejected": 0.18456986546516418, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.46813532651455547, |
|
"grad_norm": 25.363393783569336, |
|
"learning_rate": 2.587634036787571e-05, |
|
"logits/chosen": -0.37291693687438965, |
|
"logits/rejected": -0.9038352966308594, |
|
"logps/chosen": -202.77792358398438, |
|
"logps/rejected": -163.73086547851562, |
|
"loss": 0.7478, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.1803675889968872, |
|
"rewards/margins": 0.8836774826049805, |
|
"rewards/rejected": 0.2966901659965515, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.47206923682140045, |
|
"grad_norm": 27.41828727722168, |
|
"learning_rate": 2.5613070370601863e-05, |
|
"logits/chosen": -0.34387531876564026, |
|
"logits/rejected": -0.6873073577880859, |
|
"logps/chosen": -197.41146850585938, |
|
"logps/rejected": -171.6976318359375, |
|
"loss": 0.7599, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.3946095705032349, |
|
"rewards/margins": 0.7079658508300781, |
|
"rewards/rejected": 0.686643660068512, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.47206923682140045, |
|
"eval_logits/chosen": 1.3973848819732666, |
|
"eval_logits/rejected": 1.1919221878051758, |
|
"eval_logps/chosen": -203.7362518310547, |
|
"eval_logps/rejected": -177.5819854736328, |
|
"eval_loss": 0.8243551254272461, |
|
"eval_rewards/accuracies": 0.612500011920929, |
|
"eval_rewards/chosen": 1.693615198135376, |
|
"eval_rewards/margins": 0.5732673406600952, |
|
"eval_rewards/rejected": 1.1203477382659912, |
|
"eval_runtime": 251.0741, |
|
"eval_samples_per_second": 2.549, |
|
"eval_steps_per_second": 0.159, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4760031471282455, |
|
"grad_norm": 14.156159400939941, |
|
"learning_rate": 2.5348740287703942e-05, |
|
"logits/chosen": -0.28574198484420776, |
|
"logits/rejected": -0.7568629384040833, |
|
"logps/chosen": -192.17074584960938, |
|
"logps/rejected": -158.06005859375, |
|
"loss": 0.8055, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 1.7736876010894775, |
|
"rewards/margins": 0.6764134764671326, |
|
"rewards/rejected": 1.0972741842269897, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.47993705743509046, |
|
"grad_norm": 25.657800674438477, |
|
"learning_rate": 2.508340004061574e-05, |
|
"logits/chosen": -0.24258217215538025, |
|
"logits/rejected": -0.6088284254074097, |
|
"logps/chosen": -222.4447784423828, |
|
"logps/rejected": -190.37503051757812, |
|
"loss": 0.8069, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.226174831390381, |
|
"rewards/margins": 0.7204557657241821, |
|
"rewards/rejected": 1.5057189464569092, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4838709677419355, |
|
"grad_norm": 28.052139282226562, |
|
"learning_rate": 2.4817099741550864e-05, |
|
"logits/chosen": -0.3004125952720642, |
|
"logits/rejected": -0.5441415309906006, |
|
"logps/chosen": -196.12718200683594, |
|
"logps/rejected": -192.7050018310547, |
|
"loss": 0.9248, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 2.162635326385498, |
|
"rewards/margins": 0.5665928721427917, |
|
"rewards/rejected": 1.5960423946380615, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 23.494508743286133, |
|
"learning_rate": 2.4549889684038543e-05, |
|
"logits/chosen": -0.611892819404602, |
|
"logits/rejected": -0.8981421589851379, |
|
"logps/chosen": -190.71786499023438, |
|
"logps/rejected": -167.4793701171875, |
|
"loss": 0.7981, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 2.283334732055664, |
|
"rewards/margins": 0.6954591870307922, |
|
"rewards/rejected": 1.5878756046295166, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4917387883556255, |
|
"grad_norm": 15.813286781311035, |
|
"learning_rate": 2.4281820333425172e-05, |
|
"logits/chosen": -0.19501128792762756, |
|
"logits/rejected": -0.6402938365936279, |
|
"logps/chosen": -194.93563842773438, |
|
"logps/rejected": -193.161376953125, |
|
"loss": 0.7841, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.6924636363983154, |
|
"rewards/margins": 0.6635173559188843, |
|
"rewards/rejected": 1.0289465188980103, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4956726986624705, |
|
"grad_norm": 34.470821380615234, |
|
"learning_rate": 2.4012942317343404e-05, |
|
"logits/chosen": -0.10980845987796783, |
|
"logits/rejected": -0.5895272493362427, |
|
"logps/chosen": -207.9347381591797, |
|
"logps/rejected": -179.27322387695312, |
|
"loss": 0.8625, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.9872920513153076, |
|
"rewards/margins": 0.6902352571487427, |
|
"rewards/rejected": 1.2970569133758545, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4996066089693155, |
|
"grad_norm": 21.860822677612305, |
|
"learning_rate": 2.374330641615064e-05, |
|
"logits/chosen": -0.47541099786758423, |
|
"logits/rejected": -0.6988755464553833, |
|
"logps/chosen": -181.64480590820312, |
|
"logps/rejected": -169.11415100097656, |
|
"loss": 1.0446, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.7799679636955261, |
|
"rewards/margins": -0.016168225556612015, |
|
"rewards/rejected": 0.796136200428009, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5035405192761605, |
|
"grad_norm": 24.410993576049805, |
|
"learning_rate": 2.3472963553338614e-05, |
|
"logits/chosen": -0.1209309846162796, |
|
"logits/rejected": -0.6129121780395508, |
|
"logps/chosen": -208.46646118164062, |
|
"logps/rejected": -173.28662109375, |
|
"loss": 0.7813, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.01343834400177, |
|
"rewards/margins": 0.7845452427864075, |
|
"rewards/rejected": 0.22889307141304016, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5074744295830055, |
|
"grad_norm": 16.415790557861328, |
|
"learning_rate": 2.3201964785915956e-05, |
|
"logits/chosen": -0.06124670431017876, |
|
"logits/rejected": -0.34292641282081604, |
|
"logps/chosen": -202.11863708496094, |
|
"logps/rejected": -185.01980590820312, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.7241964936256409, |
|
"rewards/margins": 0.7067764401435852, |
|
"rewards/rejected": 0.017420072108507156, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5114083398898505, |
|
"grad_norm": 21.06545066833496, |
|
"learning_rate": 2.29303612947656e-05, |
|
"logits/chosen": -0.18204979598522186, |
|
"logits/rejected": -0.668160080909729, |
|
"logps/chosen": -217.71627807617188, |
|
"logps/rejected": -200.24777221679688, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.7872123718261719, |
|
"rewards/margins": 0.820330023765564, |
|
"rewards/rejected": -0.0331176295876503, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5153422501966956, |
|
"grad_norm": 20.972436904907227, |
|
"learning_rate": 2.265820437497871e-05, |
|
"logits/chosen": -0.4221726059913635, |
|
"logits/rejected": -0.7673285603523254, |
|
"logps/chosen": -193.30931091308594, |
|
"logps/rejected": -182.8597412109375, |
|
"loss": 0.5963, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.8721264600753784, |
|
"rewards/margins": 1.0373555421829224, |
|
"rewards/rejected": -0.1652289777994156, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5192761605035405, |
|
"grad_norm": 25.041427612304688, |
|
"learning_rate": 2.2385545426167115e-05, |
|
"logits/chosen": -0.24139158427715302, |
|
"logits/rejected": -0.6950744390487671, |
|
"logps/chosen": -218.1499481201172, |
|
"logps/rejected": -176.1727752685547, |
|
"loss": 0.742, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.286029577255249, |
|
"rewards/margins": 0.7787684202194214, |
|
"rewards/rejected": 0.5072611570358276, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5232100708103855, |
|
"grad_norm": 24.626811981201172, |
|
"learning_rate": 2.2112435942755942e-05, |
|
"logits/chosen": -0.3278786540031433, |
|
"logits/rejected": -0.5997655987739563, |
|
"logps/chosen": -197.0006561279297, |
|
"logps/rejected": -180.835205078125, |
|
"loss": 0.9312, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 1.228043794631958, |
|
"rewards/margins": 0.33655455708503723, |
|
"rewards/rejected": 0.8914892077445984, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5271439811172305, |
|
"grad_norm": 22.894763946533203, |
|
"learning_rate": 2.1838927504258357e-05, |
|
"logits/chosen": -0.251998633146286, |
|
"logits/rejected": -0.6962267160415649, |
|
"logps/chosen": -204.58847045898438, |
|
"logps/rejected": -177.03872680664062, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.4768571853637695, |
|
"rewards/margins": 0.7038243412971497, |
|
"rewards/rejected": 0.7730330228805542, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5310778914240756, |
|
"grad_norm": 18.778362274169922, |
|
"learning_rate": 2.156507176553429e-05, |
|
"logits/chosen": -0.1405915766954422, |
|
"logits/rejected": -0.6027745008468628, |
|
"logps/chosen": -204.18991088867188, |
|
"logps/rejected": -187.57095336914062, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 2.3066718578338623, |
|
"rewards/margins": 1.1950688362121582, |
|
"rewards/rejected": 1.111603021621704, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5350118017309206, |
|
"grad_norm": 17.80322265625, |
|
"learning_rate": 2.129092044703485e-05, |
|
"logits/chosen": -0.15540140867233276, |
|
"logits/rejected": -0.4367486834526062, |
|
"logps/chosen": -197.77114868164062, |
|
"logps/rejected": -183.8221893310547, |
|
"loss": 0.8221, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 2.124110460281372, |
|
"rewards/margins": 0.5898113250732422, |
|
"rewards/rejected": 1.5342991352081299, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5389457120377655, |
|
"grad_norm": 20.969318389892578, |
|
"learning_rate": 2.1016525325034405e-05, |
|
"logits/chosen": -0.06777362525463104, |
|
"logits/rejected": -0.6237557530403137, |
|
"logps/chosen": -211.85391235351562, |
|
"logps/rejected": -179.53981018066406, |
|
"loss": 0.7552, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 2.0291833877563477, |
|
"rewards/margins": 0.694739818572998, |
|
"rewards/rejected": 1.33444344997406, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5428796223446105, |
|
"grad_norm": 16.9830265045166, |
|
"learning_rate": 2.0741938221852104e-05, |
|
"logits/chosen": -0.3013477623462677, |
|
"logits/rejected": -0.6262307167053223, |
|
"logps/chosen": -188.5670166015625, |
|
"logps/rejected": -171.5750274658203, |
|
"loss": 0.7812, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 1.7201658487319946, |
|
"rewards/margins": 0.6786609888076782, |
|
"rewards/rejected": 1.0415048599243164, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5468135326514555, |
|
"grad_norm": 20.50164222717285, |
|
"learning_rate": 2.046721099606471e-05, |
|
"logits/chosen": -0.2895171046257019, |
|
"logits/rejected": -0.7959840893745422, |
|
"logps/chosen": -202.86526489257812, |
|
"logps/rejected": -168.116943359375, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 1.7198221683502197, |
|
"rewards/margins": 0.9006645083427429, |
|
"rewards/rejected": 0.8191574215888977, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5507474429583006, |
|
"grad_norm": 24.72359848022461, |
|
"learning_rate": 2.0192395532712556e-05, |
|
"logits/chosen": -0.43050312995910645, |
|
"logits/rejected": -0.6164566278457642, |
|
"logps/chosen": -197.2266082763672, |
|
"logps/rejected": -185.25006103515625, |
|
"loss": 0.5887, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.4764689207077026, |
|
"rewards/margins": 0.8574800491333008, |
|
"rewards/rejected": 0.6189889311790466, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5507474429583006, |
|
"eval_logits/chosen": 1.277055025100708, |
|
"eval_logits/rejected": 1.059131145477295, |
|
"eval_logps/chosen": -205.26318359375, |
|
"eval_logps/rejected": -179.12974548339844, |
|
"eval_loss": 0.7933992743492126, |
|
"eval_rewards/accuracies": 0.6015625, |
|
"eval_rewards/chosen": 1.0828492641448975, |
|
"eval_rewards/margins": 0.5816105008125305, |
|
"eval_rewards/rejected": 0.5012389421463013, |
|
"eval_runtime": 250.5558, |
|
"eval_samples_per_second": 2.554, |
|
"eval_steps_per_second": 0.16, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5546813532651456, |
|
"grad_norm": 22.896121978759766, |
|
"learning_rate": 1.9917543733500532e-05, |
|
"logits/chosen": -0.005652183201164007, |
|
"logits/rejected": -0.21010038256645203, |
|
"logps/chosen": -193.43350219726562, |
|
"logps/rejected": -171.82823181152344, |
|
"loss": 0.8332, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.7293393015861511, |
|
"rewards/margins": 0.2607612609863281, |
|
"rewards/rejected": 0.4685780107975006, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5586152635719905, |
|
"grad_norm": 19.870878219604492, |
|
"learning_rate": 1.9642707506995958e-05, |
|
"logits/chosen": -0.15198977291584015, |
|
"logits/rejected": -0.5298476815223694, |
|
"logps/chosen": -211.7928466796875, |
|
"logps/rejected": -179.28244018554688, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.4447071552276611, |
|
"rewards/margins": 1.0455639362335205, |
|
"rewards/rejected": 0.39914312958717346, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5625491738788355, |
|
"grad_norm": 23.537195205688477, |
|
"learning_rate": 1.9367938758825056e-05, |
|
"logits/chosen": -0.1395382583141327, |
|
"logits/rejected": -0.6614929437637329, |
|
"logps/chosen": -209.0814208984375, |
|
"logps/rejected": -173.17478942871094, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.0658318996429443, |
|
"rewards/margins": 0.7761461734771729, |
|
"rewards/rejected": 0.2896856367588043, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5664830841856806, |
|
"grad_norm": 22.774639129638672, |
|
"learning_rate": 1.90932893818701e-05, |
|
"logits/chosen": -0.09201023727655411, |
|
"logits/rejected": -0.3696451187133789, |
|
"logps/chosen": -216.9640655517578, |
|
"logps/rejected": -189.9328155517578, |
|
"loss": 0.9453, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 1.0307306051254272, |
|
"rewards/margins": 0.025239372625947, |
|
"rewards/rejected": 1.0054912567138672, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5704169944925256, |
|
"grad_norm": 20.04373550415039, |
|
"learning_rate": 1.8818811246468875e-05, |
|
"logits/chosen": -0.21691718697547913, |
|
"logits/rejected": -0.6853747367858887, |
|
"logps/chosen": -197.96017456054688, |
|
"logps/rejected": -165.71035766601562, |
|
"loss": 0.7018, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.1925420761108398, |
|
"rewards/margins": 0.6861337423324585, |
|
"rewards/rejected": 0.5064083337783813, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5743509047993706, |
|
"grad_norm": 21.558950424194336, |
|
"learning_rate": 1.8544556190618464e-05, |
|
"logits/chosen": 0.01636449061334133, |
|
"logits/rejected": -0.25090569257736206, |
|
"logps/chosen": -206.47286987304688, |
|
"logps/rejected": -188.0323486328125, |
|
"loss": 0.7587, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 1.4961506128311157, |
|
"rewards/margins": 0.7738775014877319, |
|
"rewards/rejected": 0.722273051738739, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5782848151062155, |
|
"grad_norm": 25.04125213623047, |
|
"learning_rate": 1.8270576010185092e-05, |
|
"logits/chosen": -0.3876519501209259, |
|
"logits/rejected": -0.7680070996284485, |
|
"logps/chosen": -203.02491760253906, |
|
"logps/rejected": -169.53672790527344, |
|
"loss": 0.7619, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.2711286544799805, |
|
"rewards/margins": 0.6064616441726685, |
|
"rewards/rejected": 0.6646669507026672, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5822187254130606, |
|
"grad_norm": 19.16695213317871, |
|
"learning_rate": 1.799692244912195e-05, |
|
"logits/chosen": -0.1374569684267044, |
|
"logits/rejected": -0.48932093381881714, |
|
"logps/chosen": -223.61813354492188, |
|
"logps/rejected": -199.4268341064453, |
|
"loss": 0.6542, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.8322197198867798, |
|
"rewards/margins": 1.0043301582336426, |
|
"rewards/rejected": 0.8278897404670715, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5861526357199056, |
|
"grad_norm": 22.487895965576172, |
|
"learning_rate": 1.7723647189696843e-05, |
|
"logits/chosen": -0.11542626470327377, |
|
"logits/rejected": -0.3874351382255554, |
|
"logps/chosen": -205.8312225341797, |
|
"logps/rejected": -180.7718505859375, |
|
"loss": 0.8241, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.5367833375930786, |
|
"rewards/margins": 0.5126625895500183, |
|
"rewards/rejected": 1.0241206884384155, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5900865460267506, |
|
"grad_norm": 16.18448257446289, |
|
"learning_rate": 1.7450801842731445e-05, |
|
"logits/chosen": -0.023925015702843666, |
|
"logits/rejected": -0.4763374328613281, |
|
"logps/chosen": -214.9409942626953, |
|
"logps/rejected": -185.83316040039062, |
|
"loss": 0.623, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.6252682209014893, |
|
"rewards/margins": 1.056118369102478, |
|
"rewards/rejected": 0.5691498517990112, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5940204563335956, |
|
"grad_norm": 28.91587257385254, |
|
"learning_rate": 1.7178437937854065e-05, |
|
"logits/chosen": -0.1288028210401535, |
|
"logits/rejected": -0.6050774455070496, |
|
"logps/chosen": -207.6809539794922, |
|
"logps/rejected": -183.9476776123047, |
|
"loss": 0.7653, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 1.3927785158157349, |
|
"rewards/margins": 0.8300971984863281, |
|
"rewards/rejected": 0.5626811981201172, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5979543666404405, |
|
"grad_norm": 18.849876403808594, |
|
"learning_rate": 1.6906606913767778e-05, |
|
"logits/chosen": -0.2586264908313751, |
|
"logits/rejected": -0.6829530596733093, |
|
"logps/chosen": -200.44479370117188, |
|
"logps/rejected": -167.65695190429688, |
|
"loss": 0.7795, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 1.2213537693023682, |
|
"rewards/margins": 0.4475148618221283, |
|
"rewards/rejected": 0.7738388180732727, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6018882769472856, |
|
"grad_norm": 25.64194679260254, |
|
"learning_rate": 1.663536010853567e-05, |
|
"logits/chosen": -0.28778618574142456, |
|
"logits/rejected": -0.8489567637443542, |
|
"logps/chosen": -210.17996215820312, |
|
"logps/rejected": -162.97006225585938, |
|
"loss": 0.7059, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 1.7119789123535156, |
|
"rewards/margins": 1.09046471118927, |
|
"rewards/rejected": 0.6215142011642456, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.6058221872541306, |
|
"grad_norm": 14.11388874053955, |
|
"learning_rate": 1.6364748749885137e-05, |
|
"logits/chosen": -0.12569937109947205, |
|
"logits/rejected": -0.47604307532310486, |
|
"logps/chosen": -211.08639526367188, |
|
"logps/rejected": -186.719970703125, |
|
"loss": 0.7375, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.7021243572235107, |
|
"rewards/margins": 0.5951262712478638, |
|
"rewards/rejected": 1.106998085975647, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6097560975609756, |
|
"grad_norm": 20.6273193359375, |
|
"learning_rate": 1.6094823945533e-05, |
|
"logits/chosen": 0.008131015114486217, |
|
"logits/rejected": -0.5134211778640747, |
|
"logps/chosen": -211.6152801513672, |
|
"logps/rejected": -173.49942016601562, |
|
"loss": 0.8714, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.374456763267517, |
|
"rewards/margins": 0.7731674909591675, |
|
"rewards/rejected": 0.6012891530990601, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6136900078678206, |
|
"grad_norm": 17.514083862304688, |
|
"learning_rate": 1.58256366735333e-05, |
|
"logits/chosen": -0.0192732997238636, |
|
"logits/rejected": -0.37163615226745605, |
|
"logps/chosen": -192.7857208251953, |
|
"logps/rejected": -184.69699096679688, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.496361255645752, |
|
"rewards/margins": 1.1403007507324219, |
|
"rewards/rejected": 0.35606056451797485, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6176239181746657, |
|
"grad_norm": 12.284652709960938, |
|
"learning_rate": 1.555723777264957e-05, |
|
"logits/chosen": -0.4136200547218323, |
|
"logits/rejected": -0.7878348231315613, |
|
"logps/chosen": -188.60914611816406, |
|
"logps/rejected": -162.43040466308594, |
|
"loss": 0.7347, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.2863472700119019, |
|
"rewards/margins": 0.7062928676605225, |
|
"rewards/rejected": 0.580054521560669, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.6215578284815106, |
|
"grad_norm": 26.990276336669922, |
|
"learning_rate": 1.52896779327534e-05, |
|
"logits/chosen": -0.062058307230472565, |
|
"logits/rejected": -0.4678223133087158, |
|
"logps/chosen": -197.87442016601562, |
|
"logps/rejected": -181.07705688476562, |
|
"loss": 0.7406, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.5467268228530884, |
|
"rewards/margins": 0.6265745162963867, |
|
"rewards/rejected": 0.9201523661613464, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6254917387883556, |
|
"grad_norm": 26.78017807006836, |
|
"learning_rate": 1.5023007685251153e-05, |
|
"logits/chosen": -0.37522611021995544, |
|
"logits/rejected": -0.6946735382080078, |
|
"logps/chosen": -197.1485137939453, |
|
"logps/rejected": -177.7647705078125, |
|
"loss": 0.8528, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.4908908605575562, |
|
"rewards/margins": 0.8086551427841187, |
|
"rewards/rejected": 0.682235598564148, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6294256490952006, |
|
"grad_norm": 22.932809829711914, |
|
"learning_rate": 1.4757277393540602e-05, |
|
"logits/chosen": -0.07537481188774109, |
|
"logits/rejected": -0.4230824112892151, |
|
"logps/chosen": -195.38076782226562, |
|
"logps/rejected": -173.30007934570312, |
|
"loss": 0.6356, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.180471420288086, |
|
"rewards/margins": 0.8355466723442078, |
|
"rewards/rejected": 0.3449247181415558, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6294256490952006, |
|
"eval_logits/chosen": 1.2686213254928589, |
|
"eval_logits/rejected": 1.0565236806869507, |
|
"eval_logps/chosen": -205.04721069335938, |
|
"eval_logps/rejected": -179.17825317382812, |
|
"eval_loss": 0.8022397756576538, |
|
"eval_rewards/accuracies": 0.628125011920929, |
|
"eval_rewards/chosen": 1.1692273616790771, |
|
"eval_rewards/margins": 0.6873818039894104, |
|
"eval_rewards/rejected": 0.4818454682826996, |
|
"eval_runtime": 251.1114, |
|
"eval_samples_per_second": 2.549, |
|
"eval_steps_per_second": 0.159, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6333595594020456, |
|
"grad_norm": 27.06431007385254, |
|
"learning_rate": 1.4492537243499253e-05, |
|
"logits/chosen": -0.08504633605480194, |
|
"logits/rejected": -0.37000229954719543, |
|
"logps/chosen": -221.24990844726562, |
|
"logps/rejected": -189.64706420898438, |
|
"loss": 0.8898, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.172984004020691, |
|
"rewards/margins": 0.574467658996582, |
|
"rewards/rejected": 0.5985165238380432, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6372934697088907, |
|
"grad_norm": 20.225252151489258, |
|
"learning_rate": 1.4228837234006273e-05, |
|
"logits/chosen": -0.09346888214349747, |
|
"logits/rejected": -0.43980512022972107, |
|
"logps/chosen": -197.91098022460938, |
|
"logps/rejected": -192.83395385742188, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.971549391746521, |
|
"rewards/margins": 0.6740128397941589, |
|
"rewards/rejected": 0.29753655195236206, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6412273800157356, |
|
"grad_norm": 28.533573150634766, |
|
"learning_rate": 1.3966227167499668e-05, |
|
"logits/chosen": 0.0283651240170002, |
|
"logits/rejected": -0.32705655694007874, |
|
"logps/chosen": -229.0713348388672, |
|
"logps/rejected": -199.23040771484375, |
|
"loss": 0.8796, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.845023512840271, |
|
"rewards/margins": 0.3233472406864166, |
|
"rewards/rejected": 0.5216764211654663, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.6451612903225806, |
|
"grad_norm": 18.14284896850586, |
|
"learning_rate": 1.3704756640570575e-05, |
|
"logits/chosen": -0.26658809185028076, |
|
"logits/rejected": -0.6564346551895142, |
|
"logps/chosen": -194.92213439941406, |
|
"logps/rejected": -176.8538818359375, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.0112560987472534, |
|
"rewards/margins": 0.9789012670516968, |
|
"rewards/rejected": 0.03235483542084694, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6490952006294256, |
|
"grad_norm": 25.14547348022461, |
|
"learning_rate": 1.3444475034596464e-05, |
|
"logits/chosen": 0.08274314552545547, |
|
"logits/rejected": -0.47508057951927185, |
|
"logps/chosen": -189.21493530273438, |
|
"logps/rejected": -163.44387817382812, |
|
"loss": 0.7053, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.0577847957611084, |
|
"rewards/margins": 0.9744871258735657, |
|
"rewards/rejected": 0.08329786360263824, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.6530291109362707, |
|
"grad_norm": 23.569334030151367, |
|
"learning_rate": 1.3185431506414945e-05, |
|
"logits/chosen": -0.2618711590766907, |
|
"logits/rejected": -0.6126792430877686, |
|
"logps/chosen": -197.77426147460938, |
|
"logps/rejected": -172.6019744873047, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.0107938051223755, |
|
"rewards/margins": 0.7313990592956543, |
|
"rewards/rejected": 0.2793947160243988, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6569630212431157, |
|
"grad_norm": 14.946830749511719, |
|
"learning_rate": 1.292767497904001e-05, |
|
"logits/chosen": 0.09540750831365585, |
|
"logits/rejected": -0.42838025093078613, |
|
"logps/chosen": -214.9687957763672, |
|
"logps/rejected": -183.12184143066406, |
|
"loss": 0.6963, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.7165493965148926, |
|
"rewards/margins": 0.6403694152832031, |
|
"rewards/rejected": 0.07618004828691483, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6608969315499607, |
|
"grad_norm": 17.816455841064453, |
|
"learning_rate": 1.2671254132422393e-05, |
|
"logits/chosen": -0.34346064925193787, |
|
"logits/rejected": -0.628952145576477, |
|
"logps/chosen": -192.6859588623047, |
|
"logps/rejected": -179.92324829101562, |
|
"loss": 0.5884, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.1312153339385986, |
|
"rewards/margins": 0.8917140960693359, |
|
"rewards/rejected": 0.2395012080669403, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6648308418568056, |
|
"grad_norm": 25.672109603881836, |
|
"learning_rate": 1.2416217394255906e-05, |
|
"logits/chosen": -0.2732202410697937, |
|
"logits/rejected": -0.5638844966888428, |
|
"logps/chosen": -207.419921875, |
|
"logps/rejected": -190.7101287841797, |
|
"loss": 0.8709, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9203730821609497, |
|
"rewards/margins": 0.5037940740585327, |
|
"rewards/rejected": 0.4165789484977722, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6687647521636507, |
|
"grad_norm": 24.7791805267334, |
|
"learning_rate": 1.2162612930831357e-05, |
|
"logits/chosen": -0.021216195076704025, |
|
"logits/rejected": -0.4218730032444, |
|
"logps/chosen": -211.8462677001953, |
|
"logps/rejected": -183.61422729492188, |
|
"loss": 0.6667, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6997087001800537, |
|
"rewards/margins": 0.6386712193489075, |
|
"rewards/rejected": 0.06103747338056564, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6726986624704957, |
|
"grad_norm": 17.186410903930664, |
|
"learning_rate": 1.1910488637939826e-05, |
|
"logits/chosen": -0.19025997817516327, |
|
"logits/rejected": -0.36145836114883423, |
|
"logps/chosen": -191.56838989257812, |
|
"logps/rejected": -188.1061553955078, |
|
"loss": 0.7408, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7108181715011597, |
|
"rewards/margins": 0.6494898200035095, |
|
"rewards/rejected": 0.061328280717134476, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6766325727773407, |
|
"grad_norm": 18.120513916015625, |
|
"learning_rate": 1.16598921318271e-05, |
|
"logits/chosen": -0.21306462585926056, |
|
"logits/rejected": -0.5272704362869263, |
|
"logps/chosen": -198.18896484375, |
|
"logps/rejected": -180.7800750732422, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.7278915643692017, |
|
"rewards/margins": 0.8657029271125793, |
|
"rewards/rejected": -0.13781137764453888, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6805664830841857, |
|
"grad_norm": 20.887371063232422, |
|
"learning_rate": 1.1410870740200841e-05, |
|
"logits/chosen": -0.3551502823829651, |
|
"logits/rejected": -0.7362738847732544, |
|
"logps/chosen": -193.05130004882812, |
|
"logps/rejected": -174.00880432128906, |
|
"loss": 0.7762, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.8120868802070618, |
|
"rewards/margins": 0.8929181098937988, |
|
"rewards/rejected": -0.08083119988441467, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6845003933910306, |
|
"grad_norm": 24.89902687072754, |
|
"learning_rate": 1.1163471493292268e-05, |
|
"logits/chosen": -0.009045323356986046, |
|
"logits/rejected": -0.34948888421058655, |
|
"logps/chosen": -220.1766815185547, |
|
"logps/rejected": -188.1958770751953, |
|
"loss": 0.9159, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.148590326309204, |
|
"rewards/margins": 0.5903881192207336, |
|
"rewards/rejected": 0.5582022666931152, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6884343036978757, |
|
"grad_norm": 20.535686492919922, |
|
"learning_rate": 1.0917741114974007e-05, |
|
"logits/chosen": -0.3664267361164093, |
|
"logits/rejected": -0.8371219635009766, |
|
"logps/chosen": -200.46461486816406, |
|
"logps/rejected": -163.69137573242188, |
|
"loss": 0.7615, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.84300297498703, |
|
"rewards/margins": 0.8857911825180054, |
|
"rewards/rejected": -0.04278818517923355, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6923682140047207, |
|
"grad_norm": 27.465072631835938, |
|
"learning_rate": 1.0673726013935828e-05, |
|
"logits/chosen": -0.25900501012802124, |
|
"logits/rejected": -0.7321861982345581, |
|
"logps/chosen": -211.7742919921875, |
|
"logps/rejected": -171.26019287109375, |
|
"loss": 0.8606, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.3226665258407593, |
|
"rewards/margins": 0.6031711101531982, |
|
"rewards/rejected": 0.7194954752922058, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6963021243115657, |
|
"grad_norm": 24.614782333374023, |
|
"learning_rate": 1.0431472274919864e-05, |
|
"logits/chosen": 0.13692393898963928, |
|
"logits/rejected": -0.47194236516952515, |
|
"logps/chosen": -215.8333282470703, |
|
"logps/rejected": -175.57333374023438, |
|
"loss": 0.7391, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.1747673749923706, |
|
"rewards/margins": 0.6339668035507202, |
|
"rewards/rejected": 0.5408006906509399, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.7002360346184107, |
|
"grad_norm": 24.77642250061035, |
|
"learning_rate": 1.019102565001707e-05, |
|
"logits/chosen": -0.4733211100101471, |
|
"logits/rejected": -0.7766138315200806, |
|
"logps/chosen": -199.7375946044922, |
|
"logps/rejected": -169.17922973632812, |
|
"loss": 0.7491, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.6666286587715149, |
|
"rewards/margins": 0.6510328054428101, |
|
"rewards/rejected": 0.015595817938446999, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7041699449252558, |
|
"grad_norm": 14.021129608154297, |
|
"learning_rate": 9.95243155002646e-06, |
|
"logits/chosen": -0.18163976073265076, |
|
"logits/rejected": -0.785339891910553, |
|
"logps/chosen": -198.67337036132812, |
|
"logps/rejected": -167.85330200195312, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.2840198278427124, |
|
"rewards/margins": 1.1790852546691895, |
|
"rewards/rejected": 0.10493452847003937, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.7081038552321007, |
|
"grad_norm": 27.48203468322754, |
|
"learning_rate": 9.7157350358788e-06, |
|
"logits/chosen": -0.5614138245582581, |
|
"logits/rejected": -0.7469819188117981, |
|
"logps/chosen": -204.990478515625, |
|
"logps/rejected": -186.52420043945312, |
|
"loss": 0.8492, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8738373517990112, |
|
"rewards/margins": 0.5437658429145813, |
|
"rewards/rejected": 0.3300715386867523, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7081038552321007, |
|
"eval_logits/chosen": 1.3100758790969849, |
|
"eval_logits/rejected": 1.0955623388290405, |
|
"eval_logps/chosen": -205.67874145507812, |
|
"eval_logps/rejected": -179.78384399414062, |
|
"eval_loss": 0.75962895154953, |
|
"eval_rewards/accuracies": 0.628125011920929, |
|
"eval_rewards/chosen": 0.9166278839111328, |
|
"eval_rewards/margins": 0.6770327091217041, |
|
"eval_rewards/rejected": 0.23959510028362274, |
|
"eval_runtime": 250.4549, |
|
"eval_samples_per_second": 2.555, |
|
"eval_steps_per_second": 0.16, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7120377655389457, |
|
"grad_norm": 16.553695678710938, |
|
"learning_rate": 9.480980810126412e-06, |
|
"logits/chosen": -0.16247500479221344, |
|
"logits/rejected": -0.5344902276992798, |
|
"logps/chosen": -183.8999786376953, |
|
"logps/rejected": -169.76052856445312, |
|
"loss": 0.7105, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.8913670778274536, |
|
"rewards/margins": 0.8846918344497681, |
|
"rewards/rejected": 0.006675288081169128, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.7159716758457907, |
|
"grad_norm": 28.125730514526367, |
|
"learning_rate": 9.248213208500629e-06, |
|
"logits/chosen": -0.03472292423248291, |
|
"logits/rejected": -0.5100794434547424, |
|
"logps/chosen": -223.66665649414062, |
|
"logps/rejected": -193.7150421142578, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.2616242170333862, |
|
"rewards/margins": 0.8945581316947937, |
|
"rewards/rejected": 0.36706608533859253, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7199055861526357, |
|
"grad_norm": 20.356327056884766, |
|
"learning_rate": 9.017476191538556e-06, |
|
"logits/chosen": 0.017731910571455956, |
|
"logits/rejected": -0.5705962181091309, |
|
"logps/chosen": -204.4373779296875, |
|
"logps/rejected": -166.6776885986328, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.9296242594718933, |
|
"rewards/margins": 0.7391864061355591, |
|
"rewards/rejected": 0.19043782353401184, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.7238394964594808, |
|
"grad_norm": 13.494754791259766, |
|
"learning_rate": 8.78881333628063e-06, |
|
"logits/chosen": -0.2843412160873413, |
|
"logits/rejected": -0.5937708616256714, |
|
"logps/chosen": -184.79656982421875, |
|
"logps/rejected": -171.83572387695312, |
|
"loss": 0.6447, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.7906737327575684, |
|
"rewards/margins": 0.704569935798645, |
|
"rewards/rejected": 0.08610378205776215, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7277734067663257, |
|
"grad_norm": 17.869462966918945, |
|
"learning_rate": 8.562267828040714e-06, |
|
"logits/chosen": -0.5717681050300598, |
|
"logits/rejected": -0.875778079032898, |
|
"logps/chosen": -192.8079071044922, |
|
"logps/rejected": -175.69139099121094, |
|
"loss": 0.7083, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.3932053744792938, |
|
"rewards/margins": 0.5822644233703613, |
|
"rewards/rejected": -0.1890590488910675, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 18.533781051635742, |
|
"learning_rate": 8.337882452250058e-06, |
|
"logits/chosen": -0.34088677167892456, |
|
"logits/rejected": -0.6980454325675964, |
|
"logps/chosen": -210.4141082763672, |
|
"logps/rejected": -196.22714233398438, |
|
"loss": 0.8137, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.802699863910675, |
|
"rewards/margins": 0.6218786835670471, |
|
"rewards/rejected": 0.1808212548494339, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7356412273800157, |
|
"grad_norm": 20.780742645263672, |
|
"learning_rate": 8.115699586376865e-06, |
|
"logits/chosen": -0.5672314167022705, |
|
"logits/rejected": -0.855423629283905, |
|
"logps/chosen": -175.87796020507812, |
|
"logps/rejected": -159.25332641601562, |
|
"loss": 0.8287, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.42161092162132263, |
|
"rewards/margins": 0.5529305934906006, |
|
"rewards/rejected": -0.13131961226463318, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.7395751376868608, |
|
"grad_norm": 17.732147216796875, |
|
"learning_rate": 7.895761191922861e-06, |
|
"logits/chosen": -0.09196851402521133, |
|
"logits/rejected": -0.5184351801872253, |
|
"logps/chosen": -218.05563354492188, |
|
"logps/rejected": -186.40463256835938, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9370241165161133, |
|
"rewards/margins": 0.9796267747879028, |
|
"rewards/rejected": -0.04260266199707985, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7435090479937058, |
|
"grad_norm": 22.87380599975586, |
|
"learning_rate": 7.678108806498442e-06, |
|
"logits/chosen": -0.350538432598114, |
|
"logits/rejected": -0.6340444684028625, |
|
"logps/chosen": -202.5361785888672, |
|
"logps/rejected": -178.80300903320312, |
|
"loss": 0.7974, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.3468557596206665, |
|
"rewards/margins": 0.8867467641830444, |
|
"rewards/rejected": 0.46010905504226685, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.7474429583005507, |
|
"grad_norm": 23.245569229125977, |
|
"learning_rate": 7.462783535977842e-06, |
|
"logits/chosen": -0.19628608226776123, |
|
"logits/rejected": -0.6630419492721558, |
|
"logps/chosen": -206.7510223388672, |
|
"logps/rejected": -168.88804626464844, |
|
"loss": 0.8134, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.6633275747299194, |
|
"rewards/margins": 0.356858491897583, |
|
"rewards/rejected": 0.3064691126346588, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7513768686073957, |
|
"grad_norm": 25.233848571777344, |
|
"learning_rate": 7.249826046735928e-06, |
|
"logits/chosen": -0.2122970074415207, |
|
"logits/rejected": -0.6993425488471985, |
|
"logps/chosen": -222.96298217773438, |
|
"logps/rejected": -183.36917114257812, |
|
"loss": 0.8162, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.5078728199005127, |
|
"rewards/margins": 0.9431095123291016, |
|
"rewards/rejected": 0.5647634267807007, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.7553107789142408, |
|
"grad_norm": 23.390539169311523, |
|
"learning_rate": 7.039276557967895e-06, |
|
"logits/chosen": -0.17560990154743195, |
|
"logits/rejected": -0.5167850852012634, |
|
"logps/chosen": -210.7447967529297, |
|
"logps/rejected": -185.9241180419922, |
|
"loss": 0.7632, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.9249579310417175, |
|
"rewards/margins": 0.6043773889541626, |
|
"rewards/rejected": 0.32058054208755493, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7592446892210858, |
|
"grad_norm": 21.24551773071289, |
|
"learning_rate": 6.831174834093477e-06, |
|
"logits/chosen": -0.20156054198741913, |
|
"logits/rejected": -0.6375221610069275, |
|
"logps/chosen": -203.7855987548828, |
|
"logps/rejected": -169.75369262695312, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.9981558918952942, |
|
"rewards/margins": 0.8284910321235657, |
|
"rewards/rejected": 0.16966481506824493, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.7631785995279308, |
|
"grad_norm": 21.75351333618164, |
|
"learning_rate": 6.625560177247023e-06, |
|
"logits/chosen": -0.15350133180618286, |
|
"logits/rejected": -0.4431697726249695, |
|
"logps/chosen": -185.0416717529297, |
|
"logps/rejected": -163.0596160888672, |
|
"loss": 0.8273, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 1.2868894338607788, |
|
"rewards/margins": 0.6607948541641235, |
|
"rewards/rejected": 0.6260946989059448, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7671125098347757, |
|
"grad_norm": 21.936416625976562, |
|
"learning_rate": 6.422471419854899e-06, |
|
"logits/chosen": -0.4399512708187103, |
|
"logits/rejected": -0.6968399882316589, |
|
"logps/chosen": -190.70333862304688, |
|
"logps/rejected": -173.41281127929688, |
|
"loss": 0.6566, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.1954008340835571, |
|
"rewards/margins": 0.9580972790718079, |
|
"rewards/rejected": 0.23730352520942688, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7710464201416207, |
|
"grad_norm": 21.626394271850586, |
|
"learning_rate": 6.221946917301563e-06, |
|
"logits/chosen": -0.012787247076630592, |
|
"logits/rejected": -0.2723299562931061, |
|
"logps/chosen": -212.55581665039062, |
|
"logps/rejected": -191.74188232421875, |
|
"loss": 0.7072, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.4022397994995117, |
|
"rewards/margins": 0.6362533569335938, |
|
"rewards/rejected": 0.765986442565918, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7749803304484658, |
|
"grad_norm": 14.10751724243164, |
|
"learning_rate": 6.024024540685802e-06, |
|
"logits/chosen": -0.38578885793685913, |
|
"logits/rejected": -0.8282561302185059, |
|
"logps/chosen": -190.1944122314453, |
|
"logps/rejected": -164.04934692382812, |
|
"loss": 0.5289, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.3787574768066406, |
|
"rewards/margins": 0.9574896097183228, |
|
"rewards/rejected": 0.4212678372859955, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7789142407553108, |
|
"grad_norm": 16.705045700073242, |
|
"learning_rate": 5.828741669668339e-06, |
|
"logits/chosen": -0.5691582560539246, |
|
"logits/rejected": -0.9283183813095093, |
|
"logps/chosen": -188.06460571289062, |
|
"logps/rejected": -168.5616455078125, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.050636887550354, |
|
"rewards/margins": 0.8141539692878723, |
|
"rewards/rejected": 0.23648293316364288, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7828481510621558, |
|
"grad_norm": 18.042753219604492, |
|
"learning_rate": 5.636135185412342e-06, |
|
"logits/chosen": 0.26870280504226685, |
|
"logits/rejected": -0.28138986229896545, |
|
"logps/chosen": -204.53746032714844, |
|
"logps/rejected": -172.84890747070312, |
|
"loss": 0.636, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.0944331884384155, |
|
"rewards/margins": 0.9463173151016235, |
|
"rewards/rejected": 0.14811599254608154, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7867820613690008, |
|
"grad_norm": 18.450891494750977, |
|
"learning_rate": 5.446241463618027e-06, |
|
"logits/chosen": -0.13302814960479736, |
|
"logits/rejected": -0.6271988153457642, |
|
"logps/chosen": -222.73452758789062, |
|
"logps/rejected": -177.82113647460938, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.3603211641311646, |
|
"rewards/margins": 0.9086629748344421, |
|
"rewards/rejected": 0.4516581594944, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7867820613690008, |
|
"eval_logits/chosen": 1.2821887731552124, |
|
"eval_logits/rejected": 1.063094973564148, |
|
"eval_logps/chosen": -204.9945526123047, |
|
"eval_logps/rejected": -179.13351440429688, |
|
"eval_loss": 0.7391278147697449, |
|
"eval_rewards/accuracies": 0.637499988079071, |
|
"eval_rewards/chosen": 1.190301775932312, |
|
"eval_rewards/margins": 0.6905705332756042, |
|
"eval_rewards/rejected": 0.49973124265670776, |
|
"eval_runtime": 249.286, |
|
"eval_samples_per_second": 2.567, |
|
"eval_steps_per_second": 0.16, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7907159716758458, |
|
"grad_norm": 22.258323669433594, |
|
"learning_rate": 5.2590963676527255e-06, |
|
"logits/chosen": -0.10886897891759872, |
|
"logits/rejected": -0.31225308775901794, |
|
"logps/chosen": -210.9266357421875, |
|
"logps/rejected": -194.21224975585938, |
|
"loss": 0.6442, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.1041626930236816, |
|
"rewards/margins": 0.7658023834228516, |
|
"rewards/rejected": 0.33836036920547485, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.7946498819826908, |
|
"grad_norm": 20.957698822021484, |
|
"learning_rate": 5.074735241777733e-06, |
|
"logits/chosen": -0.2896588146686554, |
|
"logits/rejected": -0.6819514036178589, |
|
"logps/chosen": -207.27120971679688, |
|
"logps/rejected": -179.46530151367188, |
|
"loss": 0.7299, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.9105139970779419, |
|
"rewards/margins": 0.4902356266975403, |
|
"rewards/rejected": 0.42027825117111206, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7985837922895358, |
|
"grad_norm": 17.720888137817383, |
|
"learning_rate": 4.893192904473183e-06, |
|
"logits/chosen": -0.3812113106250763, |
|
"logits/rejected": -0.9288043975830078, |
|
"logps/chosen": -194.83285522460938, |
|
"logps/rejected": -157.9988250732422, |
|
"loss": 0.8882, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.6736253499984741, |
|
"rewards/margins": 0.4428628087043762, |
|
"rewards/rejected": 0.2307625561952591, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.8025177025963808, |
|
"grad_norm": 17.797956466674805, |
|
"learning_rate": 4.714503641862225e-06, |
|
"logits/chosen": -0.3434005677700043, |
|
"logits/rejected": -0.9068504571914673, |
|
"logps/chosen": -207.9601287841797, |
|
"logps/rejected": -173.23727416992188, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.209609031677246, |
|
"rewards/margins": 1.1478995084762573, |
|
"rewards/rejected": 0.06170947104692459, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8064516129032258, |
|
"grad_norm": 16.82356834411621, |
|
"learning_rate": 4.538701201235713e-06, |
|
"logits/chosen": -0.22616150975227356, |
|
"logits/rejected": -0.6889921426773071, |
|
"logps/chosen": -192.7413787841797, |
|
"logps/rejected": -163.8821258544922, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.1558220386505127, |
|
"rewards/margins": 0.9748827219009399, |
|
"rewards/rejected": 0.18093928694725037, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8103855232100708, |
|
"grad_norm": 20.977785110473633, |
|
"learning_rate": 4.365818784678737e-06, |
|
"logits/chosen": -0.03155326843261719, |
|
"logits/rejected": -0.41564369201660156, |
|
"logps/chosen": -208.5790252685547, |
|
"logps/rejected": -175.08924865722656, |
|
"loss": 0.7402, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.0165379047393799, |
|
"rewards/margins": 0.8640995025634766, |
|
"rewards/rejected": 0.15243832767009735, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8143194335169158, |
|
"grad_norm": 32.46459197998047, |
|
"learning_rate": 4.195889042800021e-06, |
|
"logits/chosen": -0.17535440623760223, |
|
"logits/rejected": -0.4597929120063782, |
|
"logps/chosen": -199.391845703125, |
|
"logps/rejected": -179.8048553466797, |
|
"loss": 0.7109, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.0733716487884521, |
|
"rewards/margins": 0.6777491569519043, |
|
"rewards/rejected": 0.3956224024295807, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.8182533438237608, |
|
"grad_norm": 16.850854873657227, |
|
"learning_rate": 4.028944068565552e-06, |
|
"logits/chosen": -0.5630426406860352, |
|
"logits/rejected": -0.8235554695129395, |
|
"logps/chosen": -179.50892639160156, |
|
"logps/rejected": -167.5303955078125, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.1456284523010254, |
|
"rewards/margins": 0.971184253692627, |
|
"rewards/rejected": 0.17444416880607605, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8221872541306058, |
|
"grad_norm": 27.865501403808594, |
|
"learning_rate": 3.865015391237481e-06, |
|
"logits/chosen": -0.4081362783908844, |
|
"logits/rejected": -0.7251068353652954, |
|
"logps/chosen": -206.79672241210938, |
|
"logps/rejected": -183.33836364746094, |
|
"loss": 1.0297, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.3163181245326996, |
|
"rewards/margins": 0.19530019164085388, |
|
"rewards/rejected": 0.1210179552435875, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.8261211644374509, |
|
"grad_norm": 20.659204483032227, |
|
"learning_rate": 3.7041339704195147e-06, |
|
"logits/chosen": -0.1373288780450821, |
|
"logits/rejected": -0.5759511590003967, |
|
"logps/chosen": -203.7024383544922, |
|
"logps/rejected": -173.4119110107422, |
|
"loss": 0.8511, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6036697626113892, |
|
"rewards/margins": 0.6436252593994141, |
|
"rewards/rejected": -0.0399555079638958, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8300550747442959, |
|
"grad_norm": 24.464290618896484, |
|
"learning_rate": 3.54633019020985e-06, |
|
"logits/chosen": -0.43429121375083923, |
|
"logits/rejected": -0.5019878149032593, |
|
"logps/chosen": -198.01043701171875, |
|
"logps/rejected": -187.71475219726562, |
|
"loss": 0.9095, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.7602452039718628, |
|
"rewards/margins": 0.32361331582069397, |
|
"rewards/rejected": 0.436631977558136, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.8339889850511408, |
|
"grad_norm": 23.853845596313477, |
|
"learning_rate": 3.3916338534628613e-06, |
|
"logits/chosen": -0.11409912258386612, |
|
"logits/rejected": -0.40145158767700195, |
|
"logps/chosen": -210.5789794921875, |
|
"logps/rejected": -191.952392578125, |
|
"loss": 0.8087, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.8562873005867004, |
|
"rewards/margins": 0.44316864013671875, |
|
"rewards/rejected": 0.41311874985694885, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8379228953579858, |
|
"grad_norm": 30.748018264770508, |
|
"learning_rate": 3.2400741761605102e-06, |
|
"logits/chosen": -0.21002812683582306, |
|
"logits/rejected": -0.5244247913360596, |
|
"logps/chosen": -195.76846313476562, |
|
"logps/rejected": -186.1009063720703, |
|
"loss": 0.7539, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.2099284678697586, |
|
"rewards/margins": 0.5657997131347656, |
|
"rewards/rejected": -0.35587114095687866, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.8418568056648308, |
|
"grad_norm": 20.117977142333984, |
|
"learning_rate": 3.0916797818946055e-06, |
|
"logits/chosen": -0.3361959457397461, |
|
"logits/rejected": -0.5092878937721252, |
|
"logps/chosen": -195.49034118652344, |
|
"logps/rejected": -186.6125030517578, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.40774106979370117, |
|
"rewards/margins": 0.6332494616508484, |
|
"rewards/rejected": -0.22550833225250244, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8457907159716759, |
|
"grad_norm": 20.158287048339844, |
|
"learning_rate": 2.946478696460957e-06, |
|
"logits/chosen": -0.54124516248703, |
|
"logits/rejected": -0.8028567433357239, |
|
"logps/chosen": -192.8404998779297, |
|
"logps/rejected": -176.3671112060547, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.6862714886665344, |
|
"rewards/margins": 0.8415164947509766, |
|
"rewards/rejected": -0.15524490177631378, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.8497246262785209, |
|
"grad_norm": 17.690227508544922, |
|
"learning_rate": 2.8044983425664064e-06, |
|
"logits/chosen": -0.28623318672180176, |
|
"logits/rejected": -0.46538248658180237, |
|
"logps/chosen": -198.58580017089844, |
|
"logps/rejected": -183.89404296875, |
|
"loss": 0.7617, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.34613844752311707, |
|
"rewards/margins": 0.7363790273666382, |
|
"rewards/rejected": -0.3902406394481659, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8536585365853658, |
|
"grad_norm": 26.19361114501953, |
|
"learning_rate": 2.6657655346497802e-06, |
|
"logits/chosen": -0.38322368264198303, |
|
"logits/rejected": -0.6140966415405273, |
|
"logps/chosen": -187.73886108398438, |
|
"logps/rejected": -176.44613647460938, |
|
"loss": 0.7017, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.20231589674949646, |
|
"rewards/margins": 0.5345470905303955, |
|
"rewards/rejected": -0.3322312533855438, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8575924468922108, |
|
"grad_norm": 25.393978118896484, |
|
"learning_rate": 2.530306473817696e-06, |
|
"logits/chosen": -0.25616469979286194, |
|
"logits/rejected": -0.7887105941772461, |
|
"logps/chosen": -188.26930236816406, |
|
"logps/rejected": -164.24330139160156, |
|
"loss": 0.7071, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.39394354820251465, |
|
"rewards/margins": 0.7766583561897278, |
|
"rewards/rejected": -0.3827148377895355, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8615263571990559, |
|
"grad_norm": 24.819536209106445, |
|
"learning_rate": 2.398146742896237e-06, |
|
"logits/chosen": -0.2826923131942749, |
|
"logits/rejected": -0.8120182156562805, |
|
"logps/chosen": -175.20675659179688, |
|
"logps/rejected": -142.05787658691406, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.2799919545650482, |
|
"rewards/margins": 0.5129075646400452, |
|
"rewards/rejected": -0.23291556537151337, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.8654602675059009, |
|
"grad_norm": 15.595181465148926, |
|
"learning_rate": 2.269311301599344e-06, |
|
"logits/chosen": -0.25191831588745117, |
|
"logits/rejected": -0.5568451881408691, |
|
"logps/chosen": -191.5902862548828, |
|
"logps/rejected": -171.51724243164062, |
|
"loss": 0.7207, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6834554672241211, |
|
"rewards/margins": 0.6807907223701477, |
|
"rewards/rejected": 0.002664747880771756, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8654602675059009, |
|
"eval_logits/chosen": 1.2452117204666138, |
|
"eval_logits/rejected": 1.0254409313201904, |
|
"eval_logps/chosen": -206.24465942382812, |
|
"eval_logps/rejected": -180.29928588867188, |
|
"eval_loss": 0.737010657787323, |
|
"eval_rewards/accuracies": 0.6390625238418579, |
|
"eval_rewards/chosen": 0.6902583837509155, |
|
"eval_rewards/margins": 0.6568381786346436, |
|
"eval_rewards/rejected": 0.03342027962207794, |
|
"eval_runtime": 250.2397, |
|
"eval_samples_per_second": 2.558, |
|
"eval_steps_per_second": 0.16, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8693941778127459, |
|
"grad_norm": 26.02237319946289, |
|
"learning_rate": 2.143824481814947e-06, |
|
"logits/chosen": -0.5257942080497742, |
|
"logits/rejected": -0.6871519088745117, |
|
"logps/chosen": -183.809814453125, |
|
"logps/rejected": -175.24085998535156, |
|
"loss": 0.9311, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.2924796938896179, |
|
"rewards/margins": 0.16722363233566284, |
|
"rewards/rejected": 0.1252560317516327, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.8733280881195908, |
|
"grad_norm": 15.61811637878418, |
|
"learning_rate": 2.02170998300963e-06, |
|
"logits/chosen": -0.49455881118774414, |
|
"logits/rejected": -0.8319517970085144, |
|
"logps/chosen": -195.03097534179688, |
|
"logps/rejected": -172.79736328125, |
|
"loss": 0.7795, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.6870075464248657, |
|
"rewards/margins": 0.6901518702507019, |
|
"rewards/rejected": -0.0031443715561181307, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8772619984264359, |
|
"grad_norm": 19.150976181030273, |
|
"learning_rate": 1.9029908677527409e-06, |
|
"logits/chosen": -0.27715402841567993, |
|
"logits/rejected": -0.5119122266769409, |
|
"logps/chosen": -190.5804901123047, |
|
"logps/rejected": -170.06210327148438, |
|
"loss": 0.7519, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7375507950782776, |
|
"rewards/margins": 0.4395205080509186, |
|
"rewards/rejected": 0.2980303168296814, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.8811959087332809, |
|
"grad_norm": 20.794376373291016, |
|
"learning_rate": 1.78768955736079e-06, |
|
"logits/chosen": -0.47320452332496643, |
|
"logits/rejected": -0.5798605680465698, |
|
"logps/chosen": -186.94198608398438, |
|
"logps/rejected": -193.33441162109375, |
|
"loss": 0.837, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.7762082815170288, |
|
"rewards/margins": 0.520244836807251, |
|
"rewards/rejected": 0.25596338510513306, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8851298190401259, |
|
"grad_norm": 23.86858558654785, |
|
"learning_rate": 1.6758278276629659e-06, |
|
"logits/chosen": -0.23235611617565155, |
|
"logits/rejected": -0.6398773789405823, |
|
"logps/chosen": -212.32974243164062, |
|
"logps/rejected": -180.15377807617188, |
|
"loss": 0.7666, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5019949078559875, |
|
"rewards/margins": 0.5942084193229675, |
|
"rewards/rejected": -0.09221355617046356, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8890637293469709, |
|
"grad_norm": 23.230112075805664, |
|
"learning_rate": 1.5674268048885277e-06, |
|
"logits/chosen": -0.23109452426433563, |
|
"logits/rejected": -0.593435525894165, |
|
"logps/chosen": -202.44102478027344, |
|
"logps/rejected": -179.34506225585938, |
|
"loss": 0.8569, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6775950193405151, |
|
"rewards/margins": 0.16297699511051178, |
|
"rewards/rejected": 0.5146180391311646, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8929976396538158, |
|
"grad_norm": 22.992965698242188, |
|
"learning_rate": 1.4625069616769215e-06, |
|
"logits/chosen": -0.3653388023376465, |
|
"logits/rejected": -0.7005013227462769, |
|
"logps/chosen": -199.02430725097656, |
|
"logps/rejected": -180.14822387695312, |
|
"loss": 0.7866, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8748451471328735, |
|
"rewards/margins": 0.41802239418029785, |
|
"rewards/rejected": 0.45682281255722046, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.8969315499606609, |
|
"grad_norm": 13.673257827758789, |
|
"learning_rate": 1.3610881132113107e-06, |
|
"logits/chosen": -0.1476505994796753, |
|
"logits/rejected": -0.5143739581108093, |
|
"logps/chosen": -194.36900329589844, |
|
"logps/rejected": -174.2411651611328, |
|
"loss": 0.5425, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6795452237129211, |
|
"rewards/margins": 1.169248104095459, |
|
"rewards/rejected": -0.4897027909755707, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9008654602675059, |
|
"grad_norm": 28.896047592163086, |
|
"learning_rate": 1.2631894134762579e-06, |
|
"logits/chosen": -0.26152271032333374, |
|
"logits/rejected": -0.6222779750823975, |
|
"logps/chosen": -215.73617553710938, |
|
"logps/rejected": -192.5132293701172, |
|
"loss": 0.7018, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.757038950920105, |
|
"rewards/margins": 0.659284234046936, |
|
"rewards/rejected": 0.09775470197200775, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.9047993705743509, |
|
"grad_norm": 27.06733512878418, |
|
"learning_rate": 1.168829351640326e-06, |
|
"logits/chosen": 0.007974958047270775, |
|
"logits/rejected": -0.3661263585090637, |
|
"logps/chosen": -207.5963592529297, |
|
"logps/rejected": -186.1758575439453, |
|
"loss": 0.7401, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.5918732285499573, |
|
"rewards/margins": 0.6995636820793152, |
|
"rewards/rejected": -0.1076904684305191, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9087332808811959, |
|
"grad_norm": 32.632076263427734, |
|
"learning_rate": 1.078025748564191e-06, |
|
"logits/chosen": -0.11743499338626862, |
|
"logits/rejected": -0.08067210018634796, |
|
"logps/chosen": -205.4510498046875, |
|
"logps/rejected": -210.25064086914062, |
|
"loss": 0.9985, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.5534142851829529, |
|
"rewards/margins": -0.008044779300689697, |
|
"rewards/rejected": 0.5614590644836426, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.912667191188041, |
|
"grad_norm": 28.71379280090332, |
|
"learning_rate": 9.907957534349921e-07, |
|
"logits/chosen": -0.004957390017807484, |
|
"logits/rejected": -0.4194992482662201, |
|
"logps/chosen": -205.16690063476562, |
|
"logps/rejected": -183.73019409179688, |
|
"loss": 0.7593, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.7620848417282104, |
|
"rewards/margins": 0.7305535078048706, |
|
"rewards/rejected": 0.03153129667043686, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9166011014948859, |
|
"grad_norm": 16.41864776611328, |
|
"learning_rate": 9.071558405275427e-07, |
|
"logits/chosen": -0.1401735246181488, |
|
"logits/rejected": -0.5280479192733765, |
|
"logps/chosen": -191.03903198242188, |
|
"logps/rejected": -167.55050659179688, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8857030868530273, |
|
"rewards/margins": 0.8941364288330078, |
|
"rewards/rejected": -0.008433381095528603, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.9205350118017309, |
|
"grad_norm": 23.13312339782715, |
|
"learning_rate": 8.271218060929919e-07, |
|
"logits/chosen": 0.09978056699037552, |
|
"logits/rejected": -0.262615829706192, |
|
"logps/chosen": -214.7543487548828, |
|
"logps/rejected": -196.4529266357422, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8993024826049805, |
|
"rewards/margins": 0.7381424903869629, |
|
"rewards/rejected": 0.1611599177122116, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9244689221085759, |
|
"grad_norm": 18.770814895629883, |
|
"learning_rate": 7.507087653755318e-07, |
|
"logits/chosen": -0.07238979637622833, |
|
"logits/rejected": -0.5419026613235474, |
|
"logps/chosen": -209.29220581054688, |
|
"logps/rejected": -173.19552612304688, |
|
"loss": 0.7126, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8157307505607605, |
|
"rewards/margins": 0.6763442158699036, |
|
"rewards/rejected": 0.13938648998737335, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.9284028324154209, |
|
"grad_norm": 25.199766159057617, |
|
"learning_rate": 6.779311497577401e-07, |
|
"logits/chosen": -0.2254069745540619, |
|
"logits/rejected": -0.4307781159877777, |
|
"logps/chosen": -196.3717803955078, |
|
"logps/rejected": -181.4769287109375, |
|
"loss": 0.7135, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.47276610136032104, |
|
"rewards/margins": 0.584132969379425, |
|
"rewards/rejected": -0.11136679351329803, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.932336742722266, |
|
"grad_norm": 18.558359146118164, |
|
"learning_rate": 6.088027040350674e-07, |
|
"logits/chosen": -0.4991328716278076, |
|
"logits/rejected": -0.9202349781990051, |
|
"logps/chosen": -180.4629669189453, |
|
"logps/rejected": -145.46389770507812, |
|
"loss": 0.7549, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.6303231120109558, |
|
"rewards/margins": 0.5688610672950745, |
|
"rewards/rejected": 0.06146197393536568, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.9362706530291109, |
|
"grad_norm": 29.797143936157227, |
|
"learning_rate": 5.433364838199828e-07, |
|
"logits/chosen": -0.06067700311541557, |
|
"logits/rejected": -0.5211640000343323, |
|
"logps/chosen": -220.21395874023438, |
|
"logps/rejected": -198.33328247070312, |
|
"loss": 0.856, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7763031125068665, |
|
"rewards/margins": 0.5708464980125427, |
|
"rewards/rejected": 0.20545658469200134, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9402045633359559, |
|
"grad_norm": 30.696592330932617, |
|
"learning_rate": 4.815448530762923e-07, |
|
"logits/chosen": -0.014176195487380028, |
|
"logits/rejected": -0.40428978204727173, |
|
"logps/chosen": -191.35374450683594, |
|
"logps/rejected": -164.81369018554688, |
|
"loss": 0.8069, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6798957586288452, |
|
"rewards/margins": 0.4587259888648987, |
|
"rewards/rejected": 0.22116975486278534, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.9441384736428009, |
|
"grad_norm": 19.7831974029541, |
|
"learning_rate": 4.2343948178408036e-07, |
|
"logits/chosen": -0.5212418437004089, |
|
"logits/rejected": -0.7373995780944824, |
|
"logps/chosen": -217.49252319335938, |
|
"logps/rejected": -194.1028289794922, |
|
"loss": 0.7382, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7597753405570984, |
|
"rewards/margins": 0.6988024711608887, |
|
"rewards/rejected": 0.0609729178249836, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9441384736428009, |
|
"eval_logits/chosen": 1.2533395290374756, |
|
"eval_logits/rejected": 1.0344760417938232, |
|
"eval_logps/chosen": -206.20431518554688, |
|
"eval_logps/rejected": -180.233154296875, |
|
"eval_loss": 0.7342613339424133, |
|
"eval_rewards/accuracies": 0.6421874761581421, |
|
"eval_rewards/chosen": 0.7063881158828735, |
|
"eval_rewards/margins": 0.6465141773223877, |
|
"eval_rewards/rejected": 0.05987401679158211, |
|
"eval_runtime": 251.029, |
|
"eval_samples_per_second": 2.55, |
|
"eval_steps_per_second": 0.159, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.948072383949646, |
|
"grad_norm": 15.179329872131348, |
|
"learning_rate": 3.6903134373571515e-07, |
|
"logits/chosen": 0.04393022134900093, |
|
"logits/rejected": -0.3573564887046814, |
|
"logps/chosen": -224.6765899658203, |
|
"logps/rejected": -196.52413940429688, |
|
"loss": 0.9152, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.5112829804420471, |
|
"rewards/margins": 0.32752710580825806, |
|
"rewards/rejected": 0.18375587463378906, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.952006294256491, |
|
"grad_norm": 26.592243194580078, |
|
"learning_rate": 3.1833071446333297e-07, |
|
"logits/chosen": -0.09226761013269424, |
|
"logits/rejected": -0.5401033163070679, |
|
"logps/chosen": -207.3560028076172, |
|
"logps/rejected": -179.0234832763672, |
|
"loss": 0.7916, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7182900905609131, |
|
"rewards/margins": 0.3775801360607147, |
|
"rewards/rejected": 0.34070995450019836, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.955940204563336, |
|
"grad_norm": 17.654870986938477, |
|
"learning_rate": 2.713471692982017e-07, |
|
"logits/chosen": -0.10396593809127808, |
|
"logits/rejected": -0.5638905167579651, |
|
"logps/chosen": -221.9078369140625, |
|
"logps/rejected": -190.99435424804688, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.0176299810409546, |
|
"rewards/margins": 0.687278151512146, |
|
"rewards/rejected": 0.33035191893577576, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9598741148701809, |
|
"grad_norm": 28.622743606567383, |
|
"learning_rate": 2.2808958156231853e-07, |
|
"logits/chosen": -0.3307565152645111, |
|
"logits/rejected": -0.6672178506851196, |
|
"logps/chosen": -205.37741088867188, |
|
"logps/rejected": -174.81289672851562, |
|
"loss": 0.8025, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.7133423089981079, |
|
"rewards/margins": 0.618229866027832, |
|
"rewards/rejected": 0.09511242806911469, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.963808025177026, |
|
"grad_norm": 20.119291305541992, |
|
"learning_rate": 1.8856612089259486e-07, |
|
"logits/chosen": -0.11120174080133438, |
|
"logits/rejected": -0.47498565912246704, |
|
"logps/chosen": -222.4508056640625, |
|
"logps/rejected": -193.77328491210938, |
|
"loss": 0.7055, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.6713041663169861, |
|
"rewards/margins": 0.6344063878059387, |
|
"rewards/rejected": 0.036897819489240646, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.967741935483871, |
|
"grad_norm": 23.203277587890625, |
|
"learning_rate": 1.5278425169794163e-07, |
|
"logits/chosen": -0.31409674882888794, |
|
"logits/rejected": -0.7144479155540466, |
|
"logps/chosen": -209.11270141601562, |
|
"logps/rejected": -189.7498779296875, |
|
"loss": 0.7145, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.8724400401115417, |
|
"rewards/margins": 0.7660844922065735, |
|
"rewards/rejected": 0.10635566711425781, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.971675845790716, |
|
"grad_norm": 17.31842803955078, |
|
"learning_rate": 1.2075073174952378e-07, |
|
"logits/chosen": -0.29665330052375793, |
|
"logits/rejected": -0.7147069573402405, |
|
"logps/chosen": -180.279541015625, |
|
"logps/rejected": -157.10206604003906, |
|
"loss": 0.7291, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.6150587797164917, |
|
"rewards/margins": 0.5622903108596802, |
|
"rewards/rejected": 0.052768461406230927, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 22.84340476989746, |
|
"learning_rate": 9.247161090451207e-08, |
|
"logits/chosen": -0.1943736970424652, |
|
"logits/rejected": -0.5411825180053711, |
|
"logps/chosen": -210.853759765625, |
|
"logps/rejected": -190.47271728515625, |
|
"loss": 0.7743, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7618738412857056, |
|
"rewards/margins": 0.6717807054519653, |
|
"rewards/rejected": 0.09009318053722382, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9795436664044059, |
|
"grad_norm": 18.27547836303711, |
|
"learning_rate": 6.795222996347495e-08, |
|
"logits/chosen": -0.16004569828510284, |
|
"logits/rejected": -0.5410211682319641, |
|
"logps/chosen": -222.00424194335938, |
|
"logps/rejected": -190.4541015625, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.933633029460907, |
|
"rewards/margins": 0.7526041269302368, |
|
"rewards/rejected": 0.18102897703647614, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.983477576711251, |
|
"grad_norm": 33.558162689208984, |
|
"learning_rate": 4.7197219661743176e-08, |
|
"logits/chosen": -0.308822900056839, |
|
"logits/rejected": -0.6432265043258667, |
|
"logps/chosen": -195.4747314453125, |
|
"logps/rejected": -166.3302459716797, |
|
"loss": 0.7417, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.7237275838851929, |
|
"rewards/margins": 0.6491376161575317, |
|
"rewards/rejected": 0.07458990067243576, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.987411487018096, |
|
"grad_norm": 16.65594482421875, |
|
"learning_rate": 3.021049979482715e-08, |
|
"logits/chosen": -0.6164910793304443, |
|
"logits/rejected": -0.9252687692642212, |
|
"logps/chosen": -188.57847595214844, |
|
"logps/rejected": -167.00997924804688, |
|
"loss": 0.823, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.2205405980348587, |
|
"rewards/margins": 0.4816465377807617, |
|
"rewards/rejected": -0.2611059546470642, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.991345397324941, |
|
"grad_norm": 21.41892433166504, |
|
"learning_rate": 1.6995278478133536e-08, |
|
"logits/chosen": -0.32881253957748413, |
|
"logits/rejected": -0.6159898042678833, |
|
"logps/chosen": -203.57620239257812, |
|
"logps/rejected": -189.83755493164062, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.2210671603679657, |
|
"rewards/margins": 0.6746386289596558, |
|
"rewards/rejected": -0.45357149839401245, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.995279307631786, |
|
"grad_norm": 25.134788513183594, |
|
"learning_rate": 7.554051541074359e-09, |
|
"logits/chosen": -0.724000096321106, |
|
"logits/rejected": -0.9451119303703308, |
|
"logps/chosen": -185.7470703125, |
|
"logps/rejected": -176.31640625, |
|
"loss": 0.7505, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.5909692049026489, |
|
"rewards/margins": 0.6639338731765747, |
|
"rewards/rejected": -0.07296473532915115, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.999213217938631, |
|
"grad_norm": 20.82672119140625, |
|
"learning_rate": 1.888602055710731e-09, |
|
"logits/chosen": -0.15106138586997986, |
|
"logits/rejected": -0.6675028800964355, |
|
"logps/chosen": -208.9075927734375, |
|
"logps/rejected": -172.70423889160156, |
|
"loss": 0.7455, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7576273679733276, |
|
"rewards/margins": 0.660781741142273, |
|
"rewards/rejected": 0.09684564918279648, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1271, |
|
"total_flos": 0.0, |
|
"train_loss": 0.7529747673697963, |
|
"train_runtime": 21716.0887, |
|
"train_samples_per_second": 0.936, |
|
"train_steps_per_second": 0.059 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1271, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|