|
{ |
|
"best_metric": 0.6631070971488953, |
|
"best_model_checkpoint": "./output/checkpoints/2024-05-27_09-03-33/checkpoint-1100", |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1271, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003933910306845004, |
|
"grad_norm": 27.324785232543945, |
|
"learning_rate": 6.25e-07, |
|
"logits/chosen": -0.23312029242515564, |
|
"logits/rejected": -0.7136957049369812, |
|
"logps/chosen": -206.98876953125, |
|
"logps/rejected": -177.72207641601562, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.22499999403953552, |
|
"rewards/chosen": -0.0011991311330348253, |
|
"rewards/margins": -0.0031457520090043545, |
|
"rewards/rejected": 0.001946620992384851, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.007867820613690008, |
|
"grad_norm": 26.920639038085938, |
|
"learning_rate": 1.40625e-06, |
|
"logits/chosen": -0.3985660672187805, |
|
"logits/rejected": -0.7379584908485413, |
|
"logps/chosen": -201.005859375, |
|
"logps/rejected": -177.08181762695312, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.010929527692496777, |
|
"rewards/margins": 0.013672275468707085, |
|
"rewards/rejected": -0.002742747776210308, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.011801730920535013, |
|
"grad_norm": 34.40425109863281, |
|
"learning_rate": 2.1875000000000002e-06, |
|
"logits/chosen": -0.35717901587486267, |
|
"logits/rejected": -0.660548746585846, |
|
"logps/chosen": -217.42825317382812, |
|
"logps/rejected": -194.10195922851562, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.034006841480731964, |
|
"rewards/margins": 0.0028066448867321014, |
|
"rewards/rejected": 0.03120020031929016, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.015735641227380016, |
|
"grad_norm": 27.097261428833008, |
|
"learning_rate": 2.96875e-06, |
|
"logits/chosen": -0.3896491825580597, |
|
"logits/rejected": -0.7307055592536926, |
|
"logps/chosen": -209.29373168945312, |
|
"logps/rejected": -179.78488159179688, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.08366340398788452, |
|
"rewards/margins": 0.025963936001062393, |
|
"rewards/rejected": 0.05769947171211243, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01966955153422502, |
|
"grad_norm": 29.19064712524414, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -0.24666282534599304, |
|
"logits/rejected": -0.7009283900260925, |
|
"logps/chosen": -196.3118438720703, |
|
"logps/rejected": -178.7552032470703, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.13745614886283875, |
|
"rewards/margins": 0.03245489299297333, |
|
"rewards/rejected": 0.1050012856721878, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.023603461841070025, |
|
"grad_norm": 31.083709716796875, |
|
"learning_rate": 4.53125e-06, |
|
"logits/chosen": -0.3193593919277191, |
|
"logits/rejected": -0.6126649379730225, |
|
"logps/chosen": -208.44863891601562, |
|
"logps/rejected": -184.2353057861328, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.22270426154136658, |
|
"rewards/margins": 0.03411892056465149, |
|
"rewards/rejected": 0.18858537077903748, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02753737214791503, |
|
"grad_norm": 25.83799171447754, |
|
"learning_rate": 5.3125e-06, |
|
"logits/chosen": -0.46783486008644104, |
|
"logits/rejected": -0.7504000067710876, |
|
"logps/chosen": -221.98843383789062, |
|
"logps/rejected": -199.54000854492188, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.3932109773159027, |
|
"rewards/margins": 0.08941729366779327, |
|
"rewards/rejected": 0.30379369854927063, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03147128245476003, |
|
"grad_norm": 24.734338760375977, |
|
"learning_rate": 6.093750000000001e-06, |
|
"logits/chosen": -0.3396364748477936, |
|
"logits/rejected": -0.7113901376724243, |
|
"logps/chosen": -196.3134765625, |
|
"logps/rejected": -179.5933380126953, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.4551575779914856, |
|
"rewards/margins": 0.05487058684229851, |
|
"rewards/rejected": 0.4002869725227356, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03540519276160504, |
|
"grad_norm": 28.34064292907715, |
|
"learning_rate": 6.718750000000001e-06, |
|
"logits/chosen": -0.667598307132721, |
|
"logits/rejected": -1.014026403427124, |
|
"logps/chosen": -196.5115966796875, |
|
"logps/rejected": -165.67092895507812, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.5074445605278015, |
|
"rewards/margins": 0.09343204647302628, |
|
"rewards/rejected": 0.41401252150535583, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03933910306845004, |
|
"grad_norm": 30.12347984313965, |
|
"learning_rate": 7.500000000000001e-06, |
|
"logits/chosen": -0.2210284173488617, |
|
"logits/rejected": -0.32401731610298157, |
|
"logps/chosen": -210.63818359375, |
|
"logps/rejected": -205.76895141601562, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.5923845171928406, |
|
"rewards/margins": 0.12340062856674194, |
|
"rewards/rejected": 0.4689839482307434, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.043273013375295044, |
|
"grad_norm": 47.19338607788086, |
|
"learning_rate": 8.281250000000001e-06, |
|
"logits/chosen": -0.5629546642303467, |
|
"logits/rejected": -0.7718995213508606, |
|
"logps/chosen": -194.5259552001953, |
|
"logps/rejected": -179.5989532470703, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.6510985493659973, |
|
"rewards/margins": 0.10642552375793457, |
|
"rewards/rejected": 0.544672966003418, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04720692368214005, |
|
"grad_norm": 23.202775955200195, |
|
"learning_rate": 9.0625e-06, |
|
"logits/chosen": -0.3029821217060089, |
|
"logits/rejected": -0.7788914442062378, |
|
"logps/chosen": -214.9969940185547, |
|
"logps/rejected": -167.64263916015625, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6703575849533081, |
|
"rewards/margins": 0.22439488768577576, |
|
"rewards/rejected": 0.44596266746520996, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05114083398898505, |
|
"grad_norm": 35.26408386230469, |
|
"learning_rate": 9.84375e-06, |
|
"logits/chosen": -0.4141275882720947, |
|
"logits/rejected": -0.7083785533905029, |
|
"logps/chosen": -212.9031524658203, |
|
"logps/rejected": -198.8483428955078, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.7610660791397095, |
|
"rewards/margins": 0.2469903975725174, |
|
"rewards/rejected": 0.514075756072998, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.05507474429583006, |
|
"grad_norm": 19.10537338256836, |
|
"learning_rate": 1.0625e-05, |
|
"logits/chosen": -0.4033733308315277, |
|
"logits/rejected": -0.7651963829994202, |
|
"logps/chosen": -212.84487915039062, |
|
"logps/rejected": -174.28073120117188, |
|
"loss": 0.6534, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.7663796544075012, |
|
"rewards/margins": 0.24841317534446716, |
|
"rewards/rejected": 0.5179664492607117, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.059008654602675056, |
|
"grad_norm": 26.261890411376953, |
|
"learning_rate": 1.1406250000000001e-05, |
|
"logits/chosen": -0.10389180481433868, |
|
"logits/rejected": -0.5258628129959106, |
|
"logps/chosen": -206.84921264648438, |
|
"logps/rejected": -186.50869750976562, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8156352043151855, |
|
"rewards/margins": 0.17864595353603363, |
|
"rewards/rejected": 0.6369892358779907, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06294256490952006, |
|
"grad_norm": 32.33486557006836, |
|
"learning_rate": 1.2187500000000001e-05, |
|
"logits/chosen": -0.22502727806568146, |
|
"logits/rejected": -0.49946776032447815, |
|
"logps/chosen": -209.71426391601562, |
|
"logps/rejected": -198.34292602539062, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.6370053291320801, |
|
"rewards/margins": 0.15727970004081726, |
|
"rewards/rejected": 0.4797256886959076, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06687647521636507, |
|
"grad_norm": 38.13333511352539, |
|
"learning_rate": 1.2968750000000002e-05, |
|
"logits/chosen": -0.25742509961128235, |
|
"logits/rejected": -0.7358572483062744, |
|
"logps/chosen": -206.3865966796875, |
|
"logps/rejected": -178.12637329101562, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3785225450992584, |
|
"rewards/margins": 0.16723336279392242, |
|
"rewards/rejected": 0.2112891674041748, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.07081038552321008, |
|
"grad_norm": 23.647096633911133, |
|
"learning_rate": 1.375e-05, |
|
"logits/chosen": -0.3365253806114197, |
|
"logits/rejected": -0.5771717429161072, |
|
"logps/chosen": -208.416748046875, |
|
"logps/rejected": -184.40476989746094, |
|
"loss": 0.7024, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2850777506828308, |
|
"rewards/margins": 0.14986075460910797, |
|
"rewards/rejected": 0.13521698117256165, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07474429583005507, |
|
"grad_norm": 22.20098114013672, |
|
"learning_rate": 1.453125e-05, |
|
"logits/chosen": -0.21254411339759827, |
|
"logits/rejected": -0.6303216218948364, |
|
"logps/chosen": -201.83139038085938, |
|
"logps/rejected": -183.7214813232422, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3604539632797241, |
|
"rewards/margins": 0.1408630609512329, |
|
"rewards/rejected": 0.2195909023284912, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.07867820613690008, |
|
"grad_norm": 29.343482971191406, |
|
"learning_rate": 1.5312500000000003e-05, |
|
"logits/chosen": -0.41852107644081116, |
|
"logits/rejected": -0.7636915445327759, |
|
"logps/chosen": -208.08035278320312, |
|
"logps/rejected": -178.69972229003906, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4267478585243225, |
|
"rewards/margins": 0.16477522253990173, |
|
"rewards/rejected": 0.2619726061820984, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07867820613690008, |
|
"eval_logits/chosen": 1.3246409893035889, |
|
"eval_logits/rejected": 1.0977884531021118, |
|
"eval_logps/chosen": -206.3737030029297, |
|
"eval_logps/rejected": -179.28366088867188, |
|
"eval_loss": 0.6665228009223938, |
|
"eval_rewards/accuracies": 0.635937511920929, |
|
"eval_rewards/chosen": 0.6386381387710571, |
|
"eval_rewards/margins": 0.19896559417247772, |
|
"eval_rewards/rejected": 0.4396725594997406, |
|
"eval_runtime": 307.3381, |
|
"eval_samples_per_second": 2.082, |
|
"eval_steps_per_second": 0.13, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08261211644374508, |
|
"grad_norm": 24.263774871826172, |
|
"learning_rate": 1.609375e-05, |
|
"logits/chosen": -0.16335585713386536, |
|
"logits/rejected": -0.4457281231880188, |
|
"logps/chosen": -201.37017822265625, |
|
"logps/rejected": -176.67379760742188, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.6822856068611145, |
|
"rewards/margins": 0.18253257870674133, |
|
"rewards/rejected": 0.4997529983520508, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08654602675059009, |
|
"grad_norm": 25.775903701782227, |
|
"learning_rate": 1.6875e-05, |
|
"logits/chosen": -0.436201810836792, |
|
"logits/rejected": -0.9347764849662781, |
|
"logps/chosen": -195.61062622070312, |
|
"logps/rejected": -169.15048217773438, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7915258407592773, |
|
"rewards/margins": 0.2815794348716736, |
|
"rewards/rejected": 0.5099464654922485, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0904799370574351, |
|
"grad_norm": 30.208763122558594, |
|
"learning_rate": 1.7656250000000002e-05, |
|
"logits/chosen": -0.5659558176994324, |
|
"logits/rejected": -0.855063796043396, |
|
"logps/chosen": -198.71206665039062, |
|
"logps/rejected": -174.78524780273438, |
|
"loss": 0.7202, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6501097679138184, |
|
"rewards/margins": 0.17246408760547638, |
|
"rewards/rejected": 0.4776456952095032, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.0944138473642801, |
|
"grad_norm": 23.550596237182617, |
|
"learning_rate": 1.84375e-05, |
|
"logits/chosen": -0.5133547186851501, |
|
"logits/rejected": -0.734718382358551, |
|
"logps/chosen": -193.6223602294922, |
|
"logps/rejected": -179.42771911621094, |
|
"loss": 0.7313, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.5589785575866699, |
|
"rewards/margins": 0.10251788794994354, |
|
"rewards/rejected": 0.4564606547355652, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0983477576711251, |
|
"grad_norm": 29.921533584594727, |
|
"learning_rate": 1.9062500000000003e-05, |
|
"logits/chosen": -0.3889247179031372, |
|
"logits/rejected": -0.6225888133049011, |
|
"logps/chosen": -187.0243377685547, |
|
"logps/rejected": -176.29808044433594, |
|
"loss": 0.6273, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.40915530920028687, |
|
"rewards/margins": 0.3502606451511383, |
|
"rewards/rejected": 0.058894671499729156, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1022816679779701, |
|
"grad_norm": 29.90145492553711, |
|
"learning_rate": 1.984375e-05, |
|
"logits/chosen": -0.34609144926071167, |
|
"logits/rejected": -0.7598401308059692, |
|
"logps/chosen": -201.13104248046875, |
|
"logps/rejected": -173.50753784179688, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.1300664246082306, |
|
"rewards/margins": 0.21786466240882874, |
|
"rewards/rejected": -0.08779821544885635, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10621557828481511, |
|
"grad_norm": 23.906503677368164, |
|
"learning_rate": 1.9999395643917957e-05, |
|
"logits/chosen": -0.41295546293258667, |
|
"logits/rejected": -0.8447906374931335, |
|
"logps/chosen": -201.5752716064453, |
|
"logps/rejected": -165.7244415283203, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.21347875893115997, |
|
"rewards/margins": 0.3085169196128845, |
|
"rewards/rejected": -0.09503819793462753, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.11014948859166011, |
|
"grad_norm": 25.38832664489746, |
|
"learning_rate": 1.999694057253083e-05, |
|
"logits/chosen": -0.2702675759792328, |
|
"logits/rejected": -0.6757915019989014, |
|
"logps/chosen": -198.8104705810547, |
|
"logps/rejected": -175.73355102539062, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.7145684361457825, |
|
"rewards/margins": 0.3979041576385498, |
|
"rewards/rejected": 0.31666427850723267, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11408339889850512, |
|
"grad_norm": 25.388601303100586, |
|
"learning_rate": 1.9992597476892096e-05, |
|
"logits/chosen": -0.20559760928153992, |
|
"logits/rejected": -0.6221147775650024, |
|
"logps/chosen": -203.33877563476562, |
|
"logps/rejected": -177.6593780517578, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.8970493078231812, |
|
"rewards/margins": 0.41804951429367065, |
|
"rewards/rejected": 0.4789998531341553, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.11801730920535011, |
|
"grad_norm": 26.157350540161133, |
|
"learning_rate": 1.9986367177239688e-05, |
|
"logits/chosen": -0.34933823347091675, |
|
"logits/rejected": -0.5474187135696411, |
|
"logps/chosen": -192.22409057617188, |
|
"logps/rejected": -179.11972045898438, |
|
"loss": 0.7403, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.7421566843986511, |
|
"rewards/margins": 0.24275951087474823, |
|
"rewards/rejected": 0.4993972182273865, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12195121951219512, |
|
"grad_norm": 27.657987594604492, |
|
"learning_rate": 1.9978250850229278e-05, |
|
"logits/chosen": -0.5602678060531616, |
|
"logits/rejected": -0.7431076765060425, |
|
"logps/chosen": -197.28172302246094, |
|
"logps/rejected": -180.0853271484375, |
|
"loss": 0.718, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.6257942914962769, |
|
"rewards/margins": 0.286087304353714, |
|
"rewards/rejected": 0.3397069573402405, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.12588512981904013, |
|
"grad_norm": 27.8662166595459, |
|
"learning_rate": 1.996825002871205e-05, |
|
"logits/chosen": -0.3598572611808777, |
|
"logits/rejected": -0.8388012647628784, |
|
"logps/chosen": -192.58541870117188, |
|
"logps/rejected": -165.87228393554688, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.49491995573043823, |
|
"rewards/margins": 0.3221299648284912, |
|
"rewards/rejected": 0.1727900207042694, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12981904012588513, |
|
"grad_norm": 21.444156646728516, |
|
"learning_rate": 1.9956366601445212e-05, |
|
"logits/chosen": -0.18239173293113708, |
|
"logits/rejected": -0.6315879225730896, |
|
"logps/chosen": -214.19509887695312, |
|
"logps/rejected": -185.4246368408203, |
|
"loss": 0.6328, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5859188437461853, |
|
"rewards/margins": 0.4131649136543274, |
|
"rewards/rejected": 0.1727539300918579, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.13375295043273014, |
|
"grad_norm": 22.295812606811523, |
|
"learning_rate": 1.994260281273529e-05, |
|
"logits/chosen": -0.27679482102394104, |
|
"logits/rejected": -0.7712021470069885, |
|
"logps/chosen": -206.1096954345703, |
|
"logps/rejected": -173.62576293945312, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.6996821165084839, |
|
"rewards/margins": 0.3059811294078827, |
|
"rewards/rejected": 0.3937010169029236, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.13768686073957515, |
|
"grad_norm": 33.50761413574219, |
|
"learning_rate": 1.9926961262014237e-05, |
|
"logits/chosen": -0.3116024136543274, |
|
"logits/rejected": -0.625832736492157, |
|
"logps/chosen": -219.8788604736328, |
|
"logps/rejected": -187.32510375976562, |
|
"loss": 0.746, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.1785697937011719, |
|
"rewards/margins": 0.21889865398406982, |
|
"rewards/rejected": 0.9596711993217468, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.14162077104642015, |
|
"grad_norm": 15.657761573791504, |
|
"learning_rate": 1.9909444903348546e-05, |
|
"logits/chosen": -0.005524394102394581, |
|
"logits/rejected": -0.3487216532230377, |
|
"logps/chosen": -228.5839385986328, |
|
"logps/rejected": -201.77001953125, |
|
"loss": 0.7435, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 1.1422548294067383, |
|
"rewards/margins": 0.15804262459278107, |
|
"rewards/rejected": 0.9842122793197632, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14555468135326516, |
|
"grad_norm": 26.140518188476562, |
|
"learning_rate": 1.9890057044881308e-05, |
|
"logits/chosen": -0.12314258515834808, |
|
"logits/rejected": -0.5814956426620483, |
|
"logps/chosen": -201.1555633544922, |
|
"logps/rejected": -167.4046173095703, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.0470006465911865, |
|
"rewards/margins": 0.35150283575057983, |
|
"rewards/rejected": 0.6954978108406067, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.14948859166011014, |
|
"grad_norm": 19.782007217407227, |
|
"learning_rate": 1.9868801348207467e-05, |
|
"logits/chosen": -0.11235501617193222, |
|
"logits/rejected": -0.5538455247879028, |
|
"logps/chosen": -204.25839233398438, |
|
"logps/rejected": -181.46743774414062, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.1285860538482666, |
|
"rewards/margins": 0.33753544092178345, |
|
"rewards/rejected": 0.7910505533218384, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15342250196695514, |
|
"grad_norm": 19.97163200378418, |
|
"learning_rate": 1.9845681827682263e-05, |
|
"logits/chosen": -0.16671855747699738, |
|
"logits/rejected": -0.540806233882904, |
|
"logps/chosen": -194.2422332763672, |
|
"logps/rejected": -163.8104705810547, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6743755340576172, |
|
"rewards/margins": 0.26031339168548584, |
|
"rewards/rejected": 0.4140622019767761, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.15735641227380015, |
|
"grad_norm": 18.71397590637207, |
|
"learning_rate": 1.982070284966309e-05, |
|
"logits/chosen": -0.1493137627840042, |
|
"logits/rejected": -0.43618321418762207, |
|
"logps/chosen": -202.78318786621094, |
|
"logps/rejected": -177.56668090820312, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5738676190376282, |
|
"rewards/margins": 0.31430238485336304, |
|
"rewards/rejected": 0.25956520438194275, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15735641227380015, |
|
"eval_logits/chosen": 1.3314845561981201, |
|
"eval_logits/rejected": 1.1080169677734375, |
|
"eval_logps/chosen": -206.4569549560547, |
|
"eval_logps/rejected": -179.43057250976562, |
|
"eval_loss": 0.6942009329795837, |
|
"eval_rewards/accuracies": 0.604687511920929, |
|
"eval_rewards/chosen": 0.6053363680839539, |
|
"eval_rewards/margins": 0.2244330197572708, |
|
"eval_rewards/rejected": 0.38090336322784424, |
|
"eval_runtime": 309.8464, |
|
"eval_samples_per_second": 2.066, |
|
"eval_steps_per_second": 0.129, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16129032258064516, |
|
"grad_norm": 15.415759086608887, |
|
"learning_rate": 1.9793869131684884e-05, |
|
"logits/chosen": -0.08272367715835571, |
|
"logits/rejected": -0.4305300712585449, |
|
"logps/chosen": -196.86305236816406, |
|
"logps/rejected": -178.54037475585938, |
|
"loss": 0.7078, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.5516935586929321, |
|
"rewards/margins": 0.21864008903503418, |
|
"rewards/rejected": 0.33305343985557556, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.16522423288749016, |
|
"grad_norm": 28.38641929626465, |
|
"learning_rate": 1.9765185741569126e-05, |
|
"logits/chosen": -0.14836929738521576, |
|
"logits/rejected": -0.4139153063297272, |
|
"logps/chosen": -215.8746795654297, |
|
"logps/rejected": -190.37954711914062, |
|
"loss": 0.7474, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.6253814697265625, |
|
"rewards/margins": 0.11703801155090332, |
|
"rewards/rejected": 0.5083434581756592, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.16915814319433517, |
|
"grad_norm": 23.663591384887695, |
|
"learning_rate": 1.9734658096466774e-05, |
|
"logits/chosen": 0.011041751131415367, |
|
"logits/rejected": -0.4074042737483978, |
|
"logps/chosen": -209.1394500732422, |
|
"logps/rejected": -178.0277099609375, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.3468799591064453, |
|
"rewards/margins": 0.2509341835975647, |
|
"rewards/rejected": 0.09594579041004181, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.17309205350118018, |
|
"grad_norm": 32.677852630615234, |
|
"learning_rate": 1.970229196183516e-05, |
|
"logits/chosen": -0.020372604951262474, |
|
"logits/rejected": -0.37563034892082214, |
|
"logps/chosen": -209.47402954101562, |
|
"logps/rejected": -177.0091094970703, |
|
"loss": 0.6983, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.594801127910614, |
|
"rewards/margins": 0.25161081552505493, |
|
"rewards/rejected": 0.34319034218788147, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17702596380802518, |
|
"grad_norm": 22.306182861328125, |
|
"learning_rate": 1.9668093450349125e-05, |
|
"logits/chosen": -0.1756196916103363, |
|
"logits/rejected": -0.5201798677444458, |
|
"logps/chosen": -217.6730499267578, |
|
"logps/rejected": -185.24819946289062, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.9194382429122925, |
|
"rewards/margins": 0.3321036696434021, |
|
"rewards/rejected": 0.5873345136642456, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1809598741148702, |
|
"grad_norm": 31.994035720825195, |
|
"learning_rate": 1.9632069020746574e-05, |
|
"logits/chosen": -0.3013627529144287, |
|
"logits/rejected": -0.7145218849182129, |
|
"logps/chosen": -206.0642547607422, |
|
"logps/rejected": -178.27896118164062, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.8781298398971558, |
|
"rewards/margins": 0.5241779088973999, |
|
"rewards/rejected": 0.353952020406723, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1848937844217152, |
|
"grad_norm": 29.714988708496094, |
|
"learning_rate": 1.959422547660869e-05, |
|
"logits/chosen": -0.2492908537387848, |
|
"logits/rejected": -0.779377818107605, |
|
"logps/chosen": -198.94345092773438, |
|
"logps/rejected": -169.714599609375, |
|
"loss": 0.6366, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.5711004734039307, |
|
"rewards/margins": 0.413928359746933, |
|
"rewards/rejected": 0.15717211365699768, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.1888276947285602, |
|
"grad_norm": 24.506587982177734, |
|
"learning_rate": 1.955456996507499e-05, |
|
"logits/chosen": -0.019927600398659706, |
|
"logits/rejected": -0.43524104356765747, |
|
"logps/chosen": -197.2928009033203, |
|
"logps/rejected": -168.06382751464844, |
|
"loss": 0.6361, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.787044107913971, |
|
"rewards/margins": 0.3754611909389496, |
|
"rewards/rejected": 0.4115828573703766, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19276160503540518, |
|
"grad_norm": 24.652503967285156, |
|
"learning_rate": 1.9513109975493553e-05, |
|
"logits/chosen": -0.30659085512161255, |
|
"logits/rejected": -0.6158447265625, |
|
"logps/chosen": -207.3615264892578, |
|
"logps/rejected": -198.04635620117188, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.8901578783988953, |
|
"rewards/margins": 0.45508089661598206, |
|
"rewards/rejected": 0.4350770115852356, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.1966955153422502, |
|
"grad_norm": 22.106698989868164, |
|
"learning_rate": 1.9469853338006515e-05, |
|
"logits/chosen": -0.07243610918521881, |
|
"logits/rejected": -0.2781897187232971, |
|
"logps/chosen": -203.30215454101562, |
|
"logps/rejected": -188.57080078125, |
|
"loss": 0.7046, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.7840886116027832, |
|
"rewards/margins": 0.2757692337036133, |
|
"rewards/rejected": 0.5083193778991699, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2006294256490952, |
|
"grad_norm": 17.76561164855957, |
|
"learning_rate": 1.9424808222071337e-05, |
|
"logits/chosen": -0.1372375786304474, |
|
"logits/rejected": -0.4728778898715973, |
|
"logps/chosen": -218.58462524414062, |
|
"logps/rejected": -192.29983520507812, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.8517538905143738, |
|
"rewards/margins": 0.47115468978881836, |
|
"rewards/rejected": 0.3805992603302002, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.2045633359559402, |
|
"grad_norm": 21.741724014282227, |
|
"learning_rate": 1.9377983134917868e-05, |
|
"logits/chosen": -0.42930954694747925, |
|
"logits/rejected": -0.6508566737174988, |
|
"logps/chosen": -196.40382385253906, |
|
"logps/rejected": -180.81784057617188, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.42375677824020386, |
|
"rewards/margins": 0.3472265601158142, |
|
"rewards/rejected": 0.07653021067380905, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2084972462627852, |
|
"grad_norm": 25.856201171875, |
|
"learning_rate": 1.9329386919941694e-05, |
|
"logits/chosen": -0.5100887417793274, |
|
"logits/rejected": -0.896782398223877, |
|
"logps/chosen": -200.4944610595703, |
|
"logps/rejected": -168.5055694580078, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.5903893709182739, |
|
"rewards/margins": 0.45923447608947754, |
|
"rewards/rejected": 0.13115492463111877, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.21243115656963021, |
|
"grad_norm": 21.10732078552246, |
|
"learning_rate": 1.927902875503397e-05, |
|
"logits/chosen": -0.2257436066865921, |
|
"logits/rejected": -0.6618258953094482, |
|
"logps/chosen": -216.7244415283203, |
|
"logps/rejected": -172.3234405517578, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.9558561444282532, |
|
"rewards/margins": 0.5136295557022095, |
|
"rewards/rejected": 0.44222649931907654, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.21636506687647522, |
|
"grad_norm": 21.297080993652344, |
|
"learning_rate": 1.9226918150848067e-05, |
|
"logits/chosen": -0.325428307056427, |
|
"logits/rejected": -0.6309774518013, |
|
"logps/chosen": -190.9318389892578, |
|
"logps/rejected": -179.4983673095703, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.1083195209503174, |
|
"rewards/margins": 0.31002935767173767, |
|
"rewards/rejected": 0.7982901334762573, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.22029897718332023, |
|
"grad_norm": 21.540422439575195, |
|
"learning_rate": 1.9173064949003408e-05, |
|
"logits/chosen": -0.05009857565164566, |
|
"logits/rejected": -0.3596547245979309, |
|
"logps/chosen": -200.29823303222656, |
|
"logps/rejected": -180.3629150390625, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.4049633741378784, |
|
"rewards/margins": 0.5104038119316101, |
|
"rewards/rejected": 0.8945595026016235, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22423288749016523, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.9128734540932494e-05, |
|
"logits/chosen": -0.3485383987426758, |
|
"logits/rejected": -0.5194178223609924, |
|
"logps/chosen": -197.75784301757812, |
|
"logps/rejected": -181.0018768310547, |
|
"loss": 0.7351, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.2131370306015015, |
|
"rewards/margins": 0.22653412818908691, |
|
"rewards/rejected": 0.9866029620170593, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.22816679779701024, |
|
"grad_norm": 24.915868759155273, |
|
"learning_rate": 1.9071770513468988e-05, |
|
"logits/chosen": -0.17852464318275452, |
|
"logits/rejected": -0.35372194647789, |
|
"logps/chosen": -193.89865112304688, |
|
"logps/rejected": -187.19973754882812, |
|
"loss": 0.7047, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.9371153116226196, |
|
"rewards/margins": 0.20712292194366455, |
|
"rewards/rejected": 0.7299925088882446, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.23210070810385522, |
|
"grad_norm": 19.513757705688477, |
|
"learning_rate": 1.901309318956141e-05, |
|
"logits/chosen": -0.4217872619628906, |
|
"logits/rejected": -0.7518173456192017, |
|
"logps/chosen": -194.53421020507812, |
|
"logps/rejected": -168.0951385498047, |
|
"loss": 0.7308, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.6975895166397095, |
|
"rewards/margins": 0.23306536674499512, |
|
"rewards/rejected": 0.46452417969703674, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.23603461841070023, |
|
"grad_norm": 18.220582962036133, |
|
"learning_rate": 1.8952713651021227e-05, |
|
"logits/chosen": -0.14223751425743103, |
|
"logits/rejected": -0.4979272484779358, |
|
"logps/chosen": -199.91549682617188, |
|
"logps/rejected": -177.2222900390625, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.819624125957489, |
|
"rewards/margins": 0.39503517746925354, |
|
"rewards/rejected": 0.42458897829055786, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23603461841070023, |
|
"eval_logits/chosen": 1.2563122510910034, |
|
"eval_logits/rejected": 1.0339769124984741, |
|
"eval_logps/chosen": -206.1991424560547, |
|
"eval_logps/rejected": -179.33786010742188, |
|
"eval_loss": 0.7167426347732544, |
|
"eval_rewards/accuracies": 0.6171875, |
|
"eval_rewards/chosen": 0.708461582660675, |
|
"eval_rewards/margins": 0.2904762327671051, |
|
"eval_rewards/rejected": 0.41798537969589233, |
|
"eval_runtime": 284.7459, |
|
"eval_samples_per_second": 2.248, |
|
"eval_steps_per_second": 0.14, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23996852871754523, |
|
"grad_norm": 23.576587677001953, |
|
"learning_rate": 1.8890643301140487e-05, |
|
"logits/chosen": -0.5384713411331177, |
|
"logits/rejected": -0.8448705673217773, |
|
"logps/chosen": -197.2958526611328, |
|
"logps/rejected": -165.64370727539062, |
|
"loss": 0.6409, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.6531845331192017, |
|
"rewards/margins": 0.39299410581588745, |
|
"rewards/rejected": 0.2601904273033142, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 18.40612030029297, |
|
"learning_rate": 1.8826893862538233e-05, |
|
"logits/chosen": -0.3022890090942383, |
|
"logits/rejected": -0.5158249735832214, |
|
"logps/chosen": -207.9346160888672, |
|
"logps/rejected": -193.0900115966797, |
|
"loss": 0.7895, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.7925726771354675, |
|
"rewards/margins": 0.08937112987041473, |
|
"rewards/rejected": 0.7032015919685364, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.24783634933123525, |
|
"grad_norm": 18.7589168548584, |
|
"learning_rate": 1.8761477374946548e-05, |
|
"logits/chosen": -0.12031130492687225, |
|
"logits/rejected": -0.4747944474220276, |
|
"logps/chosen": -211.0299530029297, |
|
"logps/rejected": -186.3873291015625, |
|
"loss": 0.6952, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9918599128723145, |
|
"rewards/margins": 0.28354746103286743, |
|
"rewards/rejected": 0.7083123922348022, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.25177025963808025, |
|
"grad_norm": 20.57366180419922, |
|
"learning_rate": 1.869440619293672e-05, |
|
"logits/chosen": 0.015002071857452393, |
|
"logits/rejected": -0.4523535668849945, |
|
"logps/chosen": -215.18704223632812, |
|
"logps/rejected": -179.958984375, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.9286333918571472, |
|
"rewards/margins": 0.4492555558681488, |
|
"rewards/rejected": 0.4793778359889984, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.25570416994492523, |
|
"grad_norm": 24.69734001159668, |
|
"learning_rate": 1.8625692983585976e-05, |
|
"logits/chosen": -0.3278903663158417, |
|
"logits/rejected": -0.9296085238456726, |
|
"logps/chosen": -212.3651580810547, |
|
"logps/rejected": -168.00753784179688, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.7741891741752625, |
|
"rewards/margins": 0.3930490016937256, |
|
"rewards/rejected": 0.38114017248153687, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.25963808025177026, |
|
"grad_norm": 27.854631423950195, |
|
"learning_rate": 1.855535072408516e-05, |
|
"logits/chosen": -0.4728453755378723, |
|
"logits/rejected": -0.6778625249862671, |
|
"logps/chosen": -211.9385528564453, |
|
"logps/rejected": -193.85667419433594, |
|
"loss": 0.6953, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.9169828295707703, |
|
"rewards/margins": 0.32869625091552734, |
|
"rewards/rejected": 0.5882865786552429, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.26357199055861524, |
|
"grad_norm": 18.423259735107422, |
|
"learning_rate": 1.8483392699287858e-05, |
|
"logits/chosen": -0.05396045371890068, |
|
"logits/rejected": -0.5624040365219116, |
|
"logps/chosen": -222.1643524169922, |
|
"logps/rejected": -177.35289001464844, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.1117911338806152, |
|
"rewards/margins": 0.5459399223327637, |
|
"rewards/rejected": 0.5658511519432068, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.2675059008654603, |
|
"grad_norm": 23.744850158691406, |
|
"learning_rate": 1.840983249920143e-05, |
|
"logits/chosen": -0.3244122564792633, |
|
"logits/rejected": -0.5297374725341797, |
|
"logps/chosen": -196.14691162109375, |
|
"logps/rejected": -188.9138946533203, |
|
"loss": 0.7056, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.8795296549797058, |
|
"rewards/margins": 0.3909408748149872, |
|
"rewards/rejected": 0.488588809967041, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.27143981117230526, |
|
"grad_norm": 18.513778686523438, |
|
"learning_rate": 1.8334684016420383e-05, |
|
"logits/chosen": -0.08137266337871552, |
|
"logits/rejected": -0.5458197593688965, |
|
"logps/chosen": -232.447509765625, |
|
"logps/rejected": -191.580078125, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.163153052330017, |
|
"rewards/margins": 0.4738085865974426, |
|
"rewards/rejected": 0.6893445253372192, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2753737214791503, |
|
"grad_norm": 15.827184677124023, |
|
"learning_rate": 1.8257961443502626e-05, |
|
"logits/chosen": -0.30110448598861694, |
|
"logits/rejected": -0.6258831024169922, |
|
"logps/chosen": -190.89808654785156, |
|
"logps/rejected": -173.31884765625, |
|
"loss": 0.6519, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.946982204914093, |
|
"rewards/margins": 0.39443182945251465, |
|
"rewards/rejected": 0.5525503754615784, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.27930763178599527, |
|
"grad_norm": 19.0930118560791, |
|
"learning_rate": 1.8179679270289048e-05, |
|
"logits/chosen": -0.2574307322502136, |
|
"logits/rejected": -0.7561649680137634, |
|
"logps/chosen": -201.4808349609375, |
|
"logps/rejected": -172.31173706054688, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 1.1022285223007202, |
|
"rewards/margins": 0.5637288689613342, |
|
"rewards/rejected": 0.5384997129440308, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.2832415420928403, |
|
"grad_norm": 22.383216857910156, |
|
"learning_rate": 1.8099852281166974e-05, |
|
"logits/chosen": -0.2120940238237381, |
|
"logits/rejected": -0.7636501789093018, |
|
"logps/chosen": -209.04806518554688, |
|
"logps/rejected": -166.7012481689453, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.3941724300384521, |
|
"rewards/margins": 0.5536119341850281, |
|
"rewards/rejected": 0.8405605554580688, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2871754523996853, |
|
"grad_norm": 18.3509578704834, |
|
"learning_rate": 1.8018495552277987e-05, |
|
"logits/chosen": 0.07260416448116302, |
|
"logits/rejected": -0.2597780227661133, |
|
"logps/chosen": -208.8731689453125, |
|
"logps/rejected": -187.85023498535156, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.4085180759429932, |
|
"rewards/margins": 0.5379746556282043, |
|
"rewards/rejected": 0.8705434799194336, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2911093627065303, |
|
"grad_norm": 21.863872528076172, |
|
"learning_rate": 1.7935624448670625e-05, |
|
"logits/chosen": -0.4248635172843933, |
|
"logits/rejected": -0.4336097836494446, |
|
"logps/chosen": -179.680908203125, |
|
"logps/rejected": -173.14013671875, |
|
"loss": 0.75, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.0196665525436401, |
|
"rewards/margins": 0.20690293610095978, |
|
"rewards/rejected": 0.8127636909484863, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2950432730133753, |
|
"grad_norm": 26.93684196472168, |
|
"learning_rate": 1.785125462139855e-05, |
|
"logits/chosen": -0.16947659850120544, |
|
"logits/rejected": -0.451927125453949, |
|
"logps/chosen": -198.48106384277344, |
|
"logps/rejected": -174.99111938476562, |
|
"loss": 0.7696, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.1577861309051514, |
|
"rewards/margins": 0.21412566304206848, |
|
"rewards/rejected": 0.9436607360839844, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2989771833202203, |
|
"grad_norm": 15.670443534851074, |
|
"learning_rate": 1.7765402004564687e-05, |
|
"logits/chosen": -0.1878432035446167, |
|
"logits/rejected": -0.5365083813667297, |
|
"logps/chosen": -204.27255249023438, |
|
"logps/rejected": -175.6739959716797, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.1427654027938843, |
|
"rewards/margins": 0.44054698944091797, |
|
"rewards/rejected": 0.7022184133529663, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3029110936270653, |
|
"grad_norm": 20.738510131835938, |
|
"learning_rate": 1.76780828123119e-05, |
|
"logits/chosen": -0.22227105498313904, |
|
"logits/rejected": -0.4939172863960266, |
|
"logps/chosen": -204.56930541992188, |
|
"logps/rejected": -187.81863403320312, |
|
"loss": 0.6359, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.086004376411438, |
|
"rewards/margins": 0.5049671530723572, |
|
"rewards/rejected": 0.5810372233390808, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3068450039339103, |
|
"grad_norm": 15.985719680786133, |
|
"learning_rate": 1.7589313535760787e-05, |
|
"logits/chosen": -0.33505499362945557, |
|
"logits/rejected": -0.5057377219200134, |
|
"logps/chosen": -203.09201049804688, |
|
"logps/rejected": -186.1582489013672, |
|
"loss": 0.728, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.9475752115249634, |
|
"rewards/margins": 0.21062707901000977, |
|
"rewards/rejected": 0.7369481325149536, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3107789142407553, |
|
"grad_norm": 15.00536823272705, |
|
"learning_rate": 1.7499110939895162e-05, |
|
"logits/chosen": -0.2682803273200989, |
|
"logits/rejected": -0.6644273400306702, |
|
"logps/chosen": -197.18655395507812, |
|
"logps/rejected": -184.64974975585938, |
|
"loss": 0.7331, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.4725784361362457, |
|
"rewards/margins": 0.17313337326049805, |
|
"rewards/rejected": 0.29944509267807007, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3147128245476003, |
|
"grad_norm": 18.541942596435547, |
|
"learning_rate": 1.7407492060395835e-05, |
|
"logits/chosen": -0.3485754132270813, |
|
"logits/rejected": -0.6408174633979797, |
|
"logps/chosen": -196.4596710205078, |
|
"logps/rejected": -178.34701538085938, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.33864206075668335, |
|
"rewards/margins": 0.22059743106365204, |
|
"rewards/rejected": 0.11804463714361191, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3147128245476003, |
|
"eval_logits/chosen": 1.2971076965332031, |
|
"eval_logits/rejected": 1.0804717540740967, |
|
"eval_logps/chosen": -207.33456420898438, |
|
"eval_logps/rejected": -180.31930541992188, |
|
"eval_loss": 0.7093836069107056, |
|
"eval_rewards/accuracies": 0.598437488079071, |
|
"eval_rewards/chosen": 0.2542892098426819, |
|
"eval_rewards/margins": 0.22887463867664337, |
|
"eval_rewards/rejected": 0.025414561852812767, |
|
"eval_runtime": 301.2073, |
|
"eval_samples_per_second": 2.125, |
|
"eval_steps_per_second": 0.133, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.31864673485444533, |
|
"grad_norm": 22.79604148864746, |
|
"learning_rate": 1.731447420042321e-05, |
|
"logits/chosen": -0.33927303552627563, |
|
"logits/rejected": -0.5682342052459717, |
|
"logps/chosen": -190.31930541992188, |
|
"logps/rejected": -173.07032775878906, |
|
"loss": 0.7979, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0728757381439209, |
|
"rewards/margins": 0.02971130609512329, |
|
"rewards/rejected": 0.0431644432246685, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3225806451612903, |
|
"grad_norm": 22.005783081054688, |
|
"learning_rate": 1.7220074927349452e-05, |
|
"logits/chosen": -0.3349539339542389, |
|
"logits/rejected": -0.6785364151000977, |
|
"logps/chosen": -205.6999969482422, |
|
"logps/rejected": -174.34982299804688, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.04839733988046646, |
|
"rewards/margins": 0.2823019027709961, |
|
"rewards/rejected": -0.23390455543994904, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.32651455546813535, |
|
"grad_norm": 18.50445556640625, |
|
"learning_rate": 1.712431206944067e-05, |
|
"logits/chosen": -0.31676384806632996, |
|
"logits/rejected": -0.47476306557655334, |
|
"logps/chosen": -194.7633056640625, |
|
"logps/rejected": -185.64987182617188, |
|
"loss": 0.6637, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.2732119560241699, |
|
"rewards/margins": 0.3499985337257385, |
|
"rewards/rejected": -0.07678655534982681, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3304484657749803, |
|
"grad_norm": 21.16750144958496, |
|
"learning_rate": 1.7027203712489902e-05, |
|
"logits/chosen": -0.22730335593223572, |
|
"logits/rejected": -0.6324140429496765, |
|
"logps/chosen": -209.23678588867188, |
|
"logps/rejected": -177.7320098876953, |
|
"loss": 0.7066, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.41964513063430786, |
|
"rewards/margins": 0.263131707906723, |
|
"rewards/rejected": 0.15651337802410126, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.33438237608182536, |
|
"grad_norm": 21.21584129333496, |
|
"learning_rate": 1.6928768196401403e-05, |
|
"logits/chosen": -0.19787462055683136, |
|
"logits/rejected": -0.5100497007369995, |
|
"logps/chosen": -213.1494140625, |
|
"logps/rejected": -194.2113800048828, |
|
"loss": 0.7113, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.434047132730484, |
|
"rewards/margins": 0.20316064357757568, |
|
"rewards/rejected": 0.2308865338563919, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.33831628638867034, |
|
"grad_norm": 26.320444107055664, |
|
"learning_rate": 1.682902411172698e-05, |
|
"logits/chosen": -0.27940934896469116, |
|
"logits/rejected": -0.6819210052490234, |
|
"logps/chosen": -191.19189453125, |
|
"logps/rejected": -160.06234741210938, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6531416177749634, |
|
"rewards/margins": 0.32751747965812683, |
|
"rewards/rejected": 0.32562416791915894, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3422501966955153, |
|
"grad_norm": 16.507688522338867, |
|
"learning_rate": 1.6727990296154962e-05, |
|
"logits/chosen": -0.43093472719192505, |
|
"logits/rejected": -0.6659766435623169, |
|
"logps/chosen": -194.37916564941406, |
|
"logps/rejected": -175.87298583984375, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9038249254226685, |
|
"rewards/margins": 0.3305993974208832, |
|
"rewards/rejected": 0.5732254385948181, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.34618410700236035, |
|
"grad_norm": 15.00309944152832, |
|
"learning_rate": 1.6625685830952533e-05, |
|
"logits/chosen": -0.017139725387096405, |
|
"logits/rejected": -0.5116509199142456, |
|
"logps/chosen": -203.77554321289062, |
|
"logps/rejected": -166.87571716308594, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.8991168141365051, |
|
"rewards/margins": 0.4240299165248871, |
|
"rewards/rejected": 0.47508686780929565, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.35011801730920533, |
|
"grad_norm": 22.238525390625, |
|
"learning_rate": 1.6522130037362018e-05, |
|
"logits/chosen": -0.4809524416923523, |
|
"logits/rejected": -0.77618408203125, |
|
"logps/chosen": -183.9463348388672, |
|
"logps/rejected": -168.94070434570312, |
|
"loss": 0.7005, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9337043762207031, |
|
"rewards/margins": 0.2749274969100952, |
|
"rewards/rejected": 0.6587768197059631, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.35405192761605037, |
|
"grad_norm": 17.745378494262695, |
|
"learning_rate": 1.641734247295189e-05, |
|
"logits/chosen": -0.4837673306465149, |
|
"logits/rejected": -0.8133207559585571, |
|
"logps/chosen": -187.5880126953125, |
|
"logps/rejected": -172.59933471679688, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.9450648427009583, |
|
"rewards/margins": 0.3392513394355774, |
|
"rewards/rejected": 0.6058135032653809, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.35798583792289534, |
|
"grad_norm": 21.806243896484375, |
|
"learning_rate": 1.63113429279231e-05, |
|
"logits/chosen": -0.3670351207256317, |
|
"logits/rejected": -0.7418017387390137, |
|
"logps/chosen": -221.2038116455078, |
|
"logps/rejected": -184.3399200439453, |
|
"loss": 0.7212, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8858639001846313, |
|
"rewards/margins": 0.2686173915863037, |
|
"rewards/rejected": 0.6172465085983276, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3619197482297404, |
|
"grad_norm": 19.19058609008789, |
|
"learning_rate": 1.6204151421371504e-05, |
|
"logits/chosen": -0.5260201692581177, |
|
"logits/rejected": -0.887170672416687, |
|
"logps/chosen": -198.56930541992188, |
|
"logps/rejected": -170.34158325195312, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6595619320869446, |
|
"rewards/margins": 0.25892138481140137, |
|
"rewards/rejected": 0.4006405472755432, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.36585365853658536, |
|
"grad_norm": 16.740882873535156, |
|
"learning_rate": 1.609578819750708e-05, |
|
"logits/chosen": -0.21146011352539062, |
|
"logits/rejected": -0.41337770223617554, |
|
"logps/chosen": -186.92779541015625, |
|
"logps/rejected": -183.7529754638672, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.568415641784668, |
|
"rewards/margins": 0.27034991979599, |
|
"rewards/rejected": 0.298065721988678, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3697875688434304, |
|
"grad_norm": 22.620988845825195, |
|
"learning_rate": 1.5986273721830557e-05, |
|
"logits/chosen": -0.17011170089244843, |
|
"logits/rejected": -0.5642642974853516, |
|
"logps/chosen": -206.16073608398438, |
|
"logps/rejected": -187.0243377685547, |
|
"loss": 0.73, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.44800883531570435, |
|
"rewards/margins": 0.19431404769420624, |
|
"rewards/rejected": 0.2536947727203369, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.37372147915027537, |
|
"grad_norm": 19.39198112487793, |
|
"learning_rate": 1.587562867726832e-05, |
|
"logits/chosen": -0.18244773149490356, |
|
"logits/rejected": -0.5230101346969604, |
|
"logps/chosen": -223.02371215820312, |
|
"logps/rejected": -198.8177032470703, |
|
"loss": 0.6721, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4324628710746765, |
|
"rewards/margins": 0.22442837059497833, |
|
"rewards/rejected": 0.20803451538085938, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3776553894571204, |
|
"grad_norm": 19.32149314880371, |
|
"learning_rate": 1.5763873960266236e-05, |
|
"logits/chosen": -0.29324209690093994, |
|
"logits/rejected": -0.5279776453971863, |
|
"logps/chosen": -206.15469360351562, |
|
"logps/rejected": -188.80137634277344, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.4868395924568176, |
|
"rewards/margins": 0.33774086833000183, |
|
"rewards/rejected": 0.14909867942333221, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3815892997639654, |
|
"grad_norm": 19.483469009399414, |
|
"learning_rate": 1.5673685398812467e-05, |
|
"logits/chosen": -0.1828387826681137, |
|
"logits/rejected": -0.41064882278442383, |
|
"logps/chosen": -217.49295043945312, |
|
"logps/rejected": -198.88177490234375, |
|
"loss": 0.7507, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 1.0268093347549438, |
|
"rewards/margins": 0.21613208949565887, |
|
"rewards/rejected": 0.810677170753479, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.38552321007081036, |
|
"grad_norm": 28.394817352294922, |
|
"learning_rate": 1.555998659687541e-05, |
|
"logits/chosen": -0.49702200293540955, |
|
"logits/rejected": -1.0014259815216064, |
|
"logps/chosen": -197.88128662109375, |
|
"logps/rejected": -160.67999267578125, |
|
"loss": 0.6519, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.9069635272026062, |
|
"rewards/margins": 0.40647339820861816, |
|
"rewards/rejected": 0.5004900693893433, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3894571203776554, |
|
"grad_norm": 20.914031982421875, |
|
"learning_rate": 1.544523773472669e-05, |
|
"logits/chosen": 0.02130720391869545, |
|
"logits/rejected": -0.4486933648586273, |
|
"logps/chosen": -211.362060546875, |
|
"logps/rejected": -175.72430419921875, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.1584622859954834, |
|
"rewards/margins": 0.3790398836135864, |
|
"rewards/rejected": 0.7794222831726074, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3933910306845004, |
|
"grad_norm": 14.320610046386719, |
|
"learning_rate": 1.532946048386001e-05, |
|
"logits/chosen": -0.010864943265914917, |
|
"logits/rejected": -0.5150319337844849, |
|
"logps/chosen": -207.92333984375, |
|
"logps/rejected": -178.11700439453125, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.1484225988388062, |
|
"rewards/margins": 0.4593985676765442, |
|
"rewards/rejected": 0.6890240907669067, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3933910306845004, |
|
"eval_logits/chosen": 1.327344536781311, |
|
"eval_logits/rejected": 1.1055529117584229, |
|
"eval_logps/chosen": -205.45755004882812, |
|
"eval_logps/rejected": -178.61904907226562, |
|
"eval_loss": 0.7026852369308472, |
|
"eval_rewards/accuracies": 0.620312511920929, |
|
"eval_rewards/chosen": 1.0051077604293823, |
|
"eval_rewards/margins": 0.29959002137184143, |
|
"eval_rewards/rejected": 0.7055177688598633, |
|
"eval_runtime": 297.7987, |
|
"eval_samples_per_second": 2.149, |
|
"eval_steps_per_second": 0.134, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3973249409913454, |
|
"grad_norm": 17.606443405151367, |
|
"learning_rate": 1.5212676709990762e-05, |
|
"logits/chosen": 0.12024303525686264, |
|
"logits/rejected": -0.33552008867263794, |
|
"logps/chosen": -205.59109497070312, |
|
"logps/rejected": -181.02566528320312, |
|
"loss": 0.6522, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.9811790585517883, |
|
"rewards/margins": 0.43993645906448364, |
|
"rewards/rejected": 0.5412425994873047, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.4012588512981904, |
|
"grad_norm": 23.3114070892334, |
|
"learning_rate": 1.509490846892649e-05, |
|
"logits/chosen": 0.01656034216284752, |
|
"logits/rejected": -0.5744299292564392, |
|
"logps/chosen": -211.2788543701172, |
|
"logps/rejected": -167.57276916503906, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.8017475008964539, |
|
"rewards/margins": 0.5002428293228149, |
|
"rewards/rejected": 0.3015046715736389, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4051927616050354, |
|
"grad_norm": 14.10328197479248, |
|
"learning_rate": 1.4976178002401408e-05, |
|
"logits/chosen": -0.3282383978366852, |
|
"logits/rejected": -0.48758015036582947, |
|
"logps/chosen": -200.8679962158203, |
|
"logps/rejected": -179.44241333007812, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6766945719718933, |
|
"rewards/margins": 0.3457737863063812, |
|
"rewards/rejected": 0.33092084527015686, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.4091266719118804, |
|
"grad_norm": 26.593978881835938, |
|
"learning_rate": 1.4856507733875837e-05, |
|
"logits/chosen": -0.1160442978143692, |
|
"logits/rejected": -0.4207191467285156, |
|
"logps/chosen": -190.7376708984375, |
|
"logps/rejected": -169.13816833496094, |
|
"loss": 0.7379, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.7750043869018555, |
|
"rewards/margins": 0.34026703238487244, |
|
"rewards/rejected": 0.43473726511001587, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.41306058221872544, |
|
"grad_norm": 17.67402458190918, |
|
"learning_rate": 1.4735920264301288e-05, |
|
"logits/chosen": -0.17023354768753052, |
|
"logits/rejected": -0.5197206735610962, |
|
"logps/chosen": -207.9748077392578, |
|
"logps/rejected": -182.002197265625, |
|
"loss": 0.7135, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.5062464475631714, |
|
"rewards/margins": 0.19488921761512756, |
|
"rewards/rejected": 0.31135720014572144, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.4169944925255704, |
|
"grad_norm": 16.364791870117188, |
|
"learning_rate": 1.4614438367852056e-05, |
|
"logits/chosen": -0.35339441895484924, |
|
"logits/rejected": -0.6959262490272522, |
|
"logps/chosen": -202.8052215576172, |
|
"logps/rejected": -167.2289276123047, |
|
"loss": 0.6573, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.89134281873703, |
|
"rewards/margins": 0.39820951223373413, |
|
"rewards/rejected": 0.4931332468986511, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4209284028324154, |
|
"grad_norm": 19.59364891052246, |
|
"learning_rate": 1.4492084987624071e-05, |
|
"logits/chosen": -0.1122426763176918, |
|
"logits/rejected": -0.44985610246658325, |
|
"logps/chosen": -204.77981567382812, |
|
"logps/rejected": -181.18716430664062, |
|
"loss": 0.6709, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9437880516052246, |
|
"rewards/margins": 0.4574647545814514, |
|
"rewards/rejected": 0.48632335662841797, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.42486231313926043, |
|
"grad_norm": 17.59402084350586, |
|
"learning_rate": 1.4368883231301885e-05, |
|
"logits/chosen": -0.17638197541236877, |
|
"logits/rejected": -0.5632339715957642, |
|
"logps/chosen": -201.26885986328125, |
|
"logps/rejected": -170.08328247070312, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.184136986732483, |
|
"rewards/margins": 0.756480872631073, |
|
"rewards/rejected": 0.42765602469444275, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4287962234461054, |
|
"grad_norm": 27.206796646118164, |
|
"learning_rate": 1.4244856366794517e-05, |
|
"logits/chosen": -0.057549990713596344, |
|
"logits/rejected": -0.4487794041633606, |
|
"logps/chosen": -205.1177215576172, |
|
"logps/rejected": -177.13014221191406, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.0669742822647095, |
|
"rewards/margins": 0.5120534896850586, |
|
"rewards/rejected": 0.5549208521842957, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.43273013375295044, |
|
"grad_norm": 16.399995803833008, |
|
"learning_rate": 1.4120027817841098e-05, |
|
"logits/chosen": -0.133390873670578, |
|
"logits/rejected": -0.47696390748023987, |
|
"logps/chosen": -214.5057373046875, |
|
"logps/rejected": -193.0947265625, |
|
"loss": 0.808, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.8214758038520813, |
|
"rewards/margins": 0.04125159978866577, |
|
"rewards/rejected": 0.7802242040634155, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4366640440597954, |
|
"grad_norm": 18.979785919189453, |
|
"learning_rate": 1.399442115958704e-05, |
|
"logits/chosen": -0.569675862789154, |
|
"logits/rejected": -0.8924716711044312, |
|
"logps/chosen": -211.4713897705078, |
|
"logps/rejected": -183.01220703125, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.8996235132217407, |
|
"rewards/margins": 0.45010414719581604, |
|
"rewards/rejected": 0.4495193362236023, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.44059795436664045, |
|
"grad_norm": 21.638757705688477, |
|
"learning_rate": 1.3868060114131644e-05, |
|
"logits/chosen": -0.22702725231647491, |
|
"logits/rejected": -0.5234431028366089, |
|
"logps/chosen": -210.87393188476562, |
|
"logps/rejected": -195.6029052734375, |
|
"loss": 0.738, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 1.0586285591125488, |
|
"rewards/margins": 0.27768781781196594, |
|
"rewards/rejected": 0.7809406518936157, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.44453186467348543, |
|
"grad_norm": 23.013927459716797, |
|
"learning_rate": 1.3740968546047935e-05, |
|
"logits/chosen": -0.17697608470916748, |
|
"logits/rejected": -0.4483562409877777, |
|
"logps/chosen": -211.2060089111328, |
|
"logps/rejected": -197.86001586914062, |
|
"loss": 0.7594, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.9211471676826477, |
|
"rewards/margins": 0.0961461290717125, |
|
"rewards/rejected": 0.825001060962677, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.44846577498033047, |
|
"grad_norm": 20.101484298706055, |
|
"learning_rate": 1.3613170457875579e-05, |
|
"logits/chosen": -0.22834663093090057, |
|
"logits/rejected": -0.6228377223014832, |
|
"logps/chosen": -207.5561065673828, |
|
"logps/rejected": -182.3037567138672, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.1122691631317139, |
|
"rewards/margins": 0.5503975749015808, |
|
"rewards/rejected": 0.5618715882301331, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.45239968528717545, |
|
"grad_norm": 26.358943939208984, |
|
"learning_rate": 1.348468998558779e-05, |
|
"logits/chosen": -0.13707995414733887, |
|
"logits/rejected": -0.44805946946144104, |
|
"logps/chosen": -220.7776641845703, |
|
"logps/rejected": -201.1964874267578, |
|
"loss": 0.713, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.9859493374824524, |
|
"rewards/margins": 0.3383699953556061, |
|
"rewards/rejected": 0.6475793123245239, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4563335955940205, |
|
"grad_norm": 16.33328628540039, |
|
"learning_rate": 1.3355551394032968e-05, |
|
"logits/chosen": -0.31562569737434387, |
|
"logits/rejected": -0.6708458065986633, |
|
"logps/chosen": -203.0553436279297, |
|
"logps/rejected": -176.8132781982422, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.7495515942573547, |
|
"rewards/margins": 0.3594974875450134, |
|
"rewards/rejected": 0.39005404710769653, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.46026750590086546, |
|
"grad_norm": 29.162113189697266, |
|
"learning_rate": 1.3225779072352066e-05, |
|
"logits/chosen": -0.32384806871414185, |
|
"logits/rejected": -0.6729586124420166, |
|
"logps/chosen": -214.14102172851562, |
|
"logps/rejected": -184.0008087158203, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8371032476425171, |
|
"rewards/margins": 0.3700554370880127, |
|
"rewards/rejected": 0.4670478403568268, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.46420141620771044, |
|
"grad_norm": 25.16128921508789, |
|
"learning_rate": 1.309539752937243e-05, |
|
"logits/chosen": -0.256720632314682, |
|
"logits/rejected": -0.4291699528694153, |
|
"logps/chosen": -191.2805938720703, |
|
"logps/rejected": -184.6292266845703, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6577237248420715, |
|
"rewards/margins": 0.28180426359176636, |
|
"rewards/rejected": 0.3759194016456604, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.46813532651455547, |
|
"grad_norm": 20.09102439880371, |
|
"learning_rate": 1.2964431388979075e-05, |
|
"logits/chosen": -0.3570843040943146, |
|
"logits/rejected": -0.8670114278793335, |
|
"logps/chosen": -203.76992797851562, |
|
"logps/rejected": -163.80783081054688, |
|
"loss": 0.6412, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7835728526115417, |
|
"rewards/margins": 0.5176677703857422, |
|
"rewards/rejected": 0.2659050524234772, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.47206923682140045, |
|
"grad_norm": 22.330236434936523, |
|
"learning_rate": 1.2832905385464193e-05, |
|
"logits/chosen": -0.3153493404388428, |
|
"logits/rejected": -0.6954606771469116, |
|
"logps/chosen": -199.0489501953125, |
|
"logps/rejected": -172.42919921875, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7396122217178345, |
|
"rewards/margins": 0.3455941677093506, |
|
"rewards/rejected": 0.39401811361312866, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.47206923682140045, |
|
"eval_logits/chosen": 1.3154770135879517, |
|
"eval_logits/rejected": 1.0959367752075195, |
|
"eval_logps/chosen": -205.95361328125, |
|
"eval_logps/rejected": -179.14404296875, |
|
"eval_loss": 0.688846230506897, |
|
"eval_rewards/accuracies": 0.6234375238418579, |
|
"eval_rewards/chosen": 0.8066827058792114, |
|
"eval_rewards/margins": 0.3111591935157776, |
|
"eval_rewards/rejected": 0.49552351236343384, |
|
"eval_runtime": 282.0013, |
|
"eval_samples_per_second": 2.269, |
|
"eval_steps_per_second": 0.142, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4760031471282455, |
|
"grad_norm": 13.301490783691406, |
|
"learning_rate": 1.2700844358855853e-05, |
|
"logits/chosen": -0.2941150367259979, |
|
"logits/rejected": -0.7340162992477417, |
|
"logps/chosen": -194.4886932373047, |
|
"logps/rejected": -159.5877227783203, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.846507728099823, |
|
"rewards/margins": 0.3602963089942932, |
|
"rewards/rejected": 0.48621147871017456, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.47993705743509046, |
|
"grad_norm": 19.667444229125977, |
|
"learning_rate": 1.2568273250226681e-05, |
|
"logits/chosen": -0.2455168217420578, |
|
"logits/rejected": -0.608180820941925, |
|
"logps/chosen": -225.4668426513672, |
|
"logps/rejected": -192.55905151367188, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 1.017348289489746, |
|
"rewards/margins": 0.38524192571640015, |
|
"rewards/rejected": 0.632106363773346, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4838709677419355, |
|
"grad_norm": 24.933828353881836, |
|
"learning_rate": 1.243521709698351e-05, |
|
"logits/chosen": -0.28044039011001587, |
|
"logits/rejected": -0.5124521255493164, |
|
"logps/chosen": -199.1013641357422, |
|
"logps/rejected": -195.05728149414062, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.9729631543159485, |
|
"rewards/margins": 0.31783193349838257, |
|
"rewards/rejected": 0.6551311016082764, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 21.9912109375, |
|
"learning_rate": 1.230170102813879e-05, |
|
"logits/chosen": -0.6046349406242371, |
|
"logits/rejected": -0.8912727236747742, |
|
"logps/chosen": -193.95303344726562, |
|
"logps/rejected": -169.863037109375, |
|
"loss": 0.6994, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9892646670341492, |
|
"rewards/margins": 0.35485339164733887, |
|
"rewards/rejected": 0.6344112753868103, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4917387883556255, |
|
"grad_norm": 14.393425941467285, |
|
"learning_rate": 1.2167750259564733e-05, |
|
"logits/chosen": -0.21057292819023132, |
|
"logits/rejected": -0.6453763246536255, |
|
"logps/chosen": -197.05722045898438, |
|
"logps/rejected": -194.5146942138672, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.8438342809677124, |
|
"rewards/margins": 0.3562160134315491, |
|
"rewards/rejected": 0.4876182973384857, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4956726986624705, |
|
"grad_norm": 27.751855850219727, |
|
"learning_rate": 1.203339008923103e-05, |
|
"logits/chosen": -0.08632899820804596, |
|
"logits/rejected": -0.5858111381530762, |
|
"logps/chosen": -210.37890625, |
|
"logps/rejected": -181.04751586914062, |
|
"loss": 0.7106, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.0096272230148315, |
|
"rewards/margins": 0.4222971796989441, |
|
"rewards/rejected": 0.587330162525177, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4996066089693155, |
|
"grad_norm": 21.017240524291992, |
|
"learning_rate": 1.1898645892427064e-05, |
|
"logits/chosen": -0.48605161905288696, |
|
"logits/rejected": -0.6945669651031494, |
|
"logps/chosen": -182.28805541992188, |
|
"logps/rejected": -169.93661499023438, |
|
"loss": 0.7755, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.5226560831069946, |
|
"rewards/margins": 0.05550839379429817, |
|
"rewards/rejected": 0.46714773774147034, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5035405192761605, |
|
"grad_norm": 20.2221622467041, |
|
"learning_rate": 1.1763543116969549e-05, |
|
"logits/chosen": -0.10474424064159393, |
|
"logits/rejected": -0.5913185477256775, |
|
"logps/chosen": -209.303466796875, |
|
"logps/rejected": -173.1480255126953, |
|
"loss": 0.6692, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.6786335706710815, |
|
"rewards/margins": 0.3942939341068268, |
|
"rewards/rejected": 0.28433966636657715, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5074744295830055, |
|
"grad_norm": 15.26221752166748, |
|
"learning_rate": 1.1628107278396432e-05, |
|
"logits/chosen": -0.06124790757894516, |
|
"logits/rejected": -0.3360343873500824, |
|
"logps/chosen": -202.93270874023438, |
|
"logps/rejected": -184.75259399414062, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.39857378602027893, |
|
"rewards/margins": 0.2742787301540375, |
|
"rewards/rejected": 0.12429501861333847, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5114083398898505, |
|
"grad_norm": 18.45632553100586, |
|
"learning_rate": 1.1492363955148023e-05, |
|
"logits/chosen": -0.1759663075208664, |
|
"logits/rejected": -0.6530739665031433, |
|
"logps/chosen": -218.36123657226562, |
|
"logps/rejected": -199.7471160888672, |
|
"loss": 0.653, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5292393565177917, |
|
"rewards/margins": 0.3620988726615906, |
|
"rewards/rejected": 0.16714049875736237, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5153422501966956, |
|
"grad_norm": 16.891386032104492, |
|
"learning_rate": 1.1356338783736256e-05, |
|
"logits/chosen": -0.4392605721950531, |
|
"logits/rejected": -0.7525895237922668, |
|
"logps/chosen": -194.24301147460938, |
|
"logps/rejected": -182.4429473876953, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4986444115638733, |
|
"rewards/margins": 0.49716418981552124, |
|
"rewards/rejected": 0.0014802322257310152, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5192761605035405, |
|
"grad_norm": 18.568416595458984, |
|
"learning_rate": 1.1220057453902973e-05, |
|
"logits/chosen": -0.2285362035036087, |
|
"logits/rejected": -0.6583995223045349, |
|
"logps/chosen": -219.6389617919922, |
|
"logps/rejected": -176.62965393066406, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.6904179453849792, |
|
"rewards/margins": 0.3659079670906067, |
|
"rewards/rejected": 0.32451000809669495, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5232100708103855, |
|
"grad_norm": 16.81451416015625, |
|
"learning_rate": 1.1083545703768137e-05, |
|
"logits/chosen": -0.3168891370296478, |
|
"logits/rejected": -0.5861741304397583, |
|
"logps/chosen": -198.4099578857422, |
|
"logps/rejected": -181.83871459960938, |
|
"loss": 0.736, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6643240451812744, |
|
"rewards/margins": 0.17423763871192932, |
|
"rewards/rejected": 0.4900864064693451, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5271439811172305, |
|
"grad_norm": 20.030567169189453, |
|
"learning_rate": 1.0946829314968936e-05, |
|
"logits/chosen": -0.22313520312309265, |
|
"logits/rejected": -0.6608983874320984, |
|
"logps/chosen": -206.3205108642578, |
|
"logps/rejected": -178.14974975585938, |
|
"loss": 0.6314, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.784034252166748, |
|
"rewards/margins": 0.45540714263916016, |
|
"rewards/rejected": 0.3286270797252655, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5310778914240756, |
|
"grad_norm": 12.727190017700195, |
|
"learning_rate": 1.0809934107790675e-05, |
|
"logits/chosen": -0.1376127302646637, |
|
"logits/rejected": -0.5582663416862488, |
|
"logps/chosen": -207.121337890625, |
|
"logps/rejected": -189.23037719726562, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.1340868473052979, |
|
"rewards/margins": 0.6862513422966003, |
|
"rewards/rejected": 0.4478355050086975, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5350118017309206, |
|
"grad_norm": 15.704160690307617, |
|
"learning_rate": 1.0672885936290316e-05, |
|
"logits/chosen": -0.11958789825439453, |
|
"logits/rejected": -0.41796404123306274, |
|
"logps/chosen": -200.3405303955078, |
|
"logps/rejected": -185.74917602539062, |
|
"loss": 0.7025, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.0963573455810547, |
|
"rewards/margins": 0.3328610956668854, |
|
"rewards/rejected": 0.7634962797164917, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5389457120377655, |
|
"grad_norm": 16.583145141601562, |
|
"learning_rate": 1.05357106834137e-05, |
|
"logits/chosen": -0.035154812037944794, |
|
"logits/rejected": -0.6018010377883911, |
|
"logps/chosen": -214.5799102783203, |
|
"logps/rejected": -181.4016571044922, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.9387739300727844, |
|
"rewards/margins": 0.34907636046409607, |
|
"rewards/rejected": 0.5896975994110107, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5428796223446105, |
|
"grad_norm": 15.397040367126465, |
|
"learning_rate": 1.0398434256107291e-05, |
|
"logits/chosen": -0.3040166199207306, |
|
"logits/rejected": -0.6104984283447266, |
|
"logps/chosen": -190.73818969726562, |
|
"logps/rejected": -172.9613037109375, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.8516994714736938, |
|
"rewards/margins": 0.3647121787071228, |
|
"rewards/rejected": 0.48698729276657104, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5468135326514555, |
|
"grad_norm": 17.214340209960938, |
|
"learning_rate": 1.0261082580425366e-05, |
|
"logits/chosen": -0.25491005182266235, |
|
"logits/rejected": -0.7748223543167114, |
|
"logps/chosen": -205.028564453125, |
|
"logps/rejected": -169.1365966796875, |
|
"loss": 0.6359, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8545015454292297, |
|
"rewards/margins": 0.4432094693183899, |
|
"rewards/rejected": 0.41129201650619507, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5507474429583006, |
|
"grad_norm": 18.72207260131836, |
|
"learning_rate": 1.012368159663363e-05, |
|
"logits/chosen": -0.43465644121170044, |
|
"logits/rejected": -0.6075267195701599, |
|
"logps/chosen": -198.85336303710938, |
|
"logps/rejected": -185.84034729003906, |
|
"loss": 0.6205, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.8257676959037781, |
|
"rewards/margins": 0.442889541387558, |
|
"rewards/rejected": 0.38287803530693054, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5507474429583006, |
|
"eval_logits/chosen": 1.289400339126587, |
|
"eval_logits/rejected": 1.06741201877594, |
|
"eval_logps/chosen": -206.27685546875, |
|
"eval_logps/rejected": -179.56541442871094, |
|
"eval_loss": 0.6758726835250854, |
|
"eval_rewards/accuracies": 0.6343749761581421, |
|
"eval_rewards/chosen": 0.6773768067359924, |
|
"eval_rewards/margins": 0.3504090905189514, |
|
"eval_rewards/rejected": 0.32696765661239624, |
|
"eval_runtime": 264.1292, |
|
"eval_samples_per_second": 2.423, |
|
"eval_steps_per_second": 0.151, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5546813532651456, |
|
"grad_norm": 20.8519344329834, |
|
"learning_rate": 9.98625725431013e-06, |
|
"logits/chosen": -0.020856428891420364, |
|
"logits/rejected": -0.20043806731700897, |
|
"logps/chosen": -193.96920776367188, |
|
"logps/rejected": -172.1241912841797, |
|
"loss": 0.7039, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.5150532722473145, |
|
"rewards/margins": 0.1648593544960022, |
|
"rewards/rejected": 0.35019388794898987, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5586152635719905, |
|
"grad_norm": 18.23834800720215, |
|
"learning_rate": 9.848835507444405e-06, |
|
"logits/chosen": -0.17138266563415527, |
|
"logits/rejected": -0.5400444269180298, |
|
"logps/chosen": -213.20947265625, |
|
"logps/rejected": -179.41683959960938, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.878060519695282, |
|
"rewards/margins": 0.5326789617538452, |
|
"rewards/rejected": 0.34538155794143677, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5625491738788355, |
|
"grad_norm": 17.19778060913086, |
|
"learning_rate": 9.71144230953582e-06, |
|
"logits/chosen": -0.15033751726150513, |
|
"logits/rejected": -0.6573851108551025, |
|
"logps/chosen": -209.91763305664062, |
|
"logps/rejected": -173.20547485351562, |
|
"loss": 0.637, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7313550710678101, |
|
"rewards/margins": 0.45394793152809143, |
|
"rewards/rejected": 0.27740710973739624, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5664830841856806, |
|
"grad_norm": 17.859058380126953, |
|
"learning_rate": 9.574103608691974e-06, |
|
"logits/chosen": -0.1018882766366005, |
|
"logits/rejected": -0.3827294111251831, |
|
"logps/chosen": -217.5899658203125, |
|
"logps/rejected": -190.86546325683594, |
|
"loss": 0.7034, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.7803667187690735, |
|
"rewards/margins": 0.14793583750724792, |
|
"rewards/rejected": 0.632430911064148, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5704169944925256, |
|
"grad_norm": 17.891475677490234, |
|
"learning_rate": 9.436845342728142e-06, |
|
"logits/chosen": -0.23665161430835724, |
|
"logits/rejected": -0.6916168928146362, |
|
"logps/chosen": -198.93873596191406, |
|
"logps/rejected": -166.03292846679688, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.801128089427948, |
|
"rewards/margins": 0.4237571656703949, |
|
"rewards/rejected": 0.3773708939552307, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5743509047993706, |
|
"grad_norm": 17.744354248046875, |
|
"learning_rate": 9.299693434268653e-06, |
|
"logits/chosen": -0.01328353863209486, |
|
"logits/rejected": -0.2819923758506775, |
|
"logps/chosen": -207.9522705078125, |
|
"logps/rejected": -188.49993896484375, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.904397189617157, |
|
"rewards/margins": 0.36915481090545654, |
|
"rewards/rejected": 0.5352423787117004, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5782848151062155, |
|
"grad_norm": 18.68268394470215, |
|
"learning_rate": 9.162673785851131e-06, |
|
"logits/chosen": -0.39516356587409973, |
|
"logits/rejected": -0.7670010328292847, |
|
"logps/chosen": -204.0966796875, |
|
"logps/rejected": -170.11227416992188, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8424245715141296, |
|
"rewards/margins": 0.40797433257102966, |
|
"rewards/rejected": 0.43445029854774475, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5822187254130606, |
|
"grad_norm": 14.530721664428711, |
|
"learning_rate": 9.025812275034541e-06, |
|
"logits/chosen": -0.14751622080802917, |
|
"logits/rejected": -0.5135005116462708, |
|
"logps/chosen": -225.6256866455078, |
|
"logps/rejected": -200.2797393798828, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.029211401939392, |
|
"rewards/margins": 0.5424867868423462, |
|
"rewards/rejected": 0.48672476410865784, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5861526357199056, |
|
"grad_norm": 18.743927001953125, |
|
"learning_rate": 8.889134749511956e-06, |
|
"logits/chosen": -0.11462094634771347, |
|
"logits/rejected": -0.38805294036865234, |
|
"logps/chosen": -207.6776123046875, |
|
"logps/rejected": -181.88101196289062, |
|
"loss": 0.7368, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7982211709022522, |
|
"rewards/margins": 0.21776151657104492, |
|
"rewards/rejected": 0.580459713935852, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5900865460267506, |
|
"grad_norm": 14.667529106140137, |
|
"learning_rate": 8.752667022228936e-06, |
|
"logits/chosen": -0.022926175966858864, |
|
"logits/rejected": -0.4718795418739319, |
|
"logps/chosen": -216.82284545898438, |
|
"logps/rejected": -186.5943603515625, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.8725186586380005, |
|
"rewards/margins": 0.6078484058380127, |
|
"rewards/rejected": 0.264670193195343, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5940204563335956, |
|
"grad_norm": 20.248031616210938, |
|
"learning_rate": 8.616434866508519e-06, |
|
"logits/chosen": -0.15943610668182373, |
|
"logits/rejected": -0.6148089170455933, |
|
"logps/chosen": -209.1900177001953, |
|
"logps/rejected": -184.60047912597656, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.7891548871994019, |
|
"rewards/margins": 0.48758840560913086, |
|
"rewards/rejected": 0.30156660079956055, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5979543666404405, |
|
"grad_norm": 16.850963592529297, |
|
"learning_rate": 8.480464011183631e-06, |
|
"logits/chosen": -0.2673138678073883, |
|
"logits/rejected": -0.6848293542861938, |
|
"logps/chosen": -201.9542999267578, |
|
"logps/rejected": -168.80638122558594, |
|
"loss": 0.6669, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6175512671470642, |
|
"rewards/margins": 0.30348506569862366, |
|
"rewards/rejected": 0.31406617164611816, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6018882769472856, |
|
"grad_norm": 18.8007755279541, |
|
"learning_rate": 8.344780135737962e-06, |
|
"logits/chosen": -0.31253287196159363, |
|
"logits/rejected": -0.8586766123771667, |
|
"logps/chosen": -212.3469696044922, |
|
"logps/rejected": -163.8748321533203, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.8451669812202454, |
|
"rewards/margins": 0.5855604410171509, |
|
"rewards/rejected": 0.2596065402030945, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.6058221872541306, |
|
"grad_norm": 13.551706314086914, |
|
"learning_rate": 8.209408865456127e-06, |
|
"logits/chosen": -0.13036459684371948, |
|
"logits/rejected": -0.4954930245876312, |
|
"logps/chosen": -213.2278289794922, |
|
"logps/rejected": -188.24514770507812, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8455514907836914, |
|
"rewards/margins": 0.34862059354782104, |
|
"rewards/rejected": 0.49693092703819275, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6097560975609756, |
|
"grad_norm": 17.73063087463379, |
|
"learning_rate": 8.074375766584053e-06, |
|
"logits/chosen": 0.0039010108448565006, |
|
"logits/rejected": -0.5214850306510925, |
|
"logps/chosen": -213.3166046142578, |
|
"logps/rejected": -174.0699005126953, |
|
"loss": 0.717, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.6939308643341064, |
|
"rewards/margins": 0.32082659006118774, |
|
"rewards/rejected": 0.3731042742729187, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6136900078678206, |
|
"grad_norm": 14.620991706848145, |
|
"learning_rate": 7.939706341500555e-06, |
|
"logits/chosen": -0.04872986674308777, |
|
"logits/rejected": -0.4084659516811371, |
|
"logps/chosen": -194.51834106445312, |
|
"logps/rejected": -185.00225830078125, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.8033088445663452, |
|
"rewards/margins": 0.5693622827529907, |
|
"rewards/rejected": 0.23394668102264404, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6176239181746657, |
|
"grad_norm": 13.0098876953125, |
|
"learning_rate": 7.805426023900938e-06, |
|
"logits/chosen": -0.4255433976650238, |
|
"logits/rejected": -0.7939322590827942, |
|
"logps/chosen": -190.10177612304688, |
|
"logps/rejected": -162.91436767578125, |
|
"loss": 0.7034, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6892917156219482, |
|
"rewards/margins": 0.3028218150138855, |
|
"rewards/rejected": 0.38646987080574036, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.6215578284815106, |
|
"grad_norm": 22.03873634338379, |
|
"learning_rate": 7.671560173993588e-06, |
|
"logits/chosen": -0.08852169662714005, |
|
"logits/rejected": -0.4719138741493225, |
|
"logps/chosen": -199.76376342773438, |
|
"logps/rejected": -182.2493896484375, |
|
"loss": 0.6744, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7909868359565735, |
|
"rewards/margins": 0.3397650420665741, |
|
"rewards/rejected": 0.4512217938899994, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6254917387883556, |
|
"grad_norm": 18.647151947021484, |
|
"learning_rate": 7.538134073710437e-06, |
|
"logits/chosen": -0.38996896147727966, |
|
"logits/rejected": -0.6869844198226929, |
|
"logps/chosen": -198.90866088867188, |
|
"logps/rejected": -178.61019897460938, |
|
"loss": 0.7028, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7868278622627258, |
|
"rewards/margins": 0.44276612997055054, |
|
"rewards/rejected": 0.3440617322921753, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6294256490952006, |
|
"grad_norm": 17.837268829345703, |
|
"learning_rate": 7.405172921932214e-06, |
|
"logits/chosen": -0.09680289775133133, |
|
"logits/rejected": -0.4570208191871643, |
|
"logps/chosen": -196.43899536132812, |
|
"logps/rejected": -173.35025024414062, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7571867108345032, |
|
"rewards/margins": 0.43233370780944824, |
|
"rewards/rejected": 0.32485300302505493, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6294256490952006, |
|
"eval_logits/chosen": 1.2894115447998047, |
|
"eval_logits/rejected": 1.0707098245620728, |
|
"eval_logps/chosen": -206.11080932617188, |
|
"eval_logps/rejected": -179.48574829101562, |
|
"eval_loss": 0.6793522834777832, |
|
"eval_rewards/accuracies": 0.6265624761581421, |
|
"eval_rewards/chosen": 0.7437959313392639, |
|
"eval_rewards/margins": 0.384955495595932, |
|
"eval_rewards/rejected": 0.3588404655456543, |
|
"eval_runtime": 298.0621, |
|
"eval_samples_per_second": 2.147, |
|
"eval_steps_per_second": 0.134, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6333595594020456, |
|
"grad_norm": 23.481149673461914, |
|
"learning_rate": 7.272701829729378e-06, |
|
"logits/chosen": -0.09348127245903015, |
|
"logits/rejected": -0.39429792761802673, |
|
"logps/chosen": -222.31369018554688, |
|
"logps/rejected": -189.89024353027344, |
|
"loss": 0.7434, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7474627494812012, |
|
"rewards/margins": 0.24622318148612976, |
|
"rewards/rejected": 0.5012395977973938, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6372934697088907, |
|
"grad_norm": 18.71939468383789, |
|
"learning_rate": 7.140745815619632e-06, |
|
"logits/chosen": -0.09522039443254471, |
|
"logits/rejected": -0.4288865923881531, |
|
"logps/chosen": -198.81405639648438, |
|
"logps/rejected": -192.83120727539062, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.610317587852478, |
|
"rewards/margins": 0.3116861879825592, |
|
"rewards/rejected": 0.2986314296722412, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6412273800157356, |
|
"grad_norm": 18.34478759765625, |
|
"learning_rate": 7.009329800842929e-06, |
|
"logits/chosen": 0.017814218997955322, |
|
"logits/rejected": -0.3244866132736206, |
|
"logps/chosen": -229.75381469726562, |
|
"logps/rejected": -199.60000610351562, |
|
"loss": 0.7092, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.5720285177230835, |
|
"rewards/margins": 0.19818969070911407, |
|
"rewards/rejected": 0.3738388121128082, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.6451612903225806, |
|
"grad_norm": 16.03777313232422, |
|
"learning_rate": 6.878478604654835e-06, |
|
"logits/chosen": -0.284344345331192, |
|
"logits/rejected": -0.6540359258651733, |
|
"logps/chosen": -195.71812438964844, |
|
"logps/rejected": -176.70550537109375, |
|
"loss": 0.5904, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.6928594708442688, |
|
"rewards/margins": 0.6011512875556946, |
|
"rewards/rejected": 0.09170810133218765, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6490952006294256, |
|
"grad_norm": 22.05975914001465, |
|
"learning_rate": 6.748216939639158e-06, |
|
"logits/chosen": 0.07760115712881088, |
|
"logits/rejected": -0.4913705885410309, |
|
"logps/chosen": -190.44102478027344, |
|
"logps/rejected": -163.40457153320312, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5673459768295288, |
|
"rewards/margins": 0.46832141280174255, |
|
"rewards/rejected": 0.09902457147836685, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.6530291109362707, |
|
"grad_norm": 19.04427146911621, |
|
"learning_rate": 6.618569407040736e-06, |
|
"logits/chosen": -0.2564006745815277, |
|
"logits/rejected": -0.621497392654419, |
|
"logps/chosen": -198.78524780273438, |
|
"logps/rejected": -172.7997283935547, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.6063997149467468, |
|
"rewards/margins": 0.4061097204685211, |
|
"rewards/rejected": 0.2002900391817093, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6569630212431157, |
|
"grad_norm": 13.502724647521973, |
|
"learning_rate": 6.489560492119225e-06, |
|
"logits/chosen": 0.06354556977748871, |
|
"logits/rejected": -0.4314854145050049, |
|
"logps/chosen": -215.6816864013672, |
|
"logps/rejected": -183.03579711914062, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.43139171600341797, |
|
"rewards/margins": 0.3207935392856598, |
|
"rewards/rejected": 0.11059819161891937, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6608969315499607, |
|
"grad_norm": 15.181354522705078, |
|
"learning_rate": 6.361214559524817e-06, |
|
"logits/chosen": -0.3440548777580261, |
|
"logits/rejected": -0.6467902660369873, |
|
"logps/chosen": -194.0684814453125, |
|
"logps/rejected": -180.21780395507812, |
|
"loss": 0.615, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5782068967819214, |
|
"rewards/margins": 0.4565269947052002, |
|
"rewards/rejected": 0.12167992442846298, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6648308418568056, |
|
"grad_norm": 79.10075378417969, |
|
"learning_rate": 6.233555848696724e-06, |
|
"logits/chosen": -0.293182373046875, |
|
"logits/rejected": -0.5915425419807434, |
|
"logps/chosen": -208.3809356689453, |
|
"logps/rejected": -191.13064575195312, |
|
"loss": 0.7247, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.5359665155410767, |
|
"rewards/margins": 0.28759217262268066, |
|
"rewards/rejected": 0.24837426841259003, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6687647521636507, |
|
"grad_norm": 18.02682113647461, |
|
"learning_rate": 6.1066084692853224e-06, |
|
"logits/chosen": -0.03417937830090523, |
|
"logits/rejected": -0.43492475152015686, |
|
"logps/chosen": -212.67398071289062, |
|
"logps/rejected": -183.54196166992188, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.3686201870441437, |
|
"rewards/margins": 0.2786737084388733, |
|
"rewards/rejected": 0.08994650840759277, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6726986624704957, |
|
"grad_norm": 17.677215576171875, |
|
"learning_rate": 5.980396396598777e-06, |
|
"logits/chosen": -0.2180563509464264, |
|
"logits/rejected": -0.3799629211425781, |
|
"logps/chosen": -192.2188720703125, |
|
"logps/rejected": -187.93289184570312, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.4506203234195709, |
|
"rewards/margins": 0.31998997926712036, |
|
"rewards/rejected": 0.13063031435012817, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6766325727773407, |
|
"grad_norm": 13.698114395141602, |
|
"learning_rate": 5.854943467075087e-06, |
|
"logits/chosen": -0.22957925498485565, |
|
"logits/rejected": -0.5203697085380554, |
|
"logps/chosen": -198.90037536621094, |
|
"logps/rejected": -180.50279235839844, |
|
"loss": 0.6282, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4433286786079407, |
|
"rewards/margins": 0.4702211916446686, |
|
"rewards/rejected": -0.026892513036727905, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6805664830841857, |
|
"grad_norm": 16.75077247619629, |
|
"learning_rate": 5.730273373780309e-06, |
|
"logits/chosen": -0.3643267750740051, |
|
"logits/rejected": -0.7527881860733032, |
|
"logps/chosen": -193.90756225585938, |
|
"logps/rejected": -173.71755981445312, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.46958428621292114, |
|
"rewards/margins": 0.43391847610473633, |
|
"rewards/rejected": 0.03566574305295944, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6845003933910306, |
|
"grad_norm": 21.622961044311523, |
|
"learning_rate": 5.606409661933889e-06, |
|
"logits/chosen": -0.023716717958450317, |
|
"logits/rejected": -0.3822089731693268, |
|
"logps/chosen": -221.4508056640625, |
|
"logps/rejected": -188.75930786132812, |
|
"loss": 0.7406, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6389329433441162, |
|
"rewards/margins": 0.3061096668243408, |
|
"rewards/rejected": 0.332823246717453, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6884343036978757, |
|
"grad_norm": 19.141998291015625, |
|
"learning_rate": 5.483375724461918e-06, |
|
"logits/chosen": -0.36916786432266235, |
|
"logits/rejected": -0.8393670320510864, |
|
"logps/chosen": -201.64920043945312, |
|
"logps/rejected": -163.6253662109375, |
|
"loss": 0.6788, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.36916905641555786, |
|
"rewards/margins": 0.3855450749397278, |
|
"rewards/rejected": -0.016376061365008354, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6923682140047207, |
|
"grad_norm": 20.840383529663086, |
|
"learning_rate": 5.361194797579108e-06, |
|
"logits/chosen": -0.27600985765457153, |
|
"logits/rejected": -0.7273412346839905, |
|
"logps/chosen": -213.323486328125, |
|
"logps/rejected": -172.2437286376953, |
|
"loss": 0.7035, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7029854655265808, |
|
"rewards/margins": 0.3768990635871887, |
|
"rewards/rejected": 0.3260864317417145, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6963021243115657, |
|
"grad_norm": 20.24435806274414, |
|
"learning_rate": 5.239889956400435e-06, |
|
"logits/chosen": 0.13340488076210022, |
|
"logits/rejected": -0.46101540327072144, |
|
"logps/chosen": -217.2809295654297, |
|
"logps/rejected": -176.47802734375, |
|
"loss": 0.6408, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5957245230674744, |
|
"rewards/margins": 0.416795551776886, |
|
"rewards/rejected": 0.17892900109291077, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.7002360346184107, |
|
"grad_norm": 18.37978172302246, |
|
"learning_rate": 5.119484110583135e-06, |
|
"logits/chosen": -0.4709344506263733, |
|
"logits/rejected": -0.7668399810791016, |
|
"logps/chosen": -200.41390991210938, |
|
"logps/rejected": -169.01779174804688, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.39610370993614197, |
|
"rewards/margins": 0.3159303665161133, |
|
"rewards/rejected": 0.08017335832118988, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7041699449252558, |
|
"grad_norm": 14.384517669677734, |
|
"learning_rate": 5.000000000000003e-06, |
|
"logits/chosen": -0.2237352579832077, |
|
"logits/rejected": -0.7978562116622925, |
|
"logps/chosen": -200.4236297607422, |
|
"logps/rejected": -168.09664916992188, |
|
"loss": 0.6054, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5839098691940308, |
|
"rewards/margins": 0.5763157606124878, |
|
"rewards/rejected": 0.007594155613332987, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.7081038552321007, |
|
"grad_norm": 23.844955444335938, |
|
"learning_rate": 4.881460190444726e-06, |
|
"logits/chosen": -0.57319176197052, |
|
"logits/rejected": -0.7391110062599182, |
|
"logps/chosen": -205.91015625, |
|
"logps/rejected": -186.86459350585938, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5059628486633301, |
|
"rewards/margins": 0.3120475113391876, |
|
"rewards/rejected": 0.19391539692878723, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7081038552321007, |
|
"eval_logits/chosen": 1.2854810953140259, |
|
"eval_logits/rejected": 1.0660665035247803, |
|
"eval_logps/chosen": -206.6718292236328, |
|
"eval_logps/rejected": -179.9932861328125, |
|
"eval_loss": 0.6678879857063293, |
|
"eval_rewards/accuracies": 0.6265624761581421, |
|
"eval_rewards/chosen": 0.519389808177948, |
|
"eval_rewards/margins": 0.3635701537132263, |
|
"eval_rewards/rejected": 0.15581969916820526, |
|
"eval_runtime": 296.5851, |
|
"eval_samples_per_second": 2.158, |
|
"eval_steps_per_second": 0.135, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7120377655389457, |
|
"grad_norm": 14.835896492004395, |
|
"learning_rate": 4.763887069370107e-06, |
|
"logits/chosen": -0.1812276542186737, |
|
"logits/rejected": -0.5340962409973145, |
|
"logps/chosen": -184.94485473632812, |
|
"logps/rejected": -169.592041015625, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.4734250605106354, |
|
"rewards/margins": 0.3993573486804962, |
|
"rewards/rejected": 0.07406774908304214, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.7159716758457907, |
|
"grad_norm": 24.3856143951416, |
|
"learning_rate": 4.64730284165996e-06, |
|
"logits/chosen": -0.04929916188120842, |
|
"logits/rejected": -0.5009157061576843, |
|
"logps/chosen": -225.0531768798828, |
|
"logps/rejected": -193.8749237060547, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7070298194885254, |
|
"rewards/margins": 0.4039214551448822, |
|
"rewards/rejected": 0.3031083941459656, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7199055861526357, |
|
"grad_norm": 22.8303279876709, |
|
"learning_rate": 4.531729525435501e-06, |
|
"logits/chosen": 0.0025139451026916504, |
|
"logits/rejected": -0.6012422442436218, |
|
"logps/chosen": -205.25, |
|
"logps/rejected": -166.71438598632812, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.6045829653739929, |
|
"rewards/margins": 0.42883044481277466, |
|
"rewards/rejected": 0.17575259506702423, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.7238394964594808, |
|
"grad_norm": 14.778836250305176, |
|
"learning_rate": 4.417188947896983e-06, |
|
"logits/chosen": -0.30647343397140503, |
|
"logits/rejected": -0.6068025827407837, |
|
"logps/chosen": -185.31884765625, |
|
"logps/rejected": -171.61390686035156, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5817626118659973, |
|
"rewards/margins": 0.4069378972053528, |
|
"rewards/rejected": 0.17482469975948334, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7277734067663257, |
|
"grad_norm": 14.139073371887207, |
|
"learning_rate": 4.303702741201431e-06, |
|
"logits/chosen": -0.5711551904678345, |
|
"logits/rejected": -0.8691667318344116, |
|
"logps/chosen": -192.8331298828125, |
|
"logps/rejected": -175.0562286376953, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.38311484456062317, |
|
"rewards/margins": 0.3181079924106598, |
|
"rewards/rejected": 0.0650068148970604, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 15.895308494567871, |
|
"learning_rate": 4.1912923383771685e-06, |
|
"logits/chosen": -0.36842986941337585, |
|
"logits/rejected": -0.7152490019798279, |
|
"logps/chosen": -211.0810089111328, |
|
"logps/rejected": -196.27755737304688, |
|
"loss": 0.6735, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5359424948692322, |
|
"rewards/margins": 0.3752870559692383, |
|
"rewards/rejected": 0.16065548360347748, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7356412273800157, |
|
"grad_norm": 14.658058166503906, |
|
"learning_rate": 4.079978969275984e-06, |
|
"logits/chosen": -0.5706170797348022, |
|
"logits/rejected": -0.852310299873352, |
|
"logps/chosen": -176.20578002929688, |
|
"logps/rejected": -158.9827423095703, |
|
"loss": 0.7049, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.29048237204551697, |
|
"rewards/margins": 0.3135663866996765, |
|
"rewards/rejected": -0.023084009066224098, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.7395751376868608, |
|
"grad_norm": 14.667938232421875, |
|
"learning_rate": 3.9697836565636484e-06, |
|
"logits/chosen": -0.0873163565993309, |
|
"logits/rejected": -0.4978067874908447, |
|
"logps/chosen": -219.19210815429688, |
|
"logps/rejected": -186.28640747070312, |
|
"loss": 0.6177, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.48243242502212524, |
|
"rewards/margins": 0.47774791717529297, |
|
"rewards/rejected": 0.004684485495090485, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7435090479937058, |
|
"grad_norm": 20.62685775756836, |
|
"learning_rate": 3.860727211749572e-06, |
|
"logits/chosen": -0.3459232449531555, |
|
"logits/rejected": -0.6185725927352905, |
|
"logps/chosen": -204.01295471191406, |
|
"logps/rejected": -179.14883422851562, |
|
"loss": 0.6996, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7561392188072205, |
|
"rewards/margins": 0.43435636162757874, |
|
"rewards/rejected": 0.3217828571796417, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.7474429583005507, |
|
"grad_norm": 17.138633728027344, |
|
"learning_rate": 3.7528302312563447e-06, |
|
"logits/chosen": -0.21280460059642792, |
|
"logits/rejected": -0.6648741960525513, |
|
"logps/chosen": -207.45266723632812, |
|
"logps/rejected": -169.14617919921875, |
|
"loss": 0.7068, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.38267606496810913, |
|
"rewards/margins": 0.17945952713489532, |
|
"rewards/rejected": 0.2032165229320526, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7513768686073957, |
|
"grad_norm": 58.220947265625, |
|
"learning_rate": 3.646113092529878e-06, |
|
"logits/chosen": -0.21766535937786102, |
|
"logits/rejected": -0.6996904611587524, |
|
"logps/chosen": -225.0487060546875, |
|
"logps/rejected": -184.19442749023438, |
|
"loss": 0.7056, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6735895872116089, |
|
"rewards/margins": 0.4389261305332184, |
|
"rewards/rejected": 0.2346634566783905, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.7553107789142408, |
|
"grad_norm": 20.379343032836914, |
|
"learning_rate": 3.5405959501909313e-06, |
|
"logits/chosen": -0.18848784267902374, |
|
"logits/rejected": -0.5305780172348022, |
|
"logps/chosen": -212.13162231445312, |
|
"logps/rejected": -186.52542114257812, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.3702337145805359, |
|
"rewards/margins": 0.29016590118408203, |
|
"rewards/rejected": 0.08006780594587326, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7592446892210858, |
|
"grad_norm": 17.178056716918945, |
|
"learning_rate": 3.436298732228699e-06, |
|
"logits/chosen": -0.21896116435527802, |
|
"logits/rejected": -0.6624099612236023, |
|
"logps/chosen": -205.2207794189453, |
|
"logps/rejected": -170.05699157714844, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.4240780472755432, |
|
"rewards/margins": 0.37573105096817017, |
|
"rewards/rejected": 0.04834695905447006, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.7631785995279308, |
|
"grad_norm": 19.06415557861328, |
|
"learning_rate": 3.3332411362372063e-06, |
|
"logits/chosen": -0.15206289291381836, |
|
"logits/rejected": -0.4406839907169342, |
|
"logps/chosen": -186.83627319335938, |
|
"logps/rejected": -164.04739379882812, |
|
"loss": 0.6972, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.5690515041351318, |
|
"rewards/margins": 0.338064044713974, |
|
"rewards/rejected": 0.23098750412464142, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7671125098347757, |
|
"grad_norm": 19.997249603271484, |
|
"learning_rate": 3.231442625695217e-06, |
|
"logits/chosen": -0.4492325186729431, |
|
"logits/rejected": -0.6821542978286743, |
|
"logps/chosen": -192.6551971435547, |
|
"logps/rejected": -174.02772521972656, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.41465169191360474, |
|
"rewards/margins": 0.4233173727989197, |
|
"rewards/rejected": -0.008665725588798523, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7710464201416207, |
|
"grad_norm": 21.12126350402832, |
|
"learning_rate": 3.1309224262903614e-06, |
|
"logits/chosen": -0.0248140636831522, |
|
"logits/rejected": -0.2627066373825073, |
|
"logps/chosen": -214.6104278564453, |
|
"logps/rejected": -192.9540557861328, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.5803993940353394, |
|
"rewards/margins": 0.2992710471153259, |
|
"rewards/rejected": 0.28112831711769104, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7749803304484658, |
|
"grad_norm": 12.457499504089355, |
|
"learning_rate": 3.0316995222881584e-06, |
|
"logits/chosen": -0.40065187215805054, |
|
"logits/rejected": -0.8357529640197754, |
|
"logps/chosen": -192.20655822753906, |
|
"logps/rejected": -164.68626403808594, |
|
"loss": 0.6292, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5738979578018188, |
|
"rewards/margins": 0.4073941111564636, |
|
"rewards/rejected": 0.1665038764476776, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7789142407553108, |
|
"grad_norm": 12.965932846069336, |
|
"learning_rate": 2.9337926529466578e-06, |
|
"logits/chosen": -0.5754062533378601, |
|
"logits/rejected": -0.9457462430000305, |
|
"logps/chosen": -189.44522094726562, |
|
"logps/rejected": -169.0963897705078, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.49839162826538086, |
|
"rewards/margins": 0.4758077561855316, |
|
"rewards/rejected": 0.02258378639817238, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7828481510621558, |
|
"grad_norm": 14.567062377929688, |
|
"learning_rate": 2.83722030897733e-06, |
|
"logits/chosen": 0.24449042975902557, |
|
"logits/rejected": -0.30078762769699097, |
|
"logps/chosen": -205.9731903076172, |
|
"logps/rejected": -173.31008911132812, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5201369524002075, |
|
"rewards/margins": 0.5564968585968018, |
|
"rewards/rejected": -0.036359887570142746, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7867820613690008, |
|
"grad_norm": 18.595260620117188, |
|
"learning_rate": 2.7420007290529118e-06, |
|
"logits/chosen": -0.1308153122663498, |
|
"logits/rejected": -0.6352800726890564, |
|
"logps/chosen": -224.5437469482422, |
|
"logps/rejected": -178.47549438476562, |
|
"loss": 0.6361, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.6366292238235474, |
|
"rewards/margins": 0.4467080235481262, |
|
"rewards/rejected": 0.18992114067077637, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7867820613690008, |
|
"eval_logits/chosen": 1.278507113456726, |
|
"eval_logits/rejected": 1.058009147644043, |
|
"eval_logps/chosen": -206.54354858398438, |
|
"eval_logps/rejected": -179.86978149414062, |
|
"eval_loss": 0.6649525165557861, |
|
"eval_rewards/accuracies": 0.625, |
|
"eval_rewards/chosen": 0.5706965923309326, |
|
"eval_rewards/margins": 0.3654647767543793, |
|
"eval_rewards/rejected": 0.20523183047771454, |
|
"eval_runtime": 301.4428, |
|
"eval_samples_per_second": 2.123, |
|
"eval_steps_per_second": 0.133, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7907159716758458, |
|
"grad_norm": 19.2440242767334, |
|
"learning_rate": 2.6481518963628383e-06, |
|
"logits/chosen": -0.11340751498937607, |
|
"logits/rejected": -0.31099405884742737, |
|
"logps/chosen": -212.424072265625, |
|
"logps/rejected": -195.0722198486328, |
|
"loss": 0.6193, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5051929354667664, |
|
"rewards/margins": 0.5108169317245483, |
|
"rewards/rejected": -0.00562392920255661, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.7946498819826908, |
|
"grad_norm": 17.27981185913086, |
|
"learning_rate": 2.555691535216944e-06, |
|
"logits/chosen": -0.2921395003795624, |
|
"logits/rejected": -0.7080395817756653, |
|
"logps/chosen": -208.31747436523438, |
|
"logps/rejected": -180.02212524414062, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4920189380645752, |
|
"rewards/margins": 0.2944754660129547, |
|
"rewards/rejected": 0.19754347205162048, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7985837922895358, |
|
"grad_norm": 12.276522636413574, |
|
"learning_rate": 2.464637107698046e-06, |
|
"logits/chosen": -0.3768986165523529, |
|
"logits/rejected": -0.9090649485588074, |
|
"logps/chosen": -195.61764526367188, |
|
"logps/rejected": -158.5428466796875, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.35970592498779297, |
|
"rewards/margins": 0.34655410051345825, |
|
"rewards/rejected": 0.013151821680366993, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.8025177025963808, |
|
"grad_norm": 12.748953819274902, |
|
"learning_rate": 2.3750058103640427e-06, |
|
"logits/chosen": -0.3452379107475281, |
|
"logits/rejected": -0.8985518217086792, |
|
"logps/chosen": -209.6136016845703, |
|
"logps/rejected": -173.26414489746094, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5482198596000671, |
|
"rewards/margins": 0.4972603917121887, |
|
"rewards/rejected": 0.05095947906374931, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8064516129032258, |
|
"grad_norm": 15.02308177947998, |
|
"learning_rate": 2.286814571000171e-06, |
|
"logits/chosen": -0.2370177060365677, |
|
"logits/rejected": -0.6736031770706177, |
|
"logps/chosen": -194.3092041015625, |
|
"logps/rejected": -164.15817260742188, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.5286887288093567, |
|
"rewards/margins": 0.4581621289253235, |
|
"rewards/rejected": 0.0705266147851944, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8103855232100708, |
|
"grad_norm": 15.919551849365234, |
|
"learning_rate": 2.2000800454220285e-06, |
|
"logits/chosen": -0.04363623261451721, |
|
"logits/rejected": -0.4236673414707184, |
|
"logps/chosen": -209.69235229492188, |
|
"logps/rejected": -175.1033935546875, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5712024569511414, |
|
"rewards/margins": 0.4244155287742615, |
|
"rewards/rejected": 0.1467868834733963, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8143194335169158, |
|
"grad_norm": 16.688159942626953, |
|
"learning_rate": 2.114818614329945e-06, |
|
"logits/chosen": -0.18427999317646027, |
|
"logits/rejected": -0.4734131693840027, |
|
"logps/chosen": -200.3739776611328, |
|
"logps/rejected": -180.11984252929688, |
|
"loss": 0.6447, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.6805271506309509, |
|
"rewards/margins": 0.4109037518501282, |
|
"rewards/rejected": 0.26962336897850037, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.8182533438237608, |
|
"grad_norm": 15.36899471282959, |
|
"learning_rate": 2.031046380215327e-06, |
|
"logits/chosen": -0.5546427965164185, |
|
"logits/rejected": -0.8263591527938843, |
|
"logps/chosen": -180.88345336914062, |
|
"logps/rejected": -167.74163818359375, |
|
"loss": 0.6247, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5958175659179688, |
|
"rewards/margins": 0.5058714151382446, |
|
"rewards/rejected": 0.08994609862565994, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8221872541306058, |
|
"grad_norm": 24.130155563354492, |
|
"learning_rate": 1.9487791643195276e-06, |
|
"logits/chosen": -0.3917720317840576, |
|
"logits/rejected": -0.7242711782455444, |
|
"logps/chosen": -206.98135375976562, |
|
"logps/rejected": -183.48318481445312, |
|
"loss": 0.7598, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.24247002601623535, |
|
"rewards/margins": 0.1793862134218216, |
|
"rewards/rejected": 0.06308381259441376, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.8261211644374509, |
|
"grad_norm": 16.544754028320312, |
|
"learning_rate": 1.8680325036458535e-06, |
|
"logits/chosen": -0.16317354142665863, |
|
"logits/rejected": -0.5910676717758179, |
|
"logps/chosen": -204.1961669921875, |
|
"logps/rejected": -173.1997833251953, |
|
"loss": 0.706, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.40617918968200684, |
|
"rewards/margins": 0.3612835705280304, |
|
"rewards/rejected": 0.04489566385746002, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8300550747442959, |
|
"grad_norm": 28.87963104248047, |
|
"learning_rate": 1.788821648025242e-06, |
|
"logits/chosen": -0.46491608023643494, |
|
"logits/rejected": -0.5262236595153809, |
|
"logps/chosen": -198.90652465820312, |
|
"logps/rejected": -188.23049926757812, |
|
"loss": 0.7507, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.4018153250217438, |
|
"rewards/margins": 0.1714794784784317, |
|
"rewards/rejected": 0.23033586144447327, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.8339889850511408, |
|
"grad_norm": 21.89056968688965, |
|
"learning_rate": 1.7111615572361628e-06, |
|
"logits/chosen": -0.1197819709777832, |
|
"logits/rejected": -0.40464717149734497, |
|
"logps/chosen": -211.6194305419922, |
|
"logps/rejected": -192.4689483642578, |
|
"loss": 0.7031, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.44010597467422485, |
|
"rewards/margins": 0.23361381888389587, |
|
"rewards/rejected": 0.20649214088916779, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8379228953579858, |
|
"grad_norm": 25.025197982788086, |
|
"learning_rate": 1.6350668981793304e-06, |
|
"logits/chosen": -0.21810774505138397, |
|
"logits/rejected": -0.536165714263916, |
|
"logps/chosen": -195.72702026367188, |
|
"logps/rejected": -185.3990478515625, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.22649447619915009, |
|
"rewards/margins": 0.3016238212585449, |
|
"rewards/rejected": -0.07512933015823364, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.8418568056648308, |
|
"grad_norm": 15.117574691772461, |
|
"learning_rate": 1.5605520421076969e-06, |
|
"logits/chosen": -0.34034574031829834, |
|
"logits/rejected": -0.5113102793693542, |
|
"logps/chosen": -195.9296417236328, |
|
"logps/rejected": -186.29287719726562, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.23202356696128845, |
|
"rewards/margins": 0.3296849727630615, |
|
"rewards/rejected": -0.09766140580177307, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8457907159716759, |
|
"grad_norm": 16.99416732788086, |
|
"learning_rate": 1.487631061912298e-06, |
|
"logits/chosen": -0.5572665929794312, |
|
"logits/rejected": -0.8171085119247437, |
|
"logps/chosen": -193.6608123779297, |
|
"logps/rejected": -176.0238800048828, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.35814136266708374, |
|
"rewards/margins": 0.37609419226646423, |
|
"rewards/rejected": -0.017952853813767433, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.8497246262785209, |
|
"grad_norm": 14.536643981933594, |
|
"learning_rate": 1.4163177294644438e-06, |
|
"logits/chosen": -0.2895492613315582, |
|
"logits/rejected": -0.48721733689308167, |
|
"logps/chosen": -198.87753295898438, |
|
"logps/rejected": -183.21096801757812, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.22944995760917664, |
|
"rewards/margins": 0.3464585840702057, |
|
"rewards/rejected": -0.11700858920812607, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8536585365853658, |
|
"grad_norm": 25.793216705322266, |
|
"learning_rate": 1.3466255130147622e-06, |
|
"logits/chosen": -0.36471131443977356, |
|
"logits/rejected": -0.5930619239807129, |
|
"logps/chosen": -187.9856719970703, |
|
"logps/rejected": -175.9360809326172, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.1035921722650528, |
|
"rewards/margins": 0.23180215060710907, |
|
"rewards/rejected": -0.12820999324321747, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8575924468922108, |
|
"grad_norm": 20.578927993774414, |
|
"learning_rate": 1.2785675746495752e-06, |
|
"logits/chosen": -0.24610432982444763, |
|
"logits/rejected": -0.7905102968215942, |
|
"logps/chosen": -188.7552032470703, |
|
"logps/rejected": -163.68289184570312, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.19958016276359558, |
|
"rewards/margins": 0.35812973976135254, |
|
"rewards/rejected": -0.15854960680007935, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8615263571990559, |
|
"grad_norm": 17.24201011657715, |
|
"learning_rate": 1.212156767805115e-06, |
|
"logits/chosen": -0.3163990080356598, |
|
"logits/rejected": -0.8110219240188599, |
|
"logps/chosen": -175.55859375, |
|
"logps/rejected": -141.836181640625, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.13926038146018982, |
|
"rewards/margins": 0.2834976315498352, |
|
"rewards/rejected": -0.1442372053861618, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.8654602675059009, |
|
"grad_norm": 13.562137603759766, |
|
"learning_rate": 1.1474056348400141e-06, |
|
"logits/chosen": -0.25132131576538086, |
|
"logits/rejected": -0.5677313804626465, |
|
"logps/chosen": -192.50961303710938, |
|
"logps/rejected": -171.54611206054688, |
|
"loss": 0.6721, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3157256245613098, |
|
"rewards/margins": 0.3246195316314697, |
|
"rewards/rejected": -0.008893907070159912, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8654602675059009, |
|
"eval_logits/chosen": 1.2642682790756226, |
|
"eval_logits/rejected": 1.043653964996338, |
|
"eval_logps/chosen": -207.01547241210938, |
|
"eval_logps/rejected": -180.30709838867188, |
|
"eval_loss": 0.6631070971488953, |
|
"eval_rewards/accuracies": 0.6265624761581421, |
|
"eval_rewards/chosen": 0.38192370533943176, |
|
"eval_rewards/margins": 0.3516288101673126, |
|
"eval_rewards/rejected": 0.03029490076005459, |
|
"eval_runtime": 300.501, |
|
"eval_samples_per_second": 2.13, |
|
"eval_steps_per_second": 0.133, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8693941778127459, |
|
"grad_norm": 21.122217178344727, |
|
"learning_rate": 1.0843264046665558e-06, |
|
"logits/chosen": -0.5116424560546875, |
|
"logits/rejected": -0.6911696195602417, |
|
"logps/chosen": -184.15603637695312, |
|
"logps/rejected": -175.35256958007812, |
|
"loss": 0.757, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.15398895740509033, |
|
"rewards/margins": 0.07341472804546356, |
|
"rewards/rejected": 0.08057420700788498, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.8733280881195908, |
|
"grad_norm": 12.891325950622559, |
|
"learning_rate": 1.0229309904411178e-06, |
|
"logits/chosen": -0.5018507838249207, |
|
"logits/rejected": -0.8595576286315918, |
|
"logps/chosen": -195.92578125, |
|
"logps/rejected": -172.9355010986328, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.32908183336257935, |
|
"rewards/margins": 0.38747507333755493, |
|
"rewards/rejected": -0.05839322879910469, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8772619984264359, |
|
"grad_norm": 17.477975845336914, |
|
"learning_rate": 9.63230987314251e-07, |
|
"logits/chosen": -0.27941471338272095, |
|
"logits/rejected": -0.5305674076080322, |
|
"logps/chosen": -191.43380737304688, |
|
"logps/rejected": -170.3942108154297, |
|
"loss": 0.6999, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.39622825384140015, |
|
"rewards/margins": 0.23104743659496307, |
|
"rewards/rejected": 0.16518081724643707, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.8811959087332809, |
|
"grad_norm": 19.63365936279297, |
|
"learning_rate": 9.052376702408206e-07, |
|
"logits/chosen": -0.4624987542629242, |
|
"logits/rejected": -0.5762002468109131, |
|
"logps/chosen": -187.87295532226562, |
|
"logps/rejected": -193.58670043945312, |
|
"loss": 0.7027, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.4038239121437073, |
|
"rewards/margins": 0.24877241253852844, |
|
"rewards/rejected": 0.15505146980285645, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8851298190401259, |
|
"grad_norm": 20.428455352783203, |
|
"learning_rate": 8.489619918506098e-07, |
|
"logits/chosen": -0.23860251903533936, |
|
"logits/rejected": -0.6500253677368164, |
|
"logps/chosen": -212.96658325195312, |
|
"logps/rejected": -179.9956512451172, |
|
"loss": 0.7073, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.24726350605487823, |
|
"rewards/margins": 0.2762225866317749, |
|
"rewards/rejected": -0.028959061950445175, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8890637293469709, |
|
"grad_norm": 19.340242385864258, |
|
"learning_rate": 7.944145803798064e-07, |
|
"logits/chosen": -0.23527947068214417, |
|
"logits/rejected": -0.59322589635849, |
|
"logps/chosen": -203.28225708007812, |
|
"logps/rejected": -180.2418670654297, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.3411061465740204, |
|
"rewards/margins": 0.18520446121692657, |
|
"rewards/rejected": 0.155901700258255, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8929976396538158, |
|
"grad_norm": 79.16990661621094, |
|
"learning_rate": 7.416057376637543e-07, |
|
"logits/chosen": -0.3579210638999939, |
|
"logits/rejected": -0.6960107088088989, |
|
"logps/chosen": -200.02012634277344, |
|
"logps/rejected": -180.67965698242188, |
|
"loss": 0.6985, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4765182435512543, |
|
"rewards/margins": 0.23227711021900177, |
|
"rewards/rejected": 0.2442411184310913, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.8969315499606609, |
|
"grad_norm": 17.410009384155273, |
|
"learning_rate": 6.905454371913467e-07, |
|
"logits/chosen": -0.1638367921113968, |
|
"logits/rejected": -0.5099595189094543, |
|
"logps/chosen": -195.05340576171875, |
|
"logps/rejected": -173.5426788330078, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4057907164096832, |
|
"rewards/margins": 0.6160932183265686, |
|
"rewards/rejected": -0.21030254662036896, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9008654602675059, |
|
"grad_norm": 22.917627334594727, |
|
"learning_rate": 6.412433222214265e-07, |
|
"logits/chosen": -0.2664688527584076, |
|
"logits/rejected": -0.6332502365112305, |
|
"logps/chosen": -216.44711303710938, |
|
"logps/rejected": -192.39352416992188, |
|
"loss": 0.6699, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.47267699241638184, |
|
"rewards/margins": 0.3270387649536133, |
|
"rewards/rejected": 0.14563825726509094, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.9047993705743509, |
|
"grad_norm": 17.761707305908203, |
|
"learning_rate": 5.937087039615619e-07, |
|
"logits/chosen": 0.004246175289154053, |
|
"logits/rejected": -0.3583109974861145, |
|
"logps/chosen": -208.1468963623047, |
|
"logps/rejected": -186.03244018554688, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.37165918946266174, |
|
"rewards/margins": 0.42197996377944946, |
|
"rewards/rejected": -0.05032079294323921, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9087332808811959, |
|
"grad_norm": 26.38233184814453, |
|
"learning_rate": 5.479505598095292e-07, |
|
"logits/chosen": -0.12539446353912354, |
|
"logits/rejected": -0.085462287068367, |
|
"logps/chosen": -205.96804809570312, |
|
"logps/rejected": -210.92672729492188, |
|
"loss": 0.7508, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.34662169218063354, |
|
"rewards/margins": 0.055609725415706635, |
|
"rewards/rejected": 0.2910119593143463, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.912667191188041, |
|
"grad_norm": 25.847694396972656, |
|
"learning_rate": 5.03977531657841e-07, |
|
"logits/chosen": -0.023742878809571266, |
|
"logits/rejected": -0.445591539144516, |
|
"logps/chosen": -206.13525390625, |
|
"logps/rejected": -183.71890258789062, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3747442364692688, |
|
"rewards/margins": 0.33868470788002014, |
|
"rewards/rejected": 0.036059536039829254, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9166011014948859, |
|
"grad_norm": 14.531253814697266, |
|
"learning_rate": 4.6179792426163107e-07, |
|
"logits/chosen": -0.13202346861362457, |
|
"logits/rejected": -0.539734423160553, |
|
"logps/chosen": -192.2351531982422, |
|
"logps/rejected": -167.59829711914062, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.4072590470314026, |
|
"rewards/margins": 0.43480420112609863, |
|
"rewards/rejected": -0.027545183897018433, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.9205350118017309, |
|
"grad_norm": 18.72174835205078, |
|
"learning_rate": 4.214197036702239e-07, |
|
"logits/chosen": 0.10880019515752792, |
|
"logits/rejected": -0.2607296109199524, |
|
"logps/chosen": -215.71939086914062, |
|
"logps/rejected": -196.47320556640625, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5132928490638733, |
|
"rewards/margins": 0.36024293303489685, |
|
"rewards/rejected": 0.15304993093013763, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9244689221085759, |
|
"grad_norm": 16.053632736206055, |
|
"learning_rate": 3.82850495722662e-07, |
|
"logits/chosen": -0.07127988338470459, |
|
"logits/rejected": -0.5435328483581543, |
|
"logps/chosen": -210.20547485351562, |
|
"logps/rejected": -173.3409881591797, |
|
"loss": 0.6586, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4504272937774658, |
|
"rewards/margins": 0.3692251741886139, |
|
"rewards/rejected": 0.08120210468769073, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.9284028324154209, |
|
"grad_norm": 20.035791397094727, |
|
"learning_rate": 3.4609758460748656e-07, |
|
"logits/chosen": -0.1992299109697342, |
|
"logits/rejected": -0.43638792634010315, |
|
"logps/chosen": -196.8170623779297, |
|
"logps/rejected": -181.31607055664062, |
|
"loss": 0.6511, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.29465168714523315, |
|
"rewards/margins": 0.3416779041290283, |
|
"rewards/rejected": -0.04702623561024666, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.932336742722266, |
|
"grad_norm": 14.104338645935059, |
|
"learning_rate": 3.1116791148704584e-07, |
|
"logits/chosen": -0.5095082521438599, |
|
"logits/rejected": -0.933671772480011, |
|
"logps/chosen": -181.0245819091797, |
|
"logps/rejected": -145.5948944091797, |
|
"loss": 0.6582, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.405670702457428, |
|
"rewards/margins": 0.39661210775375366, |
|
"rewards/rejected": 0.009058552794158459, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.9362706530291109, |
|
"grad_norm": 24.393505096435547, |
|
"learning_rate": 2.78068073186587e-07, |
|
"logits/chosen": -0.07540292292833328, |
|
"logits/rejected": -0.5439732670783997, |
|
"logps/chosen": -220.9651336669922, |
|
"logps/rejected": -198.6578826904297, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.47583404183387756, |
|
"rewards/margins": 0.4002237915992737, |
|
"rewards/rejected": 0.07561029493808746, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9402045633359559, |
|
"grad_norm": 22.48759651184082, |
|
"learning_rate": 2.4680432094837394e-07, |
|
"logits/chosen": -0.030767759308218956, |
|
"logits/rejected": -0.40518251061439514, |
|
"logps/chosen": -192.23971557617188, |
|
"logps/rejected": -165.24063110351562, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.32550159096717834, |
|
"rewards/margins": 0.2751082479953766, |
|
"rewards/rejected": 0.05039336532354355, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.9441384736428009, |
|
"grad_norm": 16.363256454467773, |
|
"learning_rate": 2.1738255925108253e-07, |
|
"logits/chosen": -0.5227106809616089, |
|
"logits/rejected": -0.7640475034713745, |
|
"logps/chosen": -218.41708374023438, |
|
"logps/rejected": -194.16444396972656, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.38994377851486206, |
|
"rewards/margins": 0.3536146283149719, |
|
"rewards/rejected": 0.03632917255163193, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9441384736428009, |
|
"eval_logits/chosen": 1.2622064352035522, |
|
"eval_logits/rejected": 1.0416359901428223, |
|
"eval_logps/chosen": -207.0163116455078, |
|
"eval_logps/rejected": -180.30044555664062, |
|
"eval_loss": 0.6637659072875977, |
|
"eval_rewards/accuracies": 0.6390625238418579, |
|
"eval_rewards/chosen": 0.38159698247909546, |
|
"eval_rewards/margins": 0.34863370656967163, |
|
"eval_rewards/rejected": 0.03296329826116562, |
|
"eval_runtime": 307.2933, |
|
"eval_samples_per_second": 2.083, |
|
"eval_steps_per_second": 0.13, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.948072383949646, |
|
"grad_norm": 15.801830291748047, |
|
"learning_rate": 1.8980834469467523e-07, |
|
"logits/chosen": 0.028558891266584396, |
|
"logits/rejected": -0.36049187183380127, |
|
"logps/chosen": -225.1962127685547, |
|
"logps/rejected": -196.6998748779297, |
|
"loss": 0.7157, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.30342918634414673, |
|
"rewards/margins": 0.18997251987457275, |
|
"rewards/rejected": 0.11345665156841278, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.952006294256491, |
|
"grad_norm": 21.53165054321289, |
|
"learning_rate": 1.6408688495098134e-07, |
|
"logits/chosen": -0.09858529269695282, |
|
"logits/rejected": -0.52873694896698, |
|
"logps/chosen": -208.2776336669922, |
|
"logps/rejected": -179.6067657470703, |
|
"loss": 0.701, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3496444821357727, |
|
"rewards/margins": 0.24224546551704407, |
|
"rewards/rejected": 0.10739902406930923, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.955940204563336, |
|
"grad_norm": 15.417522430419922, |
|
"learning_rate": 1.402230377801761e-07, |
|
"logits/chosen": -0.12817321717739105, |
|
"logits/rejected": -0.5611924529075623, |
|
"logps/chosen": -223.1984405517578, |
|
"logps/rejected": -191.31808471679688, |
|
"loss": 0.673, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5014004707336426, |
|
"rewards/margins": 0.3005516231060028, |
|
"rewards/rejected": 0.20084881782531738, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9598741148701809, |
|
"grad_norm": 21.139495849609375, |
|
"learning_rate": 1.1822131011334003e-07, |
|
"logits/chosen": -0.330310583114624, |
|
"logits/rejected": -0.6778287887573242, |
|
"logps/chosen": -206.1497802734375, |
|
"logps/rejected": -175.0183563232422, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.40439096093177795, |
|
"rewards/margins": 0.39146164059638977, |
|
"rewards/rejected": 0.012929338030517101, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.963808025177026, |
|
"grad_norm": 15.441524505615234, |
|
"learning_rate": 9.80858572012866e-08, |
|
"logits/chosen": -0.10460350662469864, |
|
"logits/rejected": -0.46022725105285645, |
|
"logps/chosen": -223.1492156982422, |
|
"logps/rejected": -193.82369995117188, |
|
"loss": 0.6415, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3919476568698883, |
|
"rewards/margins": 0.37521207332611084, |
|
"rewards/rejected": 0.016735553741455078, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.967741935483871, |
|
"grad_norm": 19.24515724182129, |
|
"learning_rate": 7.982048182978985e-08, |
|
"logits/chosen": -0.3437039256095886, |
|
"logits/rejected": -0.7036724090576172, |
|
"logps/chosen": -210.358642578125, |
|
"logps/rejected": -189.95278930664062, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3740697503089905, |
|
"rewards/margins": 0.34888529777526855, |
|
"rewards/rejected": 0.02518446370959282, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.971675845790716, |
|
"grad_norm": 16.690387725830078, |
|
"learning_rate": 6.342863360139672e-08, |
|
"logits/chosen": -0.29954901337623596, |
|
"logits/rejected": -0.7138617634773254, |
|
"logps/chosen": -181.06094360351562, |
|
"logps/rejected": -157.12701416015625, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3025146424770355, |
|
"rewards/margins": 0.25972747802734375, |
|
"rewards/rejected": 0.04278718680143356, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 17.477008819580078, |
|
"learning_rate": 4.8913408283934874e-08, |
|
"logits/chosen": -0.19394654035568237, |
|
"logits/rejected": -0.5592636466026306, |
|
"logps/chosen": -211.7626495361328, |
|
"logps/rejected": -190.55416870117188, |
|
"loss": 0.6955, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.39831337332725525, |
|
"rewards/margins": 0.3408041000366211, |
|
"rewards/rejected": 0.05750928074121475, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9795436664044059, |
|
"grad_norm": 14.256926536560059, |
|
"learning_rate": 3.627754722584031e-08, |
|
"logits/chosen": -0.15048038959503174, |
|
"logits/rejected": -0.5208483934402466, |
|
"logps/chosen": -223.10110473632812, |
|
"logps/rejected": -190.59140014648438, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.49489039182662964, |
|
"rewards/margins": 0.3687785863876343, |
|
"rewards/rejected": 0.12611182034015656, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.983477576711251, |
|
"grad_norm": 29.595378875732422, |
|
"learning_rate": 2.5523436838430503e-08, |
|
"logits/chosen": -0.3160143494606018, |
|
"logits/rejected": -0.6430375576019287, |
|
"logps/chosen": -196.36361694335938, |
|
"logps/rejected": -166.49758911132812, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.36816468834877014, |
|
"rewards/margins": 0.36051416397094727, |
|
"rewards/rejected": 0.007650518324226141, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.987411487018096, |
|
"grad_norm": 13.689908027648926, |
|
"learning_rate": 1.665310814520482e-08, |
|
"logits/chosen": -0.6328016519546509, |
|
"logits/rejected": -0.9240643382072449, |
|
"logps/chosen": -188.88470458984375, |
|
"logps/rejected": -166.7686767578125, |
|
"loss": 0.6975, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.09804626554250717, |
|
"rewards/margins": 0.26263147592544556, |
|
"rewards/rejected": -0.1645852029323578, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.991345397324941, |
|
"grad_norm": 17.047653198242188, |
|
"learning_rate": 9.668236398262532e-09, |
|
"logits/chosen": -0.35158300399780273, |
|
"logits/rejected": -0.6125014424324036, |
|
"logps/chosen": -203.73788452148438, |
|
"logps/rejected": -189.255126953125, |
|
"loss": 0.6549, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.1564028114080429, |
|
"rewards/margins": 0.3770085275173187, |
|
"rewards/rejected": -0.22060570120811462, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.995279307631786, |
|
"grad_norm": 21.35641098022461, |
|
"learning_rate": 4.570140761918085e-09, |
|
"logits/chosen": -0.744472861289978, |
|
"logits/rejected": -0.9415663480758667, |
|
"logps/chosen": -186.4073028564453, |
|
"logps/rejected": -176.38418579101562, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.32688194513320923, |
|
"rewards/margins": 0.42695555090904236, |
|
"rewards/rejected": -0.10007365047931671, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.999213217938631, |
|
"grad_norm": 14.891934394836426, |
|
"learning_rate": 1.3597840635615201e-09, |
|
"logits/chosen": -0.14978916943073273, |
|
"logits/rejected": -0.6677058935165405, |
|
"logps/chosen": -209.85635375976562, |
|
"logps/rejected": -172.8124237060547, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.3781249225139618, |
|
"rewards/margins": 0.32455307245254517, |
|
"rewards/rejected": 0.05357181280851364, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1271, |
|
"total_flos": 0.0, |
|
"train_loss": 0.675776368140424, |
|
"train_runtime": 24039.6181, |
|
"train_samples_per_second": 0.846, |
|
"train_steps_per_second": 0.053 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1271, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |