|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9904153354632586, |
|
"eval_steps": 500, |
|
"global_step": 468, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006389776357827476, |
|
"grad_norm": 38.53680030738914, |
|
"learning_rate": 1.0638297872340425e-08, |
|
"logits/chosen": -3.453125, |
|
"logits/rejected": -3.4375, |
|
"logps/chosen": -139.0, |
|
"logps/rejected": -128.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06389776357827476, |
|
"grad_norm": 37.086320471160064, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -3.5625, |
|
"logits/rejected": -3.5, |
|
"logps/chosen": -174.0, |
|
"logps/rejected": -170.0, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": 0.00677490234375, |
|
"rewards/margins": 0.00555419921875, |
|
"rewards/rejected": 0.00121307373046875, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12779552715654952, |
|
"grad_norm": 37.797498575960304, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -3.578125, |
|
"logits/rejected": -3.5625, |
|
"logps/chosen": -164.0, |
|
"logps/rejected": -149.0, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": 0.004852294921875, |
|
"rewards/margins": 0.0111083984375, |
|
"rewards/rejected": -0.006256103515625, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19169329073482427, |
|
"grad_norm": 36.37288176457082, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -3.609375, |
|
"logits/rejected": -3.59375, |
|
"logps/chosen": -162.0, |
|
"logps/rejected": -155.0, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.004669189453125, |
|
"rewards/margins": 0.0390625, |
|
"rewards/rejected": -0.0341796875, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25559105431309903, |
|
"grad_norm": 34.81091514767431, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -3.578125, |
|
"logits/rejected": -3.53125, |
|
"logps/chosen": -161.0, |
|
"logps/rejected": -142.0, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.01409912109375, |
|
"rewards/margins": 0.06884765625, |
|
"rewards/rejected": -0.0830078125, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3194888178913738, |
|
"grad_norm": 32.850826486655734, |
|
"learning_rate": 4.96437054631829e-07, |
|
"logits/chosen": -3.65625, |
|
"logits/rejected": -3.65625, |
|
"logps/chosen": -161.0, |
|
"logps/rejected": -158.0, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0140380859375, |
|
"rewards/margins": 0.08984375, |
|
"rewards/rejected": -0.07568359375, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38338658146964855, |
|
"grad_norm": 37.177195702506765, |
|
"learning_rate": 4.845605700712589e-07, |
|
"logits/chosen": -3.71875, |
|
"logits/rejected": -3.703125, |
|
"logps/chosen": -162.0, |
|
"logps/rejected": -153.0, |
|
"loss": 0.6238, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0203857421875, |
|
"rewards/margins": 0.18359375, |
|
"rewards/rejected": -0.203125, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4472843450479233, |
|
"grad_norm": 36.403665892064886, |
|
"learning_rate": 4.7268408551068883e-07, |
|
"logits/chosen": -3.625, |
|
"logits/rejected": -3.59375, |
|
"logps/chosen": -169.0, |
|
"logps/rejected": -152.0, |
|
"loss": 0.6254, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.033447265625, |
|
"rewards/margins": 0.212890625, |
|
"rewards/rejected": -0.24609375, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5111821086261981, |
|
"grad_norm": 38.60720512624817, |
|
"learning_rate": 4.6080760095011875e-07, |
|
"logits/chosen": -3.625, |
|
"logits/rejected": -3.59375, |
|
"logps/chosen": -172.0, |
|
"logps/rejected": -154.0, |
|
"loss": 0.6216, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.08056640625, |
|
"rewards/margins": 0.388671875, |
|
"rewards/rejected": -0.30859375, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5750798722044729, |
|
"grad_norm": 32.642277330645705, |
|
"learning_rate": 4.4893111638954866e-07, |
|
"logits/chosen": -3.609375, |
|
"logits/rejected": -3.578125, |
|
"logps/chosen": -166.0, |
|
"logps/rejected": -145.0, |
|
"loss": 0.6482, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.240234375, |
|
"rewards/margins": 0.3046875, |
|
"rewards/rejected": -0.546875, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6389776357827476, |
|
"grad_norm": 34.47332747576967, |
|
"learning_rate": 4.3705463182897863e-07, |
|
"logits/chosen": -3.671875, |
|
"logits/rejected": -3.65625, |
|
"logps/chosen": -172.0, |
|
"logps/rejected": -158.0, |
|
"loss": 0.6118, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.205078125, |
|
"rewards/margins": 0.380859375, |
|
"rewards/rejected": -0.5859375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7028753993610224, |
|
"grad_norm": 39.45037295466221, |
|
"learning_rate": 4.251781472684085e-07, |
|
"logits/chosen": -3.640625, |
|
"logits/rejected": -3.625, |
|
"logps/chosen": -170.0, |
|
"logps/rejected": -164.0, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.44921875, |
|
"rewards/margins": 0.37109375, |
|
"rewards/rejected": -0.8203125, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7667731629392971, |
|
"grad_norm": 39.79332640803062, |
|
"learning_rate": 4.1330166270783846e-07, |
|
"logits/chosen": -3.578125, |
|
"logits/rejected": -3.578125, |
|
"logps/chosen": -167.0, |
|
"logps/rejected": -139.0, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.48046875, |
|
"rewards/margins": 0.400390625, |
|
"rewards/rejected": -0.87890625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8306709265175719, |
|
"grad_norm": 33.432327857908874, |
|
"learning_rate": 4.0142517814726837e-07, |
|
"logits/chosen": -3.609375, |
|
"logits/rejected": -3.609375, |
|
"logps/chosen": -178.0, |
|
"logps/rejected": -152.0, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.26953125, |
|
"rewards/margins": 0.55859375, |
|
"rewards/rejected": -0.828125, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8945686900958466, |
|
"grad_norm": 31.757203089299825, |
|
"learning_rate": 3.8954869358669834e-07, |
|
"logits/chosen": -3.65625, |
|
"logits/rejected": -3.578125, |
|
"logps/chosen": -167.0, |
|
"logps/rejected": -156.0, |
|
"loss": 0.6081, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.333984375, |
|
"rewards/margins": 0.40625, |
|
"rewards/rejected": -0.73828125, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9584664536741214, |
|
"grad_norm": 34.635549945156775, |
|
"learning_rate": 3.7767220902612825e-07, |
|
"logits/chosen": -3.609375, |
|
"logits/rejected": -3.59375, |
|
"logps/chosen": -173.0, |
|
"logps/rejected": -160.0, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.55078125, |
|
"rewards/margins": 0.474609375, |
|
"rewards/rejected": -1.03125, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9968051118210862, |
|
"eval_logits/chosen": -3.65625, |
|
"eval_logits/rejected": -3.6875, |
|
"eval_logps/chosen": -163.0, |
|
"eval_logps/rejected": -149.0, |
|
"eval_loss": 0.5966406464576721, |
|
"eval_rewards/accuracies": 0.6785714030265808, |
|
"eval_rewards/chosen": -0.470703125, |
|
"eval_rewards/margins": 0.470703125, |
|
"eval_rewards/rejected": -0.94140625, |
|
"eval_runtime": 12.2769, |
|
"eval_samples_per_second": 16.291, |
|
"eval_steps_per_second": 0.57, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.0223642172523961, |
|
"grad_norm": 17.578277558626937, |
|
"learning_rate": 3.6579572446555817e-07, |
|
"logits/chosen": -3.59375, |
|
"logits/rejected": -3.578125, |
|
"logps/chosen": -163.0, |
|
"logps/rejected": -165.0, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.337890625, |
|
"rewards/margins": 0.8125, |
|
"rewards/rejected": -1.1484375, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0862619808306708, |
|
"grad_norm": 18.045497494579415, |
|
"learning_rate": 3.5391923990498813e-07, |
|
"logits/chosen": -3.609375, |
|
"logits/rejected": -3.5625, |
|
"logps/chosen": -166.0, |
|
"logps/rejected": -161.0, |
|
"loss": 0.2778, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.28515625, |
|
"rewards/margins": 2.03125, |
|
"rewards/rejected": -1.75, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.1501597444089458, |
|
"grad_norm": 31.421762384244513, |
|
"learning_rate": 3.42042755344418e-07, |
|
"logits/chosen": -3.6875, |
|
"logits/rejected": -3.65625, |
|
"logps/chosen": -169.0, |
|
"logps/rejected": -163.0, |
|
"loss": 0.2699, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.0830078125, |
|
"rewards/margins": 1.78125, |
|
"rewards/rejected": -1.6953125, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.2140575079872205, |
|
"grad_norm": 23.922864036418108, |
|
"learning_rate": 3.3016627078384796e-07, |
|
"logits/chosen": -3.6875, |
|
"logits/rejected": -3.625, |
|
"logps/chosen": -170.0, |
|
"logps/rejected": -160.0, |
|
"loss": 0.2828, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.0189208984375, |
|
"rewards/margins": 1.96875, |
|
"rewards/rejected": -1.984375, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2779552715654952, |
|
"grad_norm": 35.48813795033075, |
|
"learning_rate": 3.182897862232779e-07, |
|
"logits/chosen": -3.6875, |
|
"logits/rejected": -3.59375, |
|
"logps/chosen": -166.0, |
|
"logps/rejected": -172.0, |
|
"loss": 0.2671, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0281982421875, |
|
"rewards/margins": 2.0, |
|
"rewards/rejected": -1.96875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.34185303514377, |
|
"grad_norm": 26.443386433583438, |
|
"learning_rate": 3.0641330166270784e-07, |
|
"logits/chosen": -3.671875, |
|
"logits/rejected": -3.609375, |
|
"logps/chosen": -170.0, |
|
"logps/rejected": -168.0, |
|
"loss": 0.2989, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 0.16796875, |
|
"rewards/margins": 1.875, |
|
"rewards/rejected": -1.7109375, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.4057507987220448, |
|
"grad_norm": 21.45349324293884, |
|
"learning_rate": 2.9453681710213776e-07, |
|
"logits/chosen": -3.671875, |
|
"logits/rejected": -3.65625, |
|
"logps/chosen": -172.0, |
|
"logps/rejected": -171.0, |
|
"loss": 0.2874, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.26171875, |
|
"rewards/margins": 1.8125, |
|
"rewards/rejected": -2.078125, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.4696485623003195, |
|
"grad_norm": 15.23988385292947, |
|
"learning_rate": 2.8266033254156767e-07, |
|
"logits/chosen": -3.65625, |
|
"logits/rejected": -3.625, |
|
"logps/chosen": -152.0, |
|
"logps/rejected": -154.0, |
|
"loss": 0.3156, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.2158203125, |
|
"rewards/margins": 1.8046875, |
|
"rewards/rejected": -2.015625, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.5335463258785942, |
|
"grad_norm": 20.9068024116458, |
|
"learning_rate": 2.7078384798099764e-07, |
|
"logits/chosen": -3.6875, |
|
"logits/rejected": -3.640625, |
|
"logps/chosen": -166.0, |
|
"logps/rejected": -179.0, |
|
"loss": 0.2663, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.17578125, |
|
"rewards/margins": 2.296875, |
|
"rewards/rejected": -2.46875, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.5974440894568689, |
|
"grad_norm": 23.61858832933539, |
|
"learning_rate": 2.589073634204275e-07, |
|
"logits/chosen": -3.625, |
|
"logits/rejected": -3.59375, |
|
"logps/chosen": -169.0, |
|
"logps/rejected": -175.0, |
|
"loss": 0.255, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.302734375, |
|
"rewards/margins": 2.203125, |
|
"rewards/rejected": -2.515625, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.6613418530351438, |
|
"grad_norm": 15.684487484295666, |
|
"learning_rate": 2.4703087885985747e-07, |
|
"logits/chosen": -3.6875, |
|
"logits/rejected": -3.65625, |
|
"logps/chosen": -176.0, |
|
"logps/rejected": -178.0, |
|
"loss": 0.279, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.3984375, |
|
"rewards/margins": 1.984375, |
|
"rewards/rejected": -2.375, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.7252396166134185, |
|
"grad_norm": 17.902346692684933, |
|
"learning_rate": 2.351543942992874e-07, |
|
"logits/chosen": -3.6875, |
|
"logits/rejected": -3.625, |
|
"logps/chosen": -175.0, |
|
"logps/rejected": -186.0, |
|
"loss": 0.27, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.29296875, |
|
"rewards/margins": 2.140625, |
|
"rewards/rejected": -2.4375, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.7891373801916934, |
|
"grad_norm": 16.024604674297596, |
|
"learning_rate": 2.2327790973871732e-07, |
|
"logits/chosen": -3.65625, |
|
"logits/rejected": -3.609375, |
|
"logps/chosen": -178.0, |
|
"logps/rejected": -186.0, |
|
"loss": 0.3036, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.76171875, |
|
"rewards/margins": 2.15625, |
|
"rewards/rejected": -2.921875, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.8530351437699681, |
|
"grad_norm": 20.910031176692122, |
|
"learning_rate": 2.1140142517814726e-07, |
|
"logits/chosen": -3.625, |
|
"logits/rejected": -3.59375, |
|
"logps/chosen": -174.0, |
|
"logps/rejected": -182.0, |
|
"loss": 0.257, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.0810546875, |
|
"rewards/margins": 2.5625, |
|
"rewards/rejected": -2.640625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.9169329073482428, |
|
"grad_norm": 26.07564194077576, |
|
"learning_rate": 1.9952494061757718e-07, |
|
"logits/chosen": -3.65625, |
|
"logits/rejected": -3.578125, |
|
"logps/chosen": -167.0, |
|
"logps/rejected": -170.0, |
|
"loss": 0.3132, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.2099609375, |
|
"rewards/margins": 2.234375, |
|
"rewards/rejected": -2.4375, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.9808306709265175, |
|
"grad_norm": 26.49958432728375, |
|
"learning_rate": 1.876484560570071e-07, |
|
"logits/chosen": -3.71875, |
|
"logits/rejected": -3.703125, |
|
"logps/chosen": -165.0, |
|
"logps/rejected": -174.0, |
|
"loss": 0.2996, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.400390625, |
|
"rewards/margins": 2.03125, |
|
"rewards/rejected": -2.4375, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -3.765625, |
|
"eval_logits/rejected": -3.796875, |
|
"eval_logps/chosen": -167.0, |
|
"eval_logps/rejected": -154.0, |
|
"eval_loss": 0.5964062213897705, |
|
"eval_rewards/accuracies": 0.7142857313156128, |
|
"eval_rewards/chosen": -0.86328125, |
|
"eval_rewards/margins": 0.5546875, |
|
"eval_rewards/rejected": -1.4140625, |
|
"eval_runtime": 12.2832, |
|
"eval_samples_per_second": 16.282, |
|
"eval_steps_per_second": 0.57, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 2.0447284345047922, |
|
"grad_norm": 12.799642134592919, |
|
"learning_rate": 1.7577197149643706e-07, |
|
"logits/chosen": -3.671875, |
|
"logits/rejected": -3.625, |
|
"logps/chosen": -173.0, |
|
"logps/rejected": -195.0, |
|
"loss": 0.1648, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.2431640625, |
|
"rewards/margins": 3.125, |
|
"rewards/rejected": -3.359375, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.108626198083067, |
|
"grad_norm": 24.727219361838603, |
|
"learning_rate": 1.6389548693586697e-07, |
|
"logits/chosen": -3.703125, |
|
"logits/rejected": -3.65625, |
|
"logps/chosen": -165.0, |
|
"logps/rejected": -176.0, |
|
"loss": 0.1591, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.287109375, |
|
"rewards/margins": 2.90625, |
|
"rewards/rejected": -3.1875, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.1725239616613417, |
|
"grad_norm": 26.008991012968647, |
|
"learning_rate": 1.520190023752969e-07, |
|
"logits/chosen": -3.65625, |
|
"logits/rejected": -3.609375, |
|
"logps/chosen": -162.0, |
|
"logps/rejected": -178.0, |
|
"loss": 0.1532, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6640625, |
|
"rewards/margins": 3.125, |
|
"rewards/rejected": -3.796875, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.236421725239617, |
|
"grad_norm": 20.087149399156676, |
|
"learning_rate": 1.4014251781472683e-07, |
|
"logits/chosen": -3.734375, |
|
"logits/rejected": -3.65625, |
|
"logps/chosen": -165.0, |
|
"logps/rejected": -177.0, |
|
"loss": 0.1472, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.6171875, |
|
"rewards/margins": 3.015625, |
|
"rewards/rejected": -3.640625, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.3003194888178915, |
|
"grad_norm": 18.05087937817016, |
|
"learning_rate": 1.2826603325415677e-07, |
|
"logits/chosen": -3.796875, |
|
"logits/rejected": -3.734375, |
|
"logps/chosen": -166.0, |
|
"logps/rejected": -193.0, |
|
"loss": 0.1431, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.54296875, |
|
"rewards/margins": 3.0625, |
|
"rewards/rejected": -3.609375, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.364217252396166, |
|
"grad_norm": 18.05523178462953, |
|
"learning_rate": 1.163895486935867e-07, |
|
"logits/chosen": -3.78125, |
|
"logits/rejected": -3.75, |
|
"logps/chosen": -177.0, |
|
"logps/rejected": -185.0, |
|
"loss": 0.1465, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.78125, |
|
"rewards/margins": 3.15625, |
|
"rewards/rejected": -3.9375, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.428115015974441, |
|
"grad_norm": 12.57343346412284, |
|
"learning_rate": 1.0451306413301662e-07, |
|
"logits/chosen": -3.78125, |
|
"logits/rejected": -3.75, |
|
"logps/chosen": -175.0, |
|
"logps/rejected": -187.0, |
|
"loss": 0.1316, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.828125, |
|
"rewards/margins": 3.296875, |
|
"rewards/rejected": -4.125, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.4920127795527156, |
|
"grad_norm": 21.734917480559936, |
|
"learning_rate": 9.263657957244655e-08, |
|
"logits/chosen": -3.796875, |
|
"logits/rejected": -3.765625, |
|
"logps/chosen": -173.0, |
|
"logps/rejected": -200.0, |
|
"loss": 0.1625, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -1.1796875, |
|
"rewards/margins": 3.15625, |
|
"rewards/rejected": -4.34375, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.5559105431309903, |
|
"grad_norm": 19.74378557143576, |
|
"learning_rate": 8.076009501187649e-08, |
|
"logits/chosen": -3.84375, |
|
"logits/rejected": -3.765625, |
|
"logps/chosen": -172.0, |
|
"logps/rejected": -202.0, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -1.09375, |
|
"rewards/margins": 3.390625, |
|
"rewards/rejected": -4.5, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.619808306709265, |
|
"grad_norm": 18.45734841726139, |
|
"learning_rate": 6.88836104513064e-08, |
|
"logits/chosen": -3.703125, |
|
"logits/rejected": -3.65625, |
|
"logps/chosen": -166.0, |
|
"logps/rejected": -199.0, |
|
"loss": 0.1322, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -1.03125, |
|
"rewards/margins": 3.515625, |
|
"rewards/rejected": -4.53125, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.68370607028754, |
|
"grad_norm": 22.52618935655582, |
|
"learning_rate": 5.700712589073634e-08, |
|
"logits/chosen": -3.703125, |
|
"logits/rejected": -3.65625, |
|
"logps/chosen": -178.0, |
|
"logps/rejected": -203.0, |
|
"loss": 0.1197, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.6796875, |
|
"rewards/margins": 3.765625, |
|
"rewards/rejected": -4.4375, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.747603833865815, |
|
"grad_norm": 18.32031096894331, |
|
"learning_rate": 4.5130641330166267e-08, |
|
"logits/chosen": -3.765625, |
|
"logits/rejected": -3.71875, |
|
"logps/chosen": -177.0, |
|
"logps/rejected": -195.0, |
|
"loss": 0.1689, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.76953125, |
|
"rewards/margins": 3.359375, |
|
"rewards/rejected": -4.125, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.8115015974440896, |
|
"grad_norm": 15.907202030933394, |
|
"learning_rate": 3.32541567695962e-08, |
|
"logits/chosen": -3.71875, |
|
"logits/rejected": -3.671875, |
|
"logps/chosen": -170.0, |
|
"logps/rejected": -203.0, |
|
"loss": 0.1358, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.85546875, |
|
"rewards/margins": 3.578125, |
|
"rewards/rejected": -4.4375, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.8753993610223643, |
|
"grad_norm": 15.683451965611985, |
|
"learning_rate": 2.1377672209026125e-08, |
|
"logits/chosen": -3.6875, |
|
"logits/rejected": -3.59375, |
|
"logps/chosen": -172.0, |
|
"logps/rejected": -181.0, |
|
"loss": 0.1541, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.9375, |
|
"rewards/margins": 3.53125, |
|
"rewards/rejected": -4.46875, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.939297124600639, |
|
"grad_norm": 31.718439650295267, |
|
"learning_rate": 9.501187648456057e-09, |
|
"logits/chosen": -3.75, |
|
"logits/rejected": -3.734375, |
|
"logps/chosen": -171.0, |
|
"logps/rejected": -190.0, |
|
"loss": 0.1648, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.9140625, |
|
"rewards/margins": 3.453125, |
|
"rewards/rejected": -4.375, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.9904153354632586, |
|
"eval_logits/chosen": -3.828125, |
|
"eval_logits/rejected": -3.859375, |
|
"eval_logps/chosen": -176.0, |
|
"eval_logps/rejected": -163.0, |
|
"eval_loss": 0.64453125, |
|
"eval_rewards/accuracies": 0.6607142686843872, |
|
"eval_rewards/chosen": -1.7109375, |
|
"eval_rewards/margins": 0.63671875, |
|
"eval_rewards/rejected": -2.34375, |
|
"eval_runtime": 15.0233, |
|
"eval_samples_per_second": 13.313, |
|
"eval_steps_per_second": 0.466, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 2.9904153354632586, |
|
"step": 468, |
|
"total_flos": 0.0, |
|
"train_loss": 0.35841141399155313, |
|
"train_runtime": 4617.8414, |
|
"train_samples_per_second": 6.493, |
|
"train_steps_per_second": 0.101 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 468, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|