|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 1.3054830287206268e-08, |
|
"logits/chosen": -2.377302885055542, |
|
"logits/rejected": -2.2193148136138916, |
|
"logps/chosen": -290.4185485839844, |
|
"logps/rejected": -374.6668701171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.40625, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": -2.2499454021453857, |
|
"logits/rejected": -2.0522336959838867, |
|
"logps/chosen": -279.5985107421875, |
|
"logps/rejected": -245.43223571777344, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.0009557433077134192, |
|
"rewards/margins": 0.0005407779826782644, |
|
"rewards/rejected": 0.0004149653250351548, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.5, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": -2.2457704544067383, |
|
"logits/rejected": -1.944566011428833, |
|
"logps/chosen": -305.46026611328125, |
|
"logps/rejected": -237.7046356201172, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.004223807714879513, |
|
"rewards/margins": 0.0008180936565622687, |
|
"rewards/rejected": 0.0034057139419019222, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 3.9164490861618804e-07, |
|
"logits/chosen": -2.2053937911987305, |
|
"logits/rejected": -2.1369049549102783, |
|
"logps/chosen": -251.22940063476562, |
|
"logps/rejected": -251.3945770263672, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.011935219168663025, |
|
"rewards/margins": 0.001991255208849907, |
|
"rewards/rejected": 0.009943963959813118, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.9453125, |
|
"learning_rate": 5.221932114882506e-07, |
|
"logits/chosen": -2.0609099864959717, |
|
"logits/rejected": -2.0232198238372803, |
|
"logps/chosen": -216.2375030517578, |
|
"logps/rejected": -221.68643188476562, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.019067076966166496, |
|
"rewards/margins": 0.0031169778667390347, |
|
"rewards/rejected": 0.015950100496411324, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 6.527415143603135e-07, |
|
"logits/chosen": -2.1113991737365723, |
|
"logits/rejected": -2.0994935035705566, |
|
"logps/chosen": -266.8785095214844, |
|
"logps/rejected": -234.3098907470703, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.030090373009443283, |
|
"rewards/margins": 0.005098854657262564, |
|
"rewards/rejected": 0.024991516023874283, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.125, |
|
"learning_rate": 7.832898172323761e-07, |
|
"logits/chosen": -2.099452257156372, |
|
"logits/rejected": -1.9423996210098267, |
|
"logps/chosen": -252.3367919921875, |
|
"logps/rejected": -226.71066284179688, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.031631551682949066, |
|
"rewards/margins": 0.006400656886398792, |
|
"rewards/rejected": 0.025230899453163147, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 9.138381201044387e-07, |
|
"logits/chosen": -2.243349552154541, |
|
"logits/rejected": -2.035635471343994, |
|
"logps/chosen": -272.0526428222656, |
|
"logps/rejected": -246.71243286132812, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.04103558883070946, |
|
"rewards/margins": 0.01095958799123764, |
|
"rewards/rejected": 0.03007599711418152, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.40625, |
|
"learning_rate": 1.0443864229765013e-06, |
|
"logits/chosen": -2.154019355773926, |
|
"logits/rejected": -1.9776532649993896, |
|
"logps/chosen": -257.60150146484375, |
|
"logps/rejected": -246.8785858154297, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.03827068209648132, |
|
"rewards/margins": 0.01215548999607563, |
|
"rewards/rejected": 0.026115190237760544, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.203125, |
|
"learning_rate": 1.1749347258485642e-06, |
|
"logits/chosen": -2.135911703109741, |
|
"logits/rejected": -2.000063419342041, |
|
"logps/chosen": -250.10903930664062, |
|
"logps/rejected": -234.52127075195312, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.04227185994386673, |
|
"rewards/margins": 0.017635947093367577, |
|
"rewards/rejected": 0.024635912850499153, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 1.305483028720627e-06, |
|
"logits/chosen": -2.180065631866455, |
|
"logits/rejected": -2.069608449935913, |
|
"logps/chosen": -247.0371551513672, |
|
"logps/rejected": -230.7642822265625, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.04732387885451317, |
|
"rewards/margins": 0.022458035498857498, |
|
"rewards/rejected": 0.02486584149301052, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": -2.095273017883301, |
|
"eval_logits/rejected": -1.955887794494629, |
|
"eval_logps/chosen": -259.675048828125, |
|
"eval_logps/rejected": -241.93234252929688, |
|
"eval_loss": 0.6821526885032654, |
|
"eval_rewards/accuracies": 0.6610000133514404, |
|
"eval_rewards/chosen": 0.04976964741945267, |
|
"eval_rewards/margins": 0.02300717867910862, |
|
"eval_rewards/rejected": 0.026762468740344048, |
|
"eval_runtime": 384.6998, |
|
"eval_samples_per_second": 5.199, |
|
"eval_steps_per_second": 0.65, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 1.4360313315926894e-06, |
|
"logits/chosen": -2.14589786529541, |
|
"logits/rejected": -2.0023417472839355, |
|
"logps/chosen": -284.3846130371094, |
|
"logps/rejected": -238.9386444091797, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.04979206249117851, |
|
"rewards/margins": 0.02978363260626793, |
|
"rewards/rejected": 0.020008429884910583, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 1.5665796344647521e-06, |
|
"logits/chosen": -2.192373275756836, |
|
"logits/rejected": -2.052762985229492, |
|
"logps/chosen": -287.498291015625, |
|
"logps/rejected": -271.96441650390625, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.05546677112579346, |
|
"rewards/margins": 0.04281745105981827, |
|
"rewards/rejected": 0.012649321928620338, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.640625, |
|
"learning_rate": 1.6971279373368146e-06, |
|
"logits/chosen": -2.2076873779296875, |
|
"logits/rejected": -2.1181204319000244, |
|
"logps/chosen": -250.1416015625, |
|
"logps/rejected": -252.60836791992188, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.05010415241122246, |
|
"rewards/margins": 0.04891490936279297, |
|
"rewards/rejected": 0.0011892480542883277, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.5, |
|
"learning_rate": 1.8276762402088774e-06, |
|
"logits/chosen": -2.2444560527801514, |
|
"logits/rejected": -1.9101192951202393, |
|
"logps/chosen": -270.51544189453125, |
|
"logps/rejected": -226.2876739501953, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.04222479462623596, |
|
"rewards/margins": 0.05299673601984978, |
|
"rewards/rejected": -0.010771943256258965, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.640625, |
|
"learning_rate": 1.9582245430809403e-06, |
|
"logits/chosen": -2.264216899871826, |
|
"logits/rejected": -2.0381903648376465, |
|
"logps/chosen": -280.3586120605469, |
|
"logps/rejected": -242.8353271484375, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.03652295097708702, |
|
"rewards/margins": 0.05572789907455444, |
|
"rewards/rejected": -0.019204948097467422, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.71875, |
|
"learning_rate": 2.0887728459530026e-06, |
|
"logits/chosen": -2.1548912525177, |
|
"logits/rejected": -2.052673816680908, |
|
"logps/chosen": -256.0513610839844, |
|
"logps/rejected": -261.861328125, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.007725180592387915, |
|
"rewards/margins": 0.055973686277866364, |
|
"rewards/rejected": -0.04824850708246231, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.890625, |
|
"learning_rate": 2.2193211488250653e-06, |
|
"logits/chosen": -2.124185562133789, |
|
"logits/rejected": -1.9691429138183594, |
|
"logps/chosen": -220.9111328125, |
|
"logps/rejected": -228.2654571533203, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0032043636310845613, |
|
"rewards/margins": 0.051164913922548294, |
|
"rewards/rejected": -0.054369281977415085, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.984375, |
|
"learning_rate": 2.3498694516971284e-06, |
|
"logits/chosen": -2.122745990753174, |
|
"logits/rejected": -1.9872467517852783, |
|
"logps/chosen": -259.26727294921875, |
|
"logps/rejected": -252.7638702392578, |
|
"loss": 0.6637, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.03562777489423752, |
|
"rewards/margins": 0.06830445677042007, |
|
"rewards/rejected": -0.1039322167634964, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 2.4804177545691907e-06, |
|
"logits/chosen": -2.2456932067871094, |
|
"logits/rejected": -2.029968738555908, |
|
"logps/chosen": -275.09014892578125, |
|
"logps/rejected": -256.80023193359375, |
|
"loss": 0.65, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.050059832632541656, |
|
"rewards/margins": 0.10052521526813507, |
|
"rewards/rejected": -0.15058502554893494, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 2.610966057441254e-06, |
|
"logits/chosen": -2.194169759750366, |
|
"logits/rejected": -1.954360008239746, |
|
"logps/chosen": -258.94854736328125, |
|
"logps/rejected": -231.2574920654297, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.09239193052053452, |
|
"rewards/margins": 0.10424431413412094, |
|
"rewards/rejected": -0.19663624465465546, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -2.0706493854522705, |
|
"eval_logits/rejected": -1.9337996244430542, |
|
"eval_logps/chosen": -269.44000244140625, |
|
"eval_logps/rejected": -259.9606018066406, |
|
"eval_loss": 0.6490568518638611, |
|
"eval_rewards/accuracies": 0.6815000176429749, |
|
"eval_rewards/chosen": -0.04788003861904144, |
|
"eval_rewards/margins": 0.10563990473747253, |
|
"eval_rewards/rejected": -0.15351995825767517, |
|
"eval_runtime": 408.5567, |
|
"eval_samples_per_second": 4.895, |
|
"eval_steps_per_second": 0.612, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.359375, |
|
"learning_rate": 2.741514360313316e-06, |
|
"logits/chosen": -2.196761131286621, |
|
"logits/rejected": -1.9796245098114014, |
|
"logps/chosen": -267.26092529296875, |
|
"logps/rejected": -249.635986328125, |
|
"loss": 0.6334, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.05040832608938217, |
|
"rewards/margins": 0.14044944941997528, |
|
"rewards/rejected": -0.19085776805877686, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.125, |
|
"learning_rate": 2.872062663185379e-06, |
|
"logits/chosen": -2.0959529876708984, |
|
"logits/rejected": -1.9805145263671875, |
|
"logps/chosen": -265.8911437988281, |
|
"logps/rejected": -252.39990234375, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.11875394731760025, |
|
"rewards/margins": 0.1290549784898758, |
|
"rewards/rejected": -0.24780890345573425, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 3.0026109660574416e-06, |
|
"logits/chosen": -2.239375591278076, |
|
"logits/rejected": -2.0525119304656982, |
|
"logps/chosen": -317.2463684082031, |
|
"logps/rejected": -291.793212890625, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.11077094078063965, |
|
"rewards/margins": 0.10038264095783234, |
|
"rewards/rejected": -0.21115358173847198, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.78125, |
|
"learning_rate": 3.1331592689295043e-06, |
|
"logits/chosen": -2.149392604827881, |
|
"logits/rejected": -1.961692452430725, |
|
"logps/chosen": -313.08331298828125, |
|
"logps/rejected": -309.60198974609375, |
|
"loss": 0.648, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.16783350706100464, |
|
"rewards/margins": 0.11663570255041122, |
|
"rewards/rejected": -0.28446921706199646, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 3.263707571801567e-06, |
|
"logits/chosen": -2.109787702560425, |
|
"logits/rejected": -2.0100176334381104, |
|
"logps/chosen": -297.9364929199219, |
|
"logps/rejected": -283.5945129394531, |
|
"loss": 0.6475, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.38356590270996094, |
|
"rewards/margins": 0.11117170751094818, |
|
"rewards/rejected": -0.49473756551742554, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.125, |
|
"learning_rate": 3.3942558746736293e-06, |
|
"logits/chosen": -2.1550464630126953, |
|
"logits/rejected": -1.9509170055389404, |
|
"logps/chosen": -313.568115234375, |
|
"logps/rejected": -301.53759765625, |
|
"loss": 0.654, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.4116322994232178, |
|
"rewards/margins": 0.09985023736953735, |
|
"rewards/rejected": -0.5114825367927551, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.328125, |
|
"learning_rate": 3.524804177545692e-06, |
|
"logits/chosen": -2.0624794960021973, |
|
"logits/rejected": -1.9370944499969482, |
|
"logps/chosen": -292.0105895996094, |
|
"logps/rejected": -279.63177490234375, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3133612275123596, |
|
"rewards/margins": 0.190281480550766, |
|
"rewards/rejected": -0.5036426782608032, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.6875, |
|
"learning_rate": 3.6553524804177547e-06, |
|
"logits/chosen": -2.13100528717041, |
|
"logits/rejected": -1.9591659307479858, |
|
"logps/chosen": -289.3089904785156, |
|
"logps/rejected": -282.8084411621094, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.3035983741283417, |
|
"rewards/margins": 0.1496592015028, |
|
"rewards/rejected": -0.45325756072998047, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.875, |
|
"learning_rate": 3.7859007832898174e-06, |
|
"logits/chosen": -2.057344675064087, |
|
"logits/rejected": -1.9588420391082764, |
|
"logps/chosen": -311.67510986328125, |
|
"logps/rejected": -307.47015380859375, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.2604138255119324, |
|
"rewards/margins": 0.209051251411438, |
|
"rewards/rejected": -0.46946510672569275, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 3.9164490861618806e-06, |
|
"logits/chosen": -2.0852608680725098, |
|
"logits/rejected": -1.8826076984405518, |
|
"logps/chosen": -279.9902038574219, |
|
"logps/rejected": -289.1649169921875, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.35862046480178833, |
|
"rewards/margins": 0.2289685755968094, |
|
"rewards/rejected": -0.5875889658927917, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": -2.0021328926086426, |
|
"eval_logits/rejected": -1.8679821491241455, |
|
"eval_logps/chosen": -298.72515869140625, |
|
"eval_logps/rejected": -299.372802734375, |
|
"eval_loss": 0.6216704249382019, |
|
"eval_rewards/accuracies": 0.6769999861717224, |
|
"eval_rewards/chosen": -0.3407318592071533, |
|
"eval_rewards/margins": 0.20691031217575073, |
|
"eval_rewards/rejected": -0.547642171382904, |
|
"eval_runtime": 385.1163, |
|
"eval_samples_per_second": 5.193, |
|
"eval_steps_per_second": 0.649, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 4.046997389033943e-06, |
|
"logits/chosen": -2.2290728092193604, |
|
"logits/rejected": -2.029090642929077, |
|
"logps/chosen": -319.5593566894531, |
|
"logps/rejected": -296.0638732910156, |
|
"loss": 0.5797, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.30653077363967896, |
|
"rewards/margins": 0.3014482855796814, |
|
"rewards/rejected": -0.6079790592193604, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.78125, |
|
"learning_rate": 4.177545691906005e-06, |
|
"logits/chosen": -2.1043484210968018, |
|
"logits/rejected": -1.9178568124771118, |
|
"logps/chosen": -299.31610107421875, |
|
"logps/rejected": -301.1820068359375, |
|
"loss": 0.6348, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.41052132844924927, |
|
"rewards/margins": 0.18894672393798828, |
|
"rewards/rejected": -0.5994681119918823, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.625, |
|
"learning_rate": 4.308093994778068e-06, |
|
"logits/chosen": -2.0332717895507812, |
|
"logits/rejected": -1.891761064529419, |
|
"logps/chosen": -292.5372619628906, |
|
"logps/rejected": -289.52130126953125, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.3294296860694885, |
|
"rewards/margins": 0.21162664890289307, |
|
"rewards/rejected": -0.5410563945770264, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.875, |
|
"learning_rate": 4.4386422976501306e-06, |
|
"logits/chosen": -2.096933364868164, |
|
"logits/rejected": -1.9842134714126587, |
|
"logps/chosen": -318.83001708984375, |
|
"logps/rejected": -326.63128662109375, |
|
"loss": 0.5927, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.29144176840782166, |
|
"rewards/margins": 0.2920153737068176, |
|
"rewards/rejected": -0.5834571719169617, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.625, |
|
"learning_rate": 4.569190600522193e-06, |
|
"logits/chosen": -2.0155937671661377, |
|
"logits/rejected": -1.8702361583709717, |
|
"logps/chosen": -324.283447265625, |
|
"logps/rejected": -333.35772705078125, |
|
"loss": 0.6323, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3502510190010071, |
|
"rewards/margins": 0.20564763247966766, |
|
"rewards/rejected": -0.5558986067771912, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.84375, |
|
"learning_rate": 4.699738903394257e-06, |
|
"logits/chosen": -1.9730371236801147, |
|
"logits/rejected": -1.927020788192749, |
|
"logps/chosen": -289.91351318359375, |
|
"logps/rejected": -299.1167907714844, |
|
"loss": 0.5914, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4892953038215637, |
|
"rewards/margins": 0.3004259467124939, |
|
"rewards/rejected": -0.7897213697433472, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 6.0625, |
|
"learning_rate": 4.8302872062663196e-06, |
|
"logits/chosen": -2.0422720909118652, |
|
"logits/rejected": -1.8679052591323853, |
|
"logps/chosen": -364.3427429199219, |
|
"logps/rejected": -349.3939514160156, |
|
"loss": 0.608, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8744655847549438, |
|
"rewards/margins": 0.2888025939464569, |
|
"rewards/rejected": -1.1632683277130127, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.375, |
|
"learning_rate": 4.9608355091383814e-06, |
|
"logits/chosen": -1.9862794876098633, |
|
"logits/rejected": -1.7763971090316772, |
|
"logps/chosen": -339.8470153808594, |
|
"logps/rejected": -339.43743896484375, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.41790348291397095, |
|
"rewards/margins": 0.3297092318534851, |
|
"rewards/rejected": -0.7476127743721008, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.78125, |
|
"learning_rate": 4.9999488562447675e-06, |
|
"logits/chosen": -1.9420530796051025, |
|
"logits/rejected": -1.8238258361816406, |
|
"logps/chosen": -318.894775390625, |
|
"logps/rejected": -334.8386535644531, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3784860074520111, |
|
"rewards/margins": 0.35959383845329285, |
|
"rewards/rejected": -0.738079845905304, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 4.999698361256577e-06, |
|
"logits/chosen": -1.9580987691879272, |
|
"logits/rejected": -1.7359654903411865, |
|
"logps/chosen": -302.38946533203125, |
|
"logps/rejected": -285.8229675292969, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.36912912130355835, |
|
"rewards/margins": 0.24084654450416565, |
|
"rewards/rejected": -0.6099756956100464, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -1.7878212928771973, |
|
"eval_logits/rejected": -1.658238410949707, |
|
"eval_logps/chosen": -314.925048828125, |
|
"eval_logps/rejected": -327.9222412109375, |
|
"eval_loss": 0.5952155590057373, |
|
"eval_rewards/accuracies": 0.6834999918937683, |
|
"eval_rewards/chosen": -0.5027302503585815, |
|
"eval_rewards/margins": 0.33040642738342285, |
|
"eval_rewards/rejected": -0.8331366777420044, |
|
"eval_runtime": 384.9418, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 0.649, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.28125, |
|
"learning_rate": 4.999239142174581e-06, |
|
"logits/chosen": -1.791033148765564, |
|
"logits/rejected": -1.727502465248108, |
|
"logps/chosen": -305.58447265625, |
|
"logps/rejected": -327.44293212890625, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6326209306716919, |
|
"rewards/margins": 0.2363467961549759, |
|
"rewards/rejected": -0.8689676523208618, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 6.09375, |
|
"learning_rate": 4.99857123734344e-06, |
|
"logits/chosen": -1.758462905883789, |
|
"logits/rejected": -1.625067114830017, |
|
"logps/chosen": -289.7398376464844, |
|
"logps/rejected": -331.71600341796875, |
|
"loss": 0.5301, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6094896197319031, |
|
"rewards/margins": 0.51044100522995, |
|
"rewards/rejected": -1.119930624961853, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 4.997694702533016e-06, |
|
"logits/chosen": -1.7083183526992798, |
|
"logits/rejected": -1.6363675594329834, |
|
"logps/chosen": -356.60418701171875, |
|
"logps/rejected": -385.64410400390625, |
|
"loss": 0.5625, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.790561318397522, |
|
"rewards/margins": 0.5265924334526062, |
|
"rewards/rejected": -1.317153811454773, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 4.996609610933713e-06, |
|
"logits/chosen": -1.8173853158950806, |
|
"logits/rejected": -1.73589289188385, |
|
"logps/chosen": -327.7349548339844, |
|
"logps/rejected": -338.24334716796875, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5558608770370483, |
|
"rewards/margins": 0.3886395990848541, |
|
"rewards/rejected": -0.9445004463195801, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 7.4375, |
|
"learning_rate": 4.995316053150366e-06, |
|
"logits/chosen": -1.674541711807251, |
|
"logits/rejected": -1.5631834268569946, |
|
"logps/chosen": -329.3605651855469, |
|
"logps/rejected": -351.2587890625, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6152701377868652, |
|
"rewards/margins": 0.4492935538291931, |
|
"rewards/rejected": -1.0645637512207031, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 12.5, |
|
"learning_rate": 4.9938141371946815e-06, |
|
"logits/chosen": -1.616389513015747, |
|
"logits/rejected": -1.5250511169433594, |
|
"logps/chosen": -378.802978515625, |
|
"logps/rejected": -416.6460876464844, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.1370322704315186, |
|
"rewards/margins": 0.523708701133728, |
|
"rewards/rejected": -1.6607410907745361, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 6.625, |
|
"learning_rate": 4.992103988476206e-06, |
|
"logits/chosen": -1.6163583993911743, |
|
"logits/rejected": -1.4710776805877686, |
|
"logps/chosen": -357.6758728027344, |
|
"logps/rejected": -395.6207580566406, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1385869979858398, |
|
"rewards/margins": 0.504925012588501, |
|
"rewards/rejected": -1.6435120105743408, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 4.990185749791866e-06, |
|
"logits/chosen": -1.6142799854278564, |
|
"logits/rejected": -1.484505534172058, |
|
"logps/chosen": -354.9118957519531, |
|
"logps/rejected": -412.96539306640625, |
|
"loss": 0.542, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.980329155921936, |
|
"rewards/margins": 0.5626578330993652, |
|
"rewards/rejected": -1.5429868698120117, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 8.25, |
|
"learning_rate": 4.9880595813140395e-06, |
|
"logits/chosen": -1.6605031490325928, |
|
"logits/rejected": -1.519315242767334, |
|
"logps/chosen": -388.9188537597656, |
|
"logps/rejected": -418.9161071777344, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0402735471725464, |
|
"rewards/margins": 0.603448212146759, |
|
"rewards/rejected": -1.6437218189239502, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 4.985725660577184e-06, |
|
"logits/chosen": -1.6577751636505127, |
|
"logits/rejected": -1.5049307346343994, |
|
"logps/chosen": -385.02813720703125, |
|
"logps/rejected": -409.57379150390625, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.11268150806427, |
|
"rewards/margins": 0.6577950119972229, |
|
"rewards/rejected": -1.7704765796661377, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": -1.5543876886367798, |
|
"eval_logits/rejected": -1.427258014678955, |
|
"eval_logps/chosen": -376.1609191894531, |
|
"eval_logps/rejected": -405.3195495605469, |
|
"eval_loss": 0.5753844976425171, |
|
"eval_rewards/accuracies": 0.6890000104904175, |
|
"eval_rewards/chosen": -1.115088939666748, |
|
"eval_rewards/margins": 0.4920206665992737, |
|
"eval_rewards/rejected": -1.607109785079956, |
|
"eval_runtime": 388.587, |
|
"eval_samples_per_second": 5.147, |
|
"eval_steps_per_second": 0.643, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 4.983184182463009e-06, |
|
"logits/chosen": -1.6563122272491455, |
|
"logits/rejected": -1.5474936962127686, |
|
"logps/chosen": -371.3856201171875, |
|
"logps/rejected": -394.072021484375, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9312448501586914, |
|
"rewards/margins": 0.5972838401794434, |
|
"rewards/rejected": -1.5285285711288452, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 4.980435359184203e-06, |
|
"logits/chosen": -1.6844135522842407, |
|
"logits/rejected": -1.6247421503067017, |
|
"logps/chosen": -349.91943359375, |
|
"logps/rejected": -377.5780029296875, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.768576443195343, |
|
"rewards/margins": 0.43869537115097046, |
|
"rewards/rejected": -1.2072718143463135, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 10.75, |
|
"learning_rate": 4.9774794202667236e-06, |
|
"logits/chosen": -1.68100106716156, |
|
"logits/rejected": -1.63496994972229, |
|
"logps/chosen": -316.89166259765625, |
|
"logps/rejected": -371.69012451171875, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5579014420509338, |
|
"rewards/margins": 0.4634733200073242, |
|
"rewards/rejected": -1.0213747024536133, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 4.974316612530615e-06, |
|
"logits/chosen": -1.5825644731521606, |
|
"logits/rejected": -1.4188969135284424, |
|
"logps/chosen": -361.13397216796875, |
|
"logps/rejected": -398.6629638671875, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.8265112042427063, |
|
"rewards/margins": 0.8431331515312195, |
|
"rewards/rejected": -1.6696443557739258, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 15.25, |
|
"learning_rate": 4.970947200069416e-06, |
|
"logits/chosen": -1.460860252380371, |
|
"logits/rejected": -1.3948581218719482, |
|
"logps/chosen": -469.55615234375, |
|
"logps/rejected": -493.6904296875, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.9104303121566772, |
|
"rewards/margins": 0.43918126821517944, |
|
"rewards/rejected": -2.349611759185791, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 4.967371464228096e-06, |
|
"logits/chosen": -1.6625244617462158, |
|
"logits/rejected": -1.5688579082489014, |
|
"logps/chosen": -452.31256103515625, |
|
"logps/rejected": -504.4009704589844, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.948133111000061, |
|
"rewards/margins": 0.5168659687042236, |
|
"rewards/rejected": -2.464999198913574, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 9.5, |
|
"learning_rate": 4.963589703579569e-06, |
|
"logits/chosen": -1.7668651342391968, |
|
"logits/rejected": -1.6291354894638062, |
|
"logps/chosen": -480.91107177734375, |
|
"logps/rejected": -497.85650634765625, |
|
"loss": 0.6004, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8087114095687866, |
|
"rewards/margins": 0.5162175297737122, |
|
"rewards/rejected": -2.3249289989471436, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 4.9596022338997615e-06, |
|
"logits/chosen": -1.7466462850570679, |
|
"logits/rejected": -1.5261166095733643, |
|
"logps/chosen": -454.7201232910156, |
|
"logps/rejected": -477.67987060546875, |
|
"loss": 0.5409, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.569333791732788, |
|
"rewards/margins": 0.6403428316116333, |
|
"rewards/rejected": -2.209676504135132, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 4.955409388141243e-06, |
|
"logits/chosen": -1.620123267173767, |
|
"logits/rejected": -1.504206895828247, |
|
"logps/chosen": -378.0823669433594, |
|
"logps/rejected": -403.84649658203125, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2078220844268799, |
|
"rewards/margins": 0.49430006742477417, |
|
"rewards/rejected": -1.7021223306655884, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.5, |
|
"learning_rate": 4.951011516405429e-06, |
|
"logits/chosen": -1.7101682424545288, |
|
"logits/rejected": -1.6404016017913818, |
|
"logps/chosen": -328.59051513671875, |
|
"logps/rejected": -365.19866943359375, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7783375978469849, |
|
"rewards/margins": 0.5154664516448975, |
|
"rewards/rejected": -1.2938039302825928, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -1.5725802183151245, |
|
"eval_logits/rejected": -1.439643383026123, |
|
"eval_logps/chosen": -340.6527404785156, |
|
"eval_logps/rejected": -371.2197570800781, |
|
"eval_loss": 0.5694642066955566, |
|
"eval_rewards/accuracies": 0.6984999775886536, |
|
"eval_rewards/chosen": -0.7600072622299194, |
|
"eval_rewards/margins": 0.5061042904853821, |
|
"eval_rewards/rejected": -1.2661116123199463, |
|
"eval_runtime": 384.8138, |
|
"eval_samples_per_second": 5.197, |
|
"eval_steps_per_second": 0.65, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 8.375, |
|
"learning_rate": 4.946408985913344e-06, |
|
"logits/chosen": -1.6002981662750244, |
|
"logits/rejected": -1.5132791996002197, |
|
"logps/chosen": -320.1644592285156, |
|
"logps/rejected": -367.06707763671875, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7430071830749512, |
|
"rewards/margins": 0.6466056108474731, |
|
"rewards/rejected": -1.3896129131317139, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 11.5, |
|
"learning_rate": 4.941602180974958e-06, |
|
"logits/chosen": -1.5505564212799072, |
|
"logits/rejected": -1.3113032579421997, |
|
"logps/chosen": -394.12786865234375, |
|
"logps/rejected": -412.055908203125, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0630056858062744, |
|
"rewards/margins": 0.7659745216369629, |
|
"rewards/rejected": -1.8289800882339478, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.96875, |
|
"learning_rate": 4.936591502957101e-06, |
|
"logits/chosen": -1.4749863147735596, |
|
"logits/rejected": -1.315354347229004, |
|
"logps/chosen": -395.21185302734375, |
|
"logps/rejected": -471.55157470703125, |
|
"loss": 0.5375, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.470820665359497, |
|
"rewards/margins": 0.8663395643234253, |
|
"rewards/rejected": -2.337160587310791, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 4.931377370249946e-06, |
|
"logits/chosen": -1.4970736503601074, |
|
"logits/rejected": -1.2784922122955322, |
|
"logps/chosen": -469.60565185546875, |
|
"logps/rejected": -513.3096923828125, |
|
"loss": 0.5511, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0428223609924316, |
|
"rewards/margins": 0.6945194602012634, |
|
"rewards/rejected": -2.73734188079834, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 4.925960218232073e-06, |
|
"logits/chosen": -1.5300517082214355, |
|
"logits/rejected": -1.4055113792419434, |
|
"logps/chosen": -411.9557189941406, |
|
"logps/rejected": -478.2559509277344, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5665361881256104, |
|
"rewards/margins": 0.7580282092094421, |
|
"rewards/rejected": -2.324564218521118, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 4.920340499234116e-06, |
|
"logits/chosen": -1.5464222431182861, |
|
"logits/rejected": -1.322009563446045, |
|
"logps/chosen": -396.75299072265625, |
|
"logps/rejected": -416.83868408203125, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.307492733001709, |
|
"rewards/margins": 0.5529674291610718, |
|
"rewards/rejected": -1.8604600429534912, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 6.875, |
|
"learning_rate": 4.914518682500995e-06, |
|
"logits/chosen": -1.7627025842666626, |
|
"logits/rejected": -1.554469347000122, |
|
"logps/chosen": -392.9539794921875, |
|
"logps/rejected": -419.94952392578125, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0941137075424194, |
|
"rewards/margins": 0.6771708726882935, |
|
"rewards/rejected": -1.7712844610214233, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 4.9084952541527315e-06, |
|
"logits/chosen": -1.6612813472747803, |
|
"logits/rejected": -1.459668755531311, |
|
"logps/chosen": -392.9391784667969, |
|
"logps/rejected": -423.2802734375, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1960499286651611, |
|
"rewards/margins": 0.774462878704071, |
|
"rewards/rejected": -1.9705129861831665, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 9.75, |
|
"learning_rate": 4.902270717143858e-06, |
|
"logits/chosen": -1.5917980670928955, |
|
"logits/rejected": -1.4846012592315674, |
|
"logps/chosen": -386.42498779296875, |
|
"logps/rejected": -509.9639587402344, |
|
"loss": 0.4446, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.5019272565841675, |
|
"rewards/margins": 1.1186031103134155, |
|
"rewards/rejected": -2.620530605316162, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 10.375, |
|
"learning_rate": 4.895845591221427e-06, |
|
"logits/chosen": -1.5352580547332764, |
|
"logits/rejected": -1.4566528797149658, |
|
"logps/chosen": -440.7987365722656, |
|
"logps/rejected": -521.3024291992188, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.8748830556869507, |
|
"rewards/margins": 0.8497939109802246, |
|
"rewards/rejected": -2.7246768474578857, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_logits/chosen": -1.3803926706314087, |
|
"eval_logits/rejected": -1.2514902353286743, |
|
"eval_logps/chosen": -470.9231262207031, |
|
"eval_logps/rejected": -536.3329467773438, |
|
"eval_loss": 0.5560212135314941, |
|
"eval_rewards/accuracies": 0.7164999842643738, |
|
"eval_rewards/chosen": -2.062711000442505, |
|
"eval_rewards/margins": 0.8545322418212891, |
|
"eval_rewards/rejected": -2.917243242263794, |
|
"eval_runtime": 385.2779, |
|
"eval_samples_per_second": 5.191, |
|
"eval_steps_per_second": 0.649, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 11.0, |
|
"learning_rate": 4.8892204128816e-06, |
|
"logits/chosen": -1.5460926294326782, |
|
"logits/rejected": -1.4308459758758545, |
|
"logps/chosen": -454.60797119140625, |
|
"logps/rejected": -516.79296875, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.8704407215118408, |
|
"rewards/margins": 0.7577110528945923, |
|
"rewards/rejected": -2.6281516551971436, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 4.882395735324864e-06, |
|
"logits/chosen": -1.5478827953338623, |
|
"logits/rejected": -1.4022762775421143, |
|
"logps/chosen": -418.3267517089844, |
|
"logps/rejected": -491.538818359375, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.5224473476409912, |
|
"rewards/margins": 0.9039508104324341, |
|
"rewards/rejected": -2.4263980388641357, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 4.87537212840983e-06, |
|
"logits/chosen": -1.5722401142120361, |
|
"logits/rejected": -1.4339630603790283, |
|
"logps/chosen": -399.0245056152344, |
|
"logps/rejected": -433.6438903808594, |
|
"loss": 0.5526, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3276931047439575, |
|
"rewards/margins": 0.6516003608703613, |
|
"rewards/rejected": -1.9792934656143188, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 4.8681501786056545e-06, |
|
"logits/chosen": -1.4682786464691162, |
|
"logits/rejected": -1.3141381740570068, |
|
"logps/chosen": -346.26873779296875, |
|
"logps/rejected": -393.406005859375, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1805267333984375, |
|
"rewards/margins": 0.7937687039375305, |
|
"rewards/rejected": -1.9742956161499023, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 15.0, |
|
"learning_rate": 4.860730488943068e-06, |
|
"logits/chosen": -1.3719309568405151, |
|
"logits/rejected": -1.326554536819458, |
|
"logps/chosen": -396.23095703125, |
|
"logps/rejected": -483.92169189453125, |
|
"loss": 0.4852, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6170562505722046, |
|
"rewards/margins": 0.8960045576095581, |
|
"rewards/rejected": -2.513061046600342, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 4.853113678964022e-06, |
|
"logits/chosen": -1.3708162307739258, |
|
"logits/rejected": -1.295253038406372, |
|
"logps/chosen": -474.16522216796875, |
|
"logps/rejected": -546.8492431640625, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.0177154541015625, |
|
"rewards/margins": 0.8057907223701477, |
|
"rewards/rejected": -2.8235061168670654, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 6.875, |
|
"learning_rate": 4.845300384669958e-06, |
|
"logits/chosen": -1.4265882968902588, |
|
"logits/rejected": -1.292400598526001, |
|
"logps/chosen": -428.4332580566406, |
|
"logps/rejected": -476.52471923828125, |
|
"loss": 0.5517, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.7522939443588257, |
|
"rewards/margins": 0.7091314196586609, |
|
"rewards/rejected": -2.461425304412842, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 12.3125, |
|
"learning_rate": 4.837291258468701e-06, |
|
"logits/chosen": -1.4694699048995972, |
|
"logits/rejected": -1.3255608081817627, |
|
"logps/chosen": -471.6322326660156, |
|
"logps/rejected": -542.5789794921875, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.8628593683242798, |
|
"rewards/margins": 0.9583638310432434, |
|
"rewards/rejected": -2.821223258972168, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 4.829086969119984e-06, |
|
"logits/chosen": -1.3006738424301147, |
|
"logits/rejected": -1.2943049669265747, |
|
"logps/chosen": -477.1316833496094, |
|
"logps/rejected": -552.9581298828125, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.2640693187713623, |
|
"rewards/margins": 0.740705132484436, |
|
"rewards/rejected": -3.004774570465088, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 9.625, |
|
"learning_rate": 4.820688201679605e-06, |
|
"logits/chosen": -1.5880753993988037, |
|
"logits/rejected": -1.2926288843154907, |
|
"logps/chosen": -464.5062561035156, |
|
"logps/rejected": -476.73968505859375, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.0187087059020996, |
|
"rewards/margins": 0.745971143245697, |
|
"rewards/rejected": -2.7646796703338623, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.4040697813034058, |
|
"eval_logits/rejected": -1.275604009628296, |
|
"eval_logps/chosen": -434.3306884765625, |
|
"eval_logps/rejected": -484.6470031738281, |
|
"eval_loss": 0.5363825559616089, |
|
"eval_rewards/accuracies": 0.7264999747276306, |
|
"eval_rewards/chosen": -1.6967861652374268, |
|
"eval_rewards/margins": 0.7035976052284241, |
|
"eval_rewards/rejected": -2.400383472442627, |
|
"eval_runtime": 385.6904, |
|
"eval_samples_per_second": 5.186, |
|
"eval_steps_per_second": 0.648, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 4.8120956574422315e-06, |
|
"logits/chosen": -1.5584218502044678, |
|
"logits/rejected": -1.5333359241485596, |
|
"logps/chosen": -437.0270080566406, |
|
"logps/rejected": -483.4334411621094, |
|
"loss": 0.5977, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6310217380523682, |
|
"rewards/margins": 0.5348454117774963, |
|
"rewards/rejected": -2.165867328643799, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 4.803310053882831e-06, |
|
"logits/chosen": -1.5471457242965698, |
|
"logits/rejected": -1.5452721118927002, |
|
"logps/chosen": -370.4143981933594, |
|
"logps/rejected": -443.0765686035156, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.4108860492706299, |
|
"rewards/margins": 0.6031405925750732, |
|
"rewards/rejected": -2.014026641845703, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 4.794332124596775e-06, |
|
"logits/chosen": -1.542186975479126, |
|
"logits/rejected": -1.4280331134796143, |
|
"logps/chosen": -428.5491638183594, |
|
"logps/rejected": -479.2059631347656, |
|
"loss": 0.5879, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.6010816097259521, |
|
"rewards/margins": 0.5433088541030884, |
|
"rewards/rejected": -2.144390344619751, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 6.25, |
|
"learning_rate": 4.785162619238575e-06, |
|
"logits/chosen": -1.5202006101608276, |
|
"logits/rejected": -1.357043743133545, |
|
"logps/chosen": -382.9326171875, |
|
"logps/rejected": -429.1929626464844, |
|
"loss": 0.5204, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.2908388376235962, |
|
"rewards/margins": 0.7357919216156006, |
|
"rewards/rejected": -2.0266308784484863, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 8.375, |
|
"learning_rate": 4.775802303459288e-06, |
|
"logits/chosen": -1.4242273569107056, |
|
"logits/rejected": -1.3464834690093994, |
|
"logps/chosen": -368.41864013671875, |
|
"logps/rejected": -433.1285705566406, |
|
"loss": 0.5601, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2065433263778687, |
|
"rewards/margins": 0.69434654712677, |
|
"rewards/rejected": -1.9008897542953491, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 4.766251958842589e-06, |
|
"logits/chosen": -1.3795868158340454, |
|
"logits/rejected": -1.2619365453720093, |
|
"logps/chosen": -401.754150390625, |
|
"logps/rejected": -458.87786865234375, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2925317287445068, |
|
"rewards/margins": 0.6759337186813354, |
|
"rewards/rejected": -1.9684655666351318, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 7.375, |
|
"learning_rate": 4.7565123828395066e-06, |
|
"logits/chosen": -1.2665306329727173, |
|
"logits/rejected": -1.1762630939483643, |
|
"logps/chosen": -408.20220947265625, |
|
"logps/rejected": -490.75634765625, |
|
"loss": 0.5077, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.522733449935913, |
|
"rewards/margins": 0.8316558599472046, |
|
"rewards/rejected": -2.3543894290924072, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 4.746584388701831e-06, |
|
"logits/chosen": -1.2851665019989014, |
|
"logits/rejected": -1.2308191061019897, |
|
"logps/chosen": -443.7979431152344, |
|
"logps/rejected": -522.8231201171875, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8055862188339233, |
|
"rewards/margins": 0.9323374629020691, |
|
"rewards/rejected": -2.7379238605499268, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 4.736468805414218e-06, |
|
"logits/chosen": -1.250001072883606, |
|
"logits/rejected": -1.2183704376220703, |
|
"logps/chosen": -407.4109802246094, |
|
"logps/rejected": -504.15130615234375, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5997565984725952, |
|
"rewards/margins": 0.8288668394088745, |
|
"rewards/rejected": -2.428623676300049, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 10.375, |
|
"learning_rate": 4.7261664776249595e-06, |
|
"logits/chosen": -1.1423993110656738, |
|
"logits/rejected": -1.050307273864746, |
|
"logps/chosen": -386.7989807128906, |
|
"logps/rejected": -467.6982421875, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5788599252700806, |
|
"rewards/margins": 0.8989045023918152, |
|
"rewards/rejected": -2.477764368057251, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": -1.2160884141921997, |
|
"eval_logits/rejected": -1.095556616783142, |
|
"eval_logps/chosen": -432.6338806152344, |
|
"eval_logps/rejected": -489.990966796875, |
|
"eval_loss": 0.5329138040542603, |
|
"eval_rewards/accuracies": 0.7204999923706055, |
|
"eval_rewards/chosen": -1.679819107055664, |
|
"eval_rewards/margins": 0.7740048170089722, |
|
"eval_rewards/rejected": -2.4538238048553467, |
|
"eval_runtime": 384.8785, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 0.65, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 4.715678265575463e-06, |
|
"logits/chosen": -1.3864091634750366, |
|
"logits/rejected": -1.188279390335083, |
|
"logps/chosen": -431.4292907714844, |
|
"logps/rejected": -437.1918029785156, |
|
"loss": 0.5621, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4715720415115356, |
|
"rewards/margins": 0.6131043434143066, |
|
"rewards/rejected": -2.0846762657165527, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 4.705005045028415e-06, |
|
"logits/chosen": -1.3089066743850708, |
|
"logits/rejected": -1.1869691610336304, |
|
"logps/chosen": -409.567626953125, |
|
"logps/rejected": -467.461181640625, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4643157720565796, |
|
"rewards/margins": 0.7529044151306152, |
|
"rewards/rejected": -2.2172200679779053, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 4.694147707194659e-06, |
|
"logits/chosen": -1.3738486766815186, |
|
"logits/rejected": -1.295534372329712, |
|
"logps/chosen": -467.11102294921875, |
|
"logps/rejected": -519.7848510742188, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9523290395736694, |
|
"rewards/margins": 0.7249565720558167, |
|
"rewards/rejected": -2.677285671234131, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 8.5, |
|
"learning_rate": 4.683107158658782e-06, |
|
"logits/chosen": -1.2867457866668701, |
|
"logits/rejected": -1.202335000038147, |
|
"logps/chosen": -514.80126953125, |
|
"logps/rejected": -559.9244384765625, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.245605707168579, |
|
"rewards/margins": 0.7386397123336792, |
|
"rewards/rejected": -2.9842450618743896, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 4.671884321303407e-06, |
|
"logits/chosen": -1.4214376211166382, |
|
"logits/rejected": -1.286566972732544, |
|
"logps/chosen": -444.1116638183594, |
|
"logps/rejected": -498.4622497558594, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9778563976287842, |
|
"rewards/margins": 0.6870826482772827, |
|
"rewards/rejected": -2.6649391651153564, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 7.625, |
|
"learning_rate": 4.660480132232224e-06, |
|
"logits/chosen": -1.5147006511688232, |
|
"logits/rejected": -1.4103442430496216, |
|
"logps/chosen": -423.77252197265625, |
|
"logps/rejected": -456.38330078125, |
|
"loss": 0.5784, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5195940732955933, |
|
"rewards/margins": 0.5671976804733276, |
|
"rewards/rejected": -2.086791753768921, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 14.4375, |
|
"learning_rate": 4.6488955436917414e-06, |
|
"logits/chosen": -1.5512803792953491, |
|
"logits/rejected": -1.333134412765503, |
|
"logps/chosen": -416.9090270996094, |
|
"logps/rejected": -457.83563232421875, |
|
"loss": 0.5199, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.369279384613037, |
|
"rewards/margins": 0.875058650970459, |
|
"rewards/rejected": -2.244338274002075, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 4.6371315229917644e-06, |
|
"logits/chosen": -1.4992878437042236, |
|
"logits/rejected": -1.369624376296997, |
|
"logps/chosen": -448.16314697265625, |
|
"logps/rejected": -506.52191162109375, |
|
"loss": 0.527, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6474645137786865, |
|
"rewards/margins": 0.7875878810882568, |
|
"rewards/rejected": -2.4350523948669434, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 4.625189052424638e-06, |
|
"logits/chosen": -1.4010595083236694, |
|
"logits/rejected": -1.259636402130127, |
|
"logps/chosen": -442.61590576171875, |
|
"logps/rejected": -522.6851196289062, |
|
"loss": 0.4455, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.035126209259033, |
|
"rewards/margins": 1.0280003547668457, |
|
"rewards/rejected": -3.063126802444458, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 9.5, |
|
"learning_rate": 4.613069129183218e-06, |
|
"logits/chosen": -1.4701205492019653, |
|
"logits/rejected": -1.3206748962402344, |
|
"logps/chosen": -506.3350524902344, |
|
"logps/rejected": -548.3522338867188, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.02931809425354, |
|
"rewards/margins": 0.7819973826408386, |
|
"rewards/rejected": -2.8113150596618652, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -1.2533063888549805, |
|
"eval_logits/rejected": -1.1263666152954102, |
|
"eval_logps/chosen": -454.4657897949219, |
|
"eval_logps/rejected": -521.2656860351562, |
|
"eval_loss": 0.5279005169868469, |
|
"eval_rewards/accuracies": 0.7239999771118164, |
|
"eval_rewards/chosen": -1.8981376886367798, |
|
"eval_rewards/margins": 0.8684335947036743, |
|
"eval_rewards/rejected": -2.766571283340454, |
|
"eval_runtime": 396.7737, |
|
"eval_samples_per_second": 5.041, |
|
"eval_steps_per_second": 0.63, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 4.600772765277607e-06, |
|
"logits/chosen": -1.295363187789917, |
|
"logits/rejected": -1.21110200881958, |
|
"logps/chosen": -406.8030700683594, |
|
"logps/rejected": -491.1182556152344, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.7324583530426025, |
|
"rewards/margins": 0.8947044610977173, |
|
"rewards/rejected": -2.6271629333496094, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 4.588300987450652e-06, |
|
"logits/chosen": -1.3897264003753662, |
|
"logits/rejected": -1.2775170803070068, |
|
"logps/chosen": -400.9536437988281, |
|
"logps/rejected": -446.50244140625, |
|
"loss": 0.5402, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.4720966815948486, |
|
"rewards/margins": 0.7910727262496948, |
|
"rewards/rejected": -2.263169050216675, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 6.5, |
|
"learning_rate": 4.5756548370922136e-06, |
|
"logits/chosen": -1.3535692691802979, |
|
"logits/rejected": -1.2674891948699951, |
|
"logps/chosen": -364.35394287109375, |
|
"logps/rejected": -435.6012268066406, |
|
"loss": 0.5053, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2365007400512695, |
|
"rewards/margins": 0.8022448420524597, |
|
"rewards/rejected": -2.038745641708374, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 4.562835370152206e-06, |
|
"logits/chosen": -1.4244534969329834, |
|
"logits/rejected": -1.2097485065460205, |
|
"logps/chosen": -458.898681640625, |
|
"logps/rejected": -542.5496215820312, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5766918659210205, |
|
"rewards/margins": 1.1474573612213135, |
|
"rewards/rejected": -2.724148988723755, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 6.8125, |
|
"learning_rate": 4.54984365705243e-06, |
|
"logits/chosen": -1.3653185367584229, |
|
"logits/rejected": -1.2626806497573853, |
|
"logps/chosen": -441.8795471191406, |
|
"logps/rejected": -547.1009521484375, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.7340948581695557, |
|
"rewards/margins": 1.117974042892456, |
|
"rewards/rejected": -2.8520689010620117, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 4.536680782597191e-06, |
|
"logits/chosen": -1.2812556028366089, |
|
"logits/rejected": -1.2028696537017822, |
|
"logps/chosen": -422.71746826171875, |
|
"logps/rejected": -500.39276123046875, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.8669233322143555, |
|
"rewards/margins": 0.8329373598098755, |
|
"rewards/rejected": -2.6998608112335205, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 20.625, |
|
"learning_rate": 4.523347845882718e-06, |
|
"logits/chosen": -1.4049158096313477, |
|
"logits/rejected": -1.2168524265289307, |
|
"logps/chosen": -458.36956787109375, |
|
"logps/rejected": -519.8746337890625, |
|
"loss": 0.4512, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7019942998886108, |
|
"rewards/margins": 1.1078611612319946, |
|
"rewards/rejected": -2.8098552227020264, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 4.50984596020539e-06, |
|
"logits/chosen": -1.2350142002105713, |
|
"logits/rejected": -1.166898488998413, |
|
"logps/chosen": -446.97833251953125, |
|
"logps/rejected": -499.6363220214844, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.6682651042938232, |
|
"rewards/margins": 0.8490194082260132, |
|
"rewards/rejected": -2.517284393310547, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 7.125, |
|
"learning_rate": 4.4961762529687745e-06, |
|
"logits/chosen": -1.427812933921814, |
|
"logits/rejected": -1.3044004440307617, |
|
"logps/chosen": -407.113525390625, |
|
"logps/rejected": -485.9717712402344, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.4671194553375244, |
|
"rewards/margins": 0.9535934329032898, |
|
"rewards/rejected": -2.420712947845459, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 4.482339865589492e-06, |
|
"logits/chosen": -1.4146265983581543, |
|
"logits/rejected": -1.2579143047332764, |
|
"logps/chosen": -434.85498046875, |
|
"logps/rejected": -476.2469787597656, |
|
"loss": 0.565, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.685683012008667, |
|
"rewards/margins": 0.8535755276679993, |
|
"rewards/rejected": -2.5392584800720215, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -1.271475911140442, |
|
"eval_logits/rejected": -1.1444941759109497, |
|
"eval_logps/chosen": -415.9482727050781, |
|
"eval_logps/rejected": -478.28485107421875, |
|
"eval_loss": 0.5207434296607971, |
|
"eval_rewards/accuracies": 0.7289999723434448, |
|
"eval_rewards/chosen": -1.5129626989364624, |
|
"eval_rewards/margins": 0.8237999677658081, |
|
"eval_rewards/rejected": -2.3367626667022705, |
|
"eval_runtime": 400.4213, |
|
"eval_samples_per_second": 4.995, |
|
"eval_steps_per_second": 0.624, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 7.1875, |
|
"learning_rate": 4.468337953401909e-06, |
|
"logits/chosen": -1.4210216999053955, |
|
"logits/rejected": -1.3622697591781616, |
|
"logps/chosen": -422.3374938964844, |
|
"logps/rejected": -482.82489013671875, |
|
"loss": 0.5625, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5198637247085571, |
|
"rewards/margins": 0.6534841656684875, |
|
"rewards/rejected": -2.1733479499816895, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 8.5, |
|
"learning_rate": 4.45417168556166e-06, |
|
"logits/chosen": -1.328687310218811, |
|
"logits/rejected": -1.2253139019012451, |
|
"logps/chosen": -382.0760192871094, |
|
"logps/rejected": -463.2789001464844, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4092481136322021, |
|
"rewards/margins": 0.8184933662414551, |
|
"rewards/rejected": -2.2277414798736572, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 9.25, |
|
"learning_rate": 4.439842244948036e-06, |
|
"logits/chosen": -1.2945640087127686, |
|
"logits/rejected": -1.1504161357879639, |
|
"logps/chosen": -432.81524658203125, |
|
"logps/rejected": -501.5387268066406, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.7175318002700806, |
|
"rewards/margins": 0.742792010307312, |
|
"rewards/rejected": -2.4603238105773926, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 17.75, |
|
"learning_rate": 4.425350828065204e-06, |
|
"logits/chosen": -1.3531075716018677, |
|
"logits/rejected": -1.145227074623108, |
|
"logps/chosen": -450.9183654785156, |
|
"logps/rejected": -492.70623779296875, |
|
"loss": 0.495, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6683238744735718, |
|
"rewards/margins": 0.8965514898300171, |
|
"rewards/rejected": -2.564875364303589, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 10.375, |
|
"learning_rate": 4.410698644942303e-06, |
|
"logits/chosen": -1.3738346099853516, |
|
"logits/rejected": -1.2316032648086548, |
|
"logps/chosen": -427.7021484375, |
|
"logps/rejected": -499.776611328125, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5538952350616455, |
|
"rewards/margins": 0.9599501490592957, |
|
"rewards/rejected": -2.513845443725586, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 4.395886919032406e-06, |
|
"logits/chosen": -1.240241527557373, |
|
"logits/rejected": -1.113989233970642, |
|
"logps/chosen": -439.04095458984375, |
|
"logps/rejected": -497.86297607421875, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.7520458698272705, |
|
"rewards/margins": 0.8531185984611511, |
|
"rewards/rejected": -2.6051642894744873, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 4.380916887110366e-06, |
|
"logits/chosen": -1.3933711051940918, |
|
"logits/rejected": -1.2005256414413452, |
|
"logps/chosen": -448.8775329589844, |
|
"logps/rejected": -503.5738220214844, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.897091269493103, |
|
"rewards/margins": 0.9505263566970825, |
|
"rewards/rejected": -2.8476176261901855, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 4.365789799169539e-06, |
|
"logits/chosen": -1.2259958982467651, |
|
"logits/rejected": -1.2687208652496338, |
|
"logps/chosen": -432.62786865234375, |
|
"logps/rejected": -522.6715698242188, |
|
"loss": 0.5228, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.8601690530776978, |
|
"rewards/margins": 0.8469558954238892, |
|
"rewards/rejected": -2.707124710083008, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 4.350506918317416e-06, |
|
"logits/chosen": -1.4284965991973877, |
|
"logits/rejected": -1.2679976224899292, |
|
"logps/chosen": -413.3136291503906, |
|
"logps/rejected": -491.58721923828125, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6571779251098633, |
|
"rewards/margins": 0.8545892834663391, |
|
"rewards/rejected": -2.5117671489715576, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 4.335069520670149e-06, |
|
"logits/chosen": -1.2467167377471924, |
|
"logits/rejected": -1.170364260673523, |
|
"logps/chosen": -406.6671447753906, |
|
"logps/rejected": -487.4442443847656, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7695426940917969, |
|
"rewards/margins": 0.7259476780891418, |
|
"rewards/rejected": -2.495490312576294, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -1.2314388751983643, |
|
"eval_logits/rejected": -1.10646390914917, |
|
"eval_logps/chosen": -431.9436950683594, |
|
"eval_logps/rejected": -498.354736328125, |
|
"eval_loss": 0.5104092359542847, |
|
"eval_rewards/accuracies": 0.7354999780654907, |
|
"eval_rewards/chosen": -1.6729168891906738, |
|
"eval_rewards/margins": 0.8645446300506592, |
|
"eval_rewards/rejected": -2.537461280822754, |
|
"eval_runtime": 385.0952, |
|
"eval_samples_per_second": 5.194, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 8.375, |
|
"learning_rate": 4.319478895246e-06, |
|
"logits/chosen": -1.3048737049102783, |
|
"logits/rejected": -1.1300756931304932, |
|
"logps/chosen": -410.984375, |
|
"logps/rejected": -473.4725036621094, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.638689637184143, |
|
"rewards/margins": 0.871795654296875, |
|
"rewards/rejected": -2.5104851722717285, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 17.875, |
|
"learning_rate": 4.303736343857704e-06, |
|
"logits/chosen": -1.2923145294189453, |
|
"logits/rejected": -1.1951522827148438, |
|
"logps/chosen": -442.15472412109375, |
|
"logps/rejected": -570.807861328125, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.8504784107208252, |
|
"rewards/margins": 1.1673939228057861, |
|
"rewards/rejected": -3.0178723335266113, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 6.03125, |
|
"learning_rate": 4.287843181003772e-06, |
|
"logits/chosen": -1.347501277923584, |
|
"logits/rejected": -1.1933776140213013, |
|
"logps/chosen": -496.73077392578125, |
|
"logps/rejected": -529.7448120117188, |
|
"loss": 0.5797, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.02878999710083, |
|
"rewards/margins": 0.7935463190078735, |
|
"rewards/rejected": -2.822335958480835, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 4.27180073375873e-06, |
|
"logits/chosen": -1.411339521408081, |
|
"logits/rejected": -1.2728986740112305, |
|
"logps/chosen": -444.9125061035156, |
|
"logps/rejected": -488.64794921875, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5754519701004028, |
|
"rewards/margins": 0.8528854250907898, |
|
"rewards/rejected": -2.428337574005127, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 5.59375, |
|
"learning_rate": 4.255610341662304e-06, |
|
"logits/chosen": -1.4314241409301758, |
|
"logits/rejected": -1.2228209972381592, |
|
"logps/chosen": -408.0103454589844, |
|
"logps/rejected": -470.0995178222656, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5272233486175537, |
|
"rewards/margins": 0.8200508952140808, |
|
"rewards/rejected": -2.3472743034362793, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 4.2392733566075764e-06, |
|
"logits/chosen": -1.4001052379608154, |
|
"logits/rejected": -1.2643131017684937, |
|
"logps/chosen": -446.30975341796875, |
|
"logps/rejected": -492.1767578125, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.9030348062515259, |
|
"rewards/margins": 0.6141453981399536, |
|
"rewards/rejected": -2.5171799659729004, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 4.2227911427280975e-06, |
|
"logits/chosen": -1.3647363185882568, |
|
"logits/rejected": -1.1842234134674072, |
|
"logps/chosen": -421.44952392578125, |
|
"logps/rejected": -471.9312438964844, |
|
"loss": 0.5187, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.711439847946167, |
|
"rewards/margins": 0.8332679867744446, |
|
"rewards/rejected": -2.544707775115967, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 4.206165076283983e-06, |
|
"logits/chosen": -1.4086225032806396, |
|
"logits/rejected": -1.258502721786499, |
|
"logps/chosen": -433.7823791503906, |
|
"logps/rejected": -515.7772827148438, |
|
"loss": 0.4621, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.878491759300232, |
|
"rewards/margins": 1.0307425260543823, |
|
"rewards/rejected": -2.9092345237731934, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 4.189396545546995e-06, |
|
"logits/chosen": -1.3694835901260376, |
|
"logits/rejected": -1.2587060928344727, |
|
"logps/chosen": -476.4090270996094, |
|
"logps/rejected": -558.2841796875, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.272345781326294, |
|
"rewards/margins": 1.0181748867034912, |
|
"rewards/rejected": -3.290520429611206, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 11.875, |
|
"learning_rate": 4.172486950684627e-06, |
|
"logits/chosen": -1.3458369970321655, |
|
"logits/rejected": -1.2686779499053955, |
|
"logps/chosen": -503.9125061035156, |
|
"logps/rejected": -597.0067138671875, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.502702236175537, |
|
"rewards/margins": 0.962365448474884, |
|
"rewards/rejected": -3.4650676250457764, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -1.1852304935455322, |
|
"eval_logits/rejected": -1.065573811531067, |
|
"eval_logps/chosen": -541.4911499023438, |
|
"eval_logps/rejected": -629.0701293945312, |
|
"eval_loss": 0.5145753026008606, |
|
"eval_rewards/accuracies": 0.7239999771118164, |
|
"eval_rewards/chosen": -2.7683911323547363, |
|
"eval_rewards/margins": 1.0762238502502441, |
|
"eval_rewards/rejected": -3.8446152210235596, |
|
"eval_runtime": 384.8604, |
|
"eval_samples_per_second": 5.197, |
|
"eval_steps_per_second": 0.65, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 10.875, |
|
"learning_rate": 4.155437703643182e-06, |
|
"logits/chosen": -1.3424588441848755, |
|
"logits/rejected": -1.1757004261016846, |
|
"logps/chosen": -503.0860900878906, |
|
"logps/rejected": -576.0645751953125, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.563424587249756, |
|
"rewards/margins": 1.0206806659698486, |
|
"rewards/rejected": -3.5841050148010254, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 12.5625, |
|
"learning_rate": 4.138250228029882e-06, |
|
"logits/chosen": -1.2897417545318604, |
|
"logits/rejected": -1.2073286771774292, |
|
"logps/chosen": -513.29541015625, |
|
"logps/rejected": -622.9282836914062, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.5636470317840576, |
|
"rewards/margins": 1.061576008796692, |
|
"rewards/rejected": -3.625222682952881, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 4.120925958993994e-06, |
|
"logits/chosen": -1.2789089679718018, |
|
"logits/rejected": -1.1816047430038452, |
|
"logps/chosen": -455.1483459472656, |
|
"logps/rejected": -547.6185302734375, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.212470293045044, |
|
"rewards/margins": 0.9680255055427551, |
|
"rewards/rejected": -3.180495500564575, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 10.875, |
|
"learning_rate": 4.103466343106999e-06, |
|
"logits/chosen": -1.451428771018982, |
|
"logits/rejected": -1.3242452144622803, |
|
"logps/chosen": -471.908203125, |
|
"logps/rejected": -530.3082275390625, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0252251625061035, |
|
"rewards/margins": 0.8427697420120239, |
|
"rewards/rejected": -2.867994785308838, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 13.375, |
|
"learning_rate": 4.085872838241797e-06, |
|
"logits/chosen": -1.3740856647491455, |
|
"logits/rejected": -1.2394291162490845, |
|
"logps/chosen": -466.03948974609375, |
|
"logps/rejected": -517.3243408203125, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.0367541313171387, |
|
"rewards/margins": 0.7136788368225098, |
|
"rewards/rejected": -2.7504329681396484, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 11.625, |
|
"learning_rate": 4.06814691345098e-06, |
|
"logits/chosen": -1.3596174716949463, |
|
"logits/rejected": -1.1983596086502075, |
|
"logps/chosen": -435.5516052246094, |
|
"logps/rejected": -502.25762939453125, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8000373840332031, |
|
"rewards/margins": 0.8877509832382202, |
|
"rewards/rejected": -2.687788486480713, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 14.8125, |
|
"learning_rate": 4.050290048844171e-06, |
|
"logits/chosen": -1.4539238214492798, |
|
"logits/rejected": -1.3574968576431274, |
|
"logps/chosen": -447.8072204589844, |
|
"logps/rejected": -525.6444091796875, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7919375896453857, |
|
"rewards/margins": 0.829770565032959, |
|
"rewards/rejected": -2.6217081546783447, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 6.9375, |
|
"learning_rate": 4.032303735464422e-06, |
|
"logits/chosen": -1.498198390007019, |
|
"logits/rejected": -1.3165781497955322, |
|
"logps/chosen": -449.3099670410156, |
|
"logps/rejected": -533.7264404296875, |
|
"loss": 0.4426, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.7732903957366943, |
|
"rewards/margins": 1.075021505355835, |
|
"rewards/rejected": -2.8483121395111084, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 4.014189475163727e-06, |
|
"logits/chosen": -1.2981091737747192, |
|
"logits/rejected": -1.1843674182891846, |
|
"logps/chosen": -436.0013732910156, |
|
"logps/rejected": -536.7470703125, |
|
"loss": 0.4771, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8134443759918213, |
|
"rewards/margins": 1.0886361598968506, |
|
"rewards/rejected": -2.902080535888672, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 16.5, |
|
"learning_rate": 3.995948780477605e-06, |
|
"logits/chosen": -1.4245179891586304, |
|
"logits/rejected": -1.2672098875045776, |
|
"logps/chosen": -442.8516540527344, |
|
"logps/rejected": -501.25775146484375, |
|
"loss": 0.5287, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7409827709197998, |
|
"rewards/margins": 0.8305804133415222, |
|
"rewards/rejected": -2.571563243865967, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -1.2506229877471924, |
|
"eval_logits/rejected": -1.1252646446228027, |
|
"eval_logps/chosen": -455.32855224609375, |
|
"eval_logps/rejected": -530.7439575195312, |
|
"eval_loss": 0.5196547508239746, |
|
"eval_rewards/accuracies": 0.7235000133514404, |
|
"eval_rewards/chosen": -1.9067655801773071, |
|
"eval_rewards/margins": 0.9545875191688538, |
|
"eval_rewards/rejected": -2.8613533973693848, |
|
"eval_runtime": 384.8833, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 0.65, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 10.0, |
|
"learning_rate": 3.977583174498816e-06, |
|
"logits/chosen": -1.296602487564087, |
|
"logits/rejected": -1.1704097986221313, |
|
"logps/chosen": -470.2802734375, |
|
"logps/rejected": -584.6675415039062, |
|
"loss": 0.3797, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.065967082977295, |
|
"rewards/margins": 1.363657832145691, |
|
"rewards/rejected": -3.4296250343322754, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 12.75, |
|
"learning_rate": 3.959094190750172e-06, |
|
"logits/chosen": -1.2688853740692139, |
|
"logits/rejected": -1.1229419708251953, |
|
"logps/chosen": -514.8965454101562, |
|
"logps/rejected": -601.8798217773438, |
|
"loss": 0.5131, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.297360897064209, |
|
"rewards/margins": 1.1389044523239136, |
|
"rewards/rejected": -3.436265468597412, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 23.625, |
|
"learning_rate": 3.9404833730564975e-06, |
|
"logits/chosen": -1.12982177734375, |
|
"logits/rejected": -1.0124092102050781, |
|
"logps/chosen": -485.57647705078125, |
|
"logps/rejected": -589.0986328125, |
|
"loss": 0.509, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.324758529663086, |
|
"rewards/margins": 1.1509692668914795, |
|
"rewards/rejected": -3.4757277965545654, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 18.5, |
|
"learning_rate": 3.921752275415712e-06, |
|
"logits/chosen": -1.1547746658325195, |
|
"logits/rejected": -1.0837663412094116, |
|
"logps/chosen": -519.43994140625, |
|
"logps/rejected": -648.2784423828125, |
|
"loss": 0.4382, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.689040184020996, |
|
"rewards/margins": 1.4942899942398071, |
|
"rewards/rejected": -4.1833295822143555, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 7.125, |
|
"learning_rate": 3.902902461869079e-06, |
|
"logits/chosen": -1.122580885887146, |
|
"logits/rejected": -0.9964879155158997, |
|
"logps/chosen": -557.9876098632812, |
|
"logps/rejected": -675.2171630859375, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.184739351272583, |
|
"rewards/margins": 1.3369649648666382, |
|
"rewards/rejected": -4.52170467376709, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 14.4375, |
|
"learning_rate": 3.883935506370605e-06, |
|
"logits/chosen": -1.2318613529205322, |
|
"logits/rejected": -1.1085087060928345, |
|
"logps/chosen": -504.7203674316406, |
|
"logps/rejected": -563.6139526367188, |
|
"loss": 0.5652, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.5147933959960938, |
|
"rewards/margins": 0.8672431111335754, |
|
"rewards/rejected": -3.3820366859436035, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 3.864852992655617e-06, |
|
"logits/chosen": -1.3699067831039429, |
|
"logits/rejected": -1.2695183753967285, |
|
"logps/chosen": -446.06903076171875, |
|
"logps/rejected": -533.8704833984375, |
|
"loss": 0.4493, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.9743432998657227, |
|
"rewards/margins": 1.0008807182312012, |
|
"rewards/rejected": -2.975224256515503, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 7.40625, |
|
"learning_rate": 3.845656514108516e-06, |
|
"logits/chosen": -1.3080308437347412, |
|
"logits/rejected": -1.1480329036712646, |
|
"logps/chosen": -499.15087890625, |
|
"logps/rejected": -540.1708984375, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.400202512741089, |
|
"rewards/margins": 0.9600592851638794, |
|
"rewards/rejected": -3.360261917114258, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 3.826347673629738e-06, |
|
"logits/chosen": -1.3206231594085693, |
|
"logits/rejected": -1.1406716108322144, |
|
"logps/chosen": -456.264404296875, |
|
"logps/rejected": -542.2021484375, |
|
"loss": 0.4827, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.029609203338623, |
|
"rewards/margins": 1.1219456195831299, |
|
"rewards/rejected": -3.151554822921753, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 16.375, |
|
"learning_rate": 3.8069280835019062e-06, |
|
"logits/chosen": -1.3296412229537964, |
|
"logits/rejected": -1.1789557933807373, |
|
"logps/chosen": -472.45135498046875, |
|
"logps/rejected": -577.9127197265625, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.0813910961151123, |
|
"rewards/margins": 1.246517300605774, |
|
"rewards/rejected": -3.3279082775115967, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -1.2696473598480225, |
|
"eval_logits/rejected": -1.1408381462097168, |
|
"eval_logps/chosen": -478.6543884277344, |
|
"eval_logps/rejected": -568.5231323242188, |
|
"eval_loss": 0.516459047794342, |
|
"eval_rewards/accuracies": 0.7294999957084656, |
|
"eval_rewards/chosen": -2.1400234699249268, |
|
"eval_rewards/margins": 1.0991216897964478, |
|
"eval_rewards/rejected": -3.239145040512085, |
|
"eval_runtime": 384.85, |
|
"eval_samples_per_second": 5.197, |
|
"eval_steps_per_second": 0.65, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 18.75, |
|
"learning_rate": 3.7873993652552077e-06, |
|
"logits/chosen": -1.2893366813659668, |
|
"logits/rejected": -1.1984317302703857, |
|
"logps/chosen": -447.75994873046875, |
|
"logps/rejected": -531.8748779296875, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.140458583831787, |
|
"rewards/margins": 0.8995217084884644, |
|
"rewards/rejected": -3.039980173110962, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 9.375, |
|
"learning_rate": 3.7677631495319953e-06, |
|
"logits/chosen": -1.4299360513687134, |
|
"logits/rejected": -1.3164293766021729, |
|
"logps/chosen": -420.2286682128906, |
|
"logps/rejected": -484.6932067871094, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5654377937316895, |
|
"rewards/margins": 0.824730396270752, |
|
"rewards/rejected": -2.3901684284210205, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 3.748021075950633e-06, |
|
"logits/chosen": -1.4940413236618042, |
|
"logits/rejected": -1.3882437944412231, |
|
"logps/chosen": -430.9405822753906, |
|
"logps/rejected": -477.81365966796875, |
|
"loss": 0.5819, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.5629723072052002, |
|
"rewards/margins": 0.5750766396522522, |
|
"rewards/rejected": -2.1380488872528076, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 3.7281747929685824e-06, |
|
"logits/chosen": -1.3339357376098633, |
|
"logits/rejected": -1.1862207651138306, |
|
"logps/chosen": -417.4947814941406, |
|
"logps/rejected": -475.46160888671875, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7983973026275635, |
|
"rewards/margins": 0.7366180419921875, |
|
"rewards/rejected": -2.535015106201172, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 7.5625, |
|
"learning_rate": 3.7082259577447604e-06, |
|
"logits/chosen": -1.4374284744262695, |
|
"logits/rejected": -1.3284789323806763, |
|
"logps/chosen": -462.6572265625, |
|
"logps/rejected": -520.775390625, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9146639108657837, |
|
"rewards/margins": 0.7811635732650757, |
|
"rewards/rejected": -2.6958274841308594, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 9.875, |
|
"learning_rate": 3.6881762360011688e-06, |
|
"logits/chosen": -1.45357346534729, |
|
"logits/rejected": -1.2578237056732178, |
|
"logps/chosen": -482.557861328125, |
|
"logps/rejected": -538.22705078125, |
|
"loss": 0.5034, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.010615110397339, |
|
"rewards/margins": 0.9266406297683716, |
|
"rewards/rejected": -2.937255620956421, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 3.668027301883802e-06, |
|
"logits/chosen": -1.3933765888214111, |
|
"logits/rejected": -1.2354159355163574, |
|
"logps/chosen": -465.87628173828125, |
|
"logps/rejected": -557.3919067382812, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.145137310028076, |
|
"rewards/margins": 1.0642468929290771, |
|
"rewards/rejected": -3.209383726119995, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 3.64778083782286e-06, |
|
"logits/chosen": -1.283038854598999, |
|
"logits/rejected": -1.264458417892456, |
|
"logps/chosen": -504.4510803222656, |
|
"logps/rejected": -629.853271484375, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.5184431076049805, |
|
"rewards/margins": 0.9640257954597473, |
|
"rewards/rejected": -3.482469081878662, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 3.627438534392268e-06, |
|
"logits/chosen": -1.3820140361785889, |
|
"logits/rejected": -1.3510745763778687, |
|
"logps/chosen": -486.08721923828125, |
|
"logps/rejected": -596.9356689453125, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.4804224967956543, |
|
"rewards/margins": 1.0413092374801636, |
|
"rewards/rejected": -3.5217316150665283, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 3.607002090168506e-06, |
|
"logits/chosen": -1.2637202739715576, |
|
"logits/rejected": -1.1885454654693604, |
|
"logps/chosen": -528.8330688476562, |
|
"logps/rejected": -598.1657104492188, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.6452443599700928, |
|
"rewards/margins": 0.9074187278747559, |
|
"rewards/rejected": -3.5526630878448486, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -1.2402483224868774, |
|
"eval_logits/rejected": -1.1162159442901611, |
|
"eval_logps/chosen": -512.13427734375, |
|
"eval_logps/rejected": -599.2672119140625, |
|
"eval_loss": 0.5056775212287903, |
|
"eval_rewards/accuracies": 0.7310000061988831, |
|
"eval_rewards/chosen": -2.474822521209717, |
|
"eval_rewards/margins": 1.0717631578445435, |
|
"eval_rewards/rejected": -3.5465855598449707, |
|
"eval_runtime": 384.9561, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 3.586473211588787e-06, |
|
"logits/chosen": -1.3531196117401123, |
|
"logits/rejected": -1.2485313415527344, |
|
"logps/chosen": -474.69183349609375, |
|
"logps/rejected": -598.9015502929688, |
|
"loss": 0.4461, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.299967050552368, |
|
"rewards/margins": 1.1753101348876953, |
|
"rewards/rejected": -3.4752774238586426, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 16.125, |
|
"learning_rate": 3.5658536128085623e-06, |
|
"logits/chosen": -1.397863745689392, |
|
"logits/rejected": -1.2203514575958252, |
|
"logps/chosen": -538.4635009765625, |
|
"logps/rejected": -599.5867919921875, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.7555203437805176, |
|
"rewards/margins": 0.9067786931991577, |
|
"rewards/rejected": -3.6622989177703857, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 8.0, |
|
"learning_rate": 3.545145015558399e-06, |
|
"logits/chosen": -1.198150873184204, |
|
"logits/rejected": -1.1703031063079834, |
|
"logps/chosen": -492.5457458496094, |
|
"logps/rejected": -591.6585083007812, |
|
"loss": 0.484, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.609083652496338, |
|
"rewards/margins": 1.1332842111587524, |
|
"rewards/rejected": -3.7423675060272217, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 5.375, |
|
"learning_rate": 3.5243491490002056e-06, |
|
"logits/chosen": -1.3408269882202148, |
|
"logits/rejected": -1.2534765005111694, |
|
"logps/chosen": -525.4117431640625, |
|
"logps/rejected": -613.4912109375, |
|
"loss": 0.5814, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.717186450958252, |
|
"rewards/margins": 0.9326400756835938, |
|
"rewards/rejected": -3.649826765060425, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 10.5, |
|
"learning_rate": 3.503467749582857e-06, |
|
"logits/chosen": -1.4275528192520142, |
|
"logits/rejected": -1.233086109161377, |
|
"logps/chosen": -476.9230041503906, |
|
"logps/rejected": -512.470703125, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.256998300552368, |
|
"rewards/margins": 0.6955486536026001, |
|
"rewards/rejected": -2.9525468349456787, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 11.375, |
|
"learning_rate": 3.4825025608971947e-06, |
|
"logits/chosen": -1.347503662109375, |
|
"logits/rejected": -1.2736327648162842, |
|
"logps/chosen": -408.91351318359375, |
|
"logps/rejected": -484.05926513671875, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8696982860565186, |
|
"rewards/margins": 0.7134873270988464, |
|
"rewards/rejected": -2.583185911178589, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 6.375, |
|
"learning_rate": 3.4614553335304407e-06, |
|
"logits/chosen": -1.3952078819274902, |
|
"logits/rejected": -1.1842278242111206, |
|
"logps/chosen": -461.0892028808594, |
|
"logps/rejected": -531.4512939453125, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9217849969863892, |
|
"rewards/margins": 1.02143132686615, |
|
"rewards/rejected": -2.9432168006896973, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 10.875, |
|
"learning_rate": 3.4403278249200222e-06, |
|
"logits/chosen": -1.3681023120880127, |
|
"logits/rejected": -1.159461259841919, |
|
"logps/chosen": -495.4026794433594, |
|
"logps/rejected": -574.2462768554688, |
|
"loss": 0.4479, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.127955913543701, |
|
"rewards/margins": 1.2076514959335327, |
|
"rewards/rejected": -3.3356070518493652, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 15.1875, |
|
"learning_rate": 3.4191217992068293e-06, |
|
"logits/chosen": -1.4043166637420654, |
|
"logits/rejected": -1.2147849798202515, |
|
"logps/chosen": -536.7705688476562, |
|
"logps/rejected": -593.9422607421875, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.586280345916748, |
|
"rewards/margins": 1.0274693965911865, |
|
"rewards/rejected": -3.6137497425079346, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 3.3978390270879056e-06, |
|
"logits/chosen": -1.2982449531555176, |
|
"logits/rejected": -1.202523946762085, |
|
"logps/chosen": -495.7208557128906, |
|
"logps/rejected": -613.0609130859375, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.8570523262023926, |
|
"rewards/margins": 1.105515480041504, |
|
"rewards/rejected": -3.9625678062438965, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": -1.2783551216125488, |
|
"eval_logits/rejected": -1.1493244171142578, |
|
"eval_logps/chosen": -543.2153930664062, |
|
"eval_logps/rejected": -629.583251953125, |
|
"eval_loss": 0.49932965636253357, |
|
"eval_rewards/accuracies": 0.7390000224113464, |
|
"eval_rewards/chosen": -2.7856333255767822, |
|
"eval_rewards/margins": 1.0641134977340698, |
|
"eval_rewards/rejected": -3.8497471809387207, |
|
"eval_runtime": 384.9856, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 12.3125, |
|
"learning_rate": 3.3764812856685995e-06, |
|
"logits/chosen": -1.3749181032180786, |
|
"logits/rejected": -1.3575098514556885, |
|
"logps/chosen": -486.59808349609375, |
|
"logps/rejected": -601.6511840820312, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.6204190254211426, |
|
"rewards/margins": 1.015373945236206, |
|
"rewards/rejected": -3.6357929706573486, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": -1.4978001117706299, |
|
"logits/rejected": -1.3547831773757935, |
|
"logps/chosen": -509.7710876464844, |
|
"logps/rejected": -603.9198608398438, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.409210205078125, |
|
"rewards/margins": 1.0928349494934082, |
|
"rewards/rejected": -3.5020453929901123, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 3.3335480345008907e-06, |
|
"logits/chosen": -1.359413743019104, |
|
"logits/rejected": -1.2617199420928955, |
|
"logps/chosen": -466.9374084472656, |
|
"logps/rejected": -555.6993408203125, |
|
"loss": 0.4525, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.067884922027588, |
|
"rewards/margins": 1.134508728981018, |
|
"rewards/rejected": -3.2023932933807373, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 7.875, |
|
"learning_rate": 3.3119761096666055e-06, |
|
"logits/chosen": -1.4102518558502197, |
|
"logits/rejected": -1.2553739547729492, |
|
"logps/chosen": -483.46356201171875, |
|
"logps/rejected": -535.0630493164062, |
|
"loss": 0.5686, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0844430923461914, |
|
"rewards/margins": 0.8192558288574219, |
|
"rewards/rejected": -2.9036991596221924, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 8.875, |
|
"learning_rate": 3.290336385060832e-06, |
|
"logits/chosen": -1.573932409286499, |
|
"logits/rejected": -1.365490436553955, |
|
"logps/chosen": -463.2930603027344, |
|
"logps/rejected": -536.6669921875, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.1443819999694824, |
|
"rewards/margins": 0.9354622960090637, |
|
"rewards/rejected": -3.0798439979553223, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 10.125, |
|
"learning_rate": 3.268630667594348e-06, |
|
"logits/chosen": -1.4119102954864502, |
|
"logits/rejected": -1.375628113746643, |
|
"logps/chosen": -442.0873107910156, |
|
"logps/rejected": -511.404296875, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8643802404403687, |
|
"rewards/margins": 0.9355432391166687, |
|
"rewards/rejected": -2.7999236583709717, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 3.2468607696883147e-06, |
|
"logits/chosen": -1.4483537673950195, |
|
"logits/rejected": -1.3931195735931396, |
|
"logps/chosen": -452.58056640625, |
|
"logps/rejected": -550.0877075195312, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9940522909164429, |
|
"rewards/margins": 0.9315992593765259, |
|
"rewards/rejected": -2.9256515502929688, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 3.225028509122944e-06, |
|
"logits/chosen": -1.5014052391052246, |
|
"logits/rejected": -1.3815762996673584, |
|
"logps/chosen": -431.58447265625, |
|
"logps/rejected": -498.1240234375, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.9506728649139404, |
|
"rewards/margins": 0.7785587310791016, |
|
"rewards/rejected": -2.729231595993042, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 3.2031357088857083e-06, |
|
"logits/chosen": -1.4752798080444336, |
|
"logits/rejected": -1.3948938846588135, |
|
"logps/chosen": -494.77056884765625, |
|
"logps/rejected": -584.5908813476562, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.1956658363342285, |
|
"rewards/margins": 0.9330615997314453, |
|
"rewards/rejected": -3.128727674484253, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 14.125, |
|
"learning_rate": 3.181184197019127e-06, |
|
"logits/chosen": -1.227853775024414, |
|
"logits/rejected": -1.1199489831924438, |
|
"logps/chosen": -480.1280212402344, |
|
"logps/rejected": -640.9280395507812, |
|
"loss": 0.478, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.4991328716278076, |
|
"rewards/margins": 1.3459218740463257, |
|
"rewards/rejected": -3.845055103302002, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": -1.2418839931488037, |
|
"eval_logits/rejected": -1.1179745197296143, |
|
"eval_logps/chosen": -533.2012329101562, |
|
"eval_logps/rejected": -618.8510131835938, |
|
"eval_loss": 0.5059686303138733, |
|
"eval_rewards/accuracies": 0.7390000224113464, |
|
"eval_rewards/chosen": -2.685492753982544, |
|
"eval_rewards/margins": 1.0569311380386353, |
|
"eval_rewards/rejected": -3.7424237728118896, |
|
"eval_runtime": 385.0443, |
|
"eval_samples_per_second": 5.194, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 22.25, |
|
"learning_rate": 3.159175806468126e-06, |
|
"logits/chosen": -1.206802248954773, |
|
"logits/rejected": -1.0140331983566284, |
|
"logps/chosen": -536.1395263671875, |
|
"logps/rejected": -618.5433959960938, |
|
"loss": 0.4884, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.874638795852661, |
|
"rewards/margins": 1.125908374786377, |
|
"rewards/rejected": -4.000546932220459, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 18.125, |
|
"learning_rate": 3.1371123749269804e-06, |
|
"logits/chosen": -1.27443265914917, |
|
"logits/rejected": -1.2016910314559937, |
|
"logps/chosen": -608.0801391601562, |
|
"logps/rejected": -680.9607543945312, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.246840000152588, |
|
"rewards/margins": 0.8893483877182007, |
|
"rewards/rejected": -4.13618803024292, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 10.125, |
|
"learning_rate": 3.114995744685877e-06, |
|
"logits/chosen": -1.238593339920044, |
|
"logits/rejected": -1.2031556367874146, |
|
"logps/chosen": -542.8019409179688, |
|
"logps/rejected": -626.592041015625, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.95475697517395, |
|
"rewards/margins": 0.9683350324630737, |
|
"rewards/rejected": -3.9230918884277344, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 3.0928277624770743e-06, |
|
"logits/chosen": -1.4476687908172607, |
|
"logits/rejected": -1.2830606698989868, |
|
"logps/chosen": -545.8017578125, |
|
"logps/rejected": -637.268798828125, |
|
"loss": 0.5053, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.61057710647583, |
|
"rewards/margins": 1.1988862752914429, |
|
"rewards/rejected": -3.8094639778137207, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 6.0625, |
|
"learning_rate": 3.070610279320708e-06, |
|
"logits/chosen": -1.469509243965149, |
|
"logits/rejected": -1.2941926717758179, |
|
"logps/chosen": -530.1407470703125, |
|
"logps/rejected": -617.324462890625, |
|
"loss": 0.4534, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.4305920600891113, |
|
"rewards/margins": 1.112181544303894, |
|
"rewards/rejected": -3.542773723602295, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 6.125, |
|
"learning_rate": 3.0483451503702264e-06, |
|
"logits/chosen": -1.3953410387039185, |
|
"logits/rejected": -1.319723129272461, |
|
"logps/chosen": -553.1173706054688, |
|
"logps/rejected": -624.0015258789062, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.6702237129211426, |
|
"rewards/margins": 0.9092646837234497, |
|
"rewards/rejected": -3.5794882774353027, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 7.40625, |
|
"learning_rate": 3.0260342347574916e-06, |
|
"logits/chosen": -1.366349697113037, |
|
"logits/rejected": -1.2165526151657104, |
|
"logps/chosen": -525.2376098632812, |
|
"logps/rejected": -642.2418823242188, |
|
"loss": 0.4285, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.52901291847229, |
|
"rewards/margins": 1.3603582382202148, |
|
"rewards/rejected": -3.889371156692505, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 3.0036793954375358e-06, |
|
"logits/chosen": -1.3298574686050415, |
|
"logits/rejected": -1.175115942955017, |
|
"logps/chosen": -573.416015625, |
|
"logps/rejected": -657.1756591796875, |
|
"loss": 0.4426, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.033194065093994, |
|
"rewards/margins": 1.259937047958374, |
|
"rewards/rejected": -4.293131351470947, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 16.625, |
|
"learning_rate": 2.981282499033009e-06, |
|
"logits/chosen": -1.3132747411727905, |
|
"logits/rejected": -1.1860190629959106, |
|
"logps/chosen": -596.8779296875, |
|
"logps/rejected": -696.3871459960938, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.2053794860839844, |
|
"rewards/margins": 1.2494663000106812, |
|
"rewards/rejected": -4.454846382141113, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 14.5, |
|
"learning_rate": 2.9588454156783163e-06, |
|
"logits/chosen": -1.3445230722427368, |
|
"logits/rejected": -1.1744747161865234, |
|
"logps/chosen": -582.3587646484375, |
|
"logps/rejected": -712.7427978515625, |
|
"loss": 0.4325, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.0100293159484863, |
|
"rewards/margins": 1.5214197635650635, |
|
"rewards/rejected": -4.531449317932129, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_logits/chosen": -1.2515246868133545, |
|
"eval_logits/rejected": -1.1244795322418213, |
|
"eval_logps/chosen": -567.7128295898438, |
|
"eval_logps/rejected": -665.8478393554688, |
|
"eval_loss": 0.4995792806148529, |
|
"eval_rewards/accuracies": 0.7369999885559082, |
|
"eval_rewards/chosen": -3.0306081771850586, |
|
"eval_rewards/margins": 1.1817845106124878, |
|
"eval_rewards/rejected": -4.2123918533325195, |
|
"eval_runtime": 384.958, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 2.9363700188634597e-06, |
|
"logits/chosen": -1.3585935831069946, |
|
"logits/rejected": -1.2228021621704102, |
|
"logps/chosen": -564.982666015625, |
|
"logps/rejected": -625.9069213867188, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.999126434326172, |
|
"rewards/margins": 0.9794772863388062, |
|
"rewards/rejected": -3.9786033630371094, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 10.625, |
|
"learning_rate": 2.9138581852776053e-06, |
|
"logits/chosen": -1.378015160560608, |
|
"logits/rejected": -1.2671245336532593, |
|
"logps/chosen": -547.2576904296875, |
|
"logps/rejected": -642.3396606445312, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.8754656314849854, |
|
"rewards/margins": 1.0803815126419067, |
|
"rewards/rejected": -3.9558472633361816, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 2.8913117946523805e-06, |
|
"logits/chosen": -1.4040184020996094, |
|
"logits/rejected": -1.2129814624786377, |
|
"logps/chosen": -538.7586669921875, |
|
"logps/rejected": -607.2654418945312, |
|
"loss": 0.4589, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.72344970703125, |
|
"rewards/margins": 1.065987229347229, |
|
"rewards/rejected": -3.7894368171691895, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 2.8687327296049126e-06, |
|
"logits/chosen": -1.3795297145843506, |
|
"logits/rejected": -1.2698510885238647, |
|
"logps/chosen": -529.3519897460938, |
|
"logps/rejected": -627.5960693359375, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.702291965484619, |
|
"rewards/margins": 1.0797038078308105, |
|
"rewards/rejected": -3.781996250152588, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 17.0, |
|
"learning_rate": 2.8461228754806376e-06, |
|
"logits/chosen": -1.4074313640594482, |
|
"logits/rejected": -1.231386661529541, |
|
"logps/chosen": -545.1365966796875, |
|
"logps/rejected": -611.1114501953125, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6934621334075928, |
|
"rewards/margins": 0.9177573323249817, |
|
"rewards/rejected": -3.611219882965088, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 2.823484120195865e-06, |
|
"logits/chosen": -1.51656174659729, |
|
"logits/rejected": -1.2999058961868286, |
|
"logps/chosen": -509.6175231933594, |
|
"logps/rejected": -593.9664306640625, |
|
"loss": 0.4301, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.3180630207061768, |
|
"rewards/margins": 1.187303900718689, |
|
"rewards/rejected": -3.5053672790527344, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 2.8008183540801486e-06, |
|
"logits/chosen": -1.366084337234497, |
|
"logits/rejected": -1.211987853050232, |
|
"logps/chosen": -522.1681518554688, |
|
"logps/rejected": -573.3563232421875, |
|
"loss": 0.5128, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.4853363037109375, |
|
"rewards/margins": 0.965847373008728, |
|
"rewards/rejected": -3.451184034347534, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 14.3125, |
|
"learning_rate": 2.7781274697184353e-06, |
|
"logits/chosen": -1.2134374380111694, |
|
"logits/rejected": -1.2537566423416138, |
|
"logps/chosen": -503.17462158203125, |
|
"logps/rejected": -637.951171875, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.738171100616455, |
|
"rewards/margins": 1.1446306705474854, |
|
"rewards/rejected": -3.8828015327453613, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 2.7554133617930397e-06, |
|
"logits/chosen": -1.3368358612060547, |
|
"logits/rejected": -1.2081658840179443, |
|
"logps/chosen": -505.2743225097656, |
|
"logps/rejected": -597.1849365234375, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.5743813514709473, |
|
"rewards/margins": 1.0698320865631104, |
|
"rewards/rejected": -3.644213914871216, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 10.125, |
|
"learning_rate": 2.7326779269254363e-06, |
|
"logits/chosen": -1.5063138008117676, |
|
"logits/rejected": -1.330679178237915, |
|
"logps/chosen": -549.5060424804688, |
|
"logps/rejected": -596.6401977539062, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.6017112731933594, |
|
"rewards/margins": 1.0307661294937134, |
|
"rewards/rejected": -3.6324775218963623, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -1.2879465818405151, |
|
"eval_logits/rejected": -1.1607391834259033, |
|
"eval_logps/chosen": -531.1353759765625, |
|
"eval_logps/rejected": -612.32275390625, |
|
"eval_loss": 0.4934316873550415, |
|
"eval_rewards/accuracies": 0.7404999732971191, |
|
"eval_rewards/chosen": -2.664834499359131, |
|
"eval_rewards/margins": 1.0123074054718018, |
|
"eval_rewards/rejected": -3.6771416664123535, |
|
"eval_runtime": 385.0864, |
|
"eval_samples_per_second": 5.194, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 2.7099230635178954e-06, |
|
"logits/chosen": -1.32606840133667, |
|
"logits/rejected": -1.284746766090393, |
|
"logps/chosen": -530.9010009765625, |
|
"logps/rejected": -617.3204956054688, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.6605124473571777, |
|
"rewards/margins": 0.9288894534111023, |
|
"rewards/rejected": -3.589401960372925, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 12.6875, |
|
"learning_rate": 2.6871506715949608e-06, |
|
"logits/chosen": -1.4429805278778076, |
|
"logits/rejected": -1.309429407119751, |
|
"logps/chosen": -509.5052795410156, |
|
"logps/rejected": -599.0560913085938, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.565361976623535, |
|
"rewards/margins": 1.0760364532470703, |
|
"rewards/rejected": -3.6413981914520264, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 11.125, |
|
"learning_rate": 2.6643626526448063e-06, |
|
"logits/chosen": -1.516875982284546, |
|
"logits/rejected": -1.3450555801391602, |
|
"logps/chosen": -554.38427734375, |
|
"logps/rejected": -638.1011352539062, |
|
"loss": 0.4402, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.594825267791748, |
|
"rewards/margins": 1.260938048362732, |
|
"rewards/rejected": -3.8557631969451904, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 9.375, |
|
"learning_rate": 2.6415609094604562e-06, |
|
"logits/chosen": -1.331624150276184, |
|
"logits/rejected": -1.273493766784668, |
|
"logps/chosen": -572.4739990234375, |
|
"logps/rejected": -668.96484375, |
|
"loss": 0.4552, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.0190746784210205, |
|
"rewards/margins": 1.1551822423934937, |
|
"rewards/rejected": -4.174256801605225, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 15.5, |
|
"learning_rate": 2.618747345980904e-06, |
|
"logits/chosen": -1.3320283889770508, |
|
"logits/rejected": -1.138301134109497, |
|
"logps/chosen": -604.2484130859375, |
|
"logps/rejected": -656.2549438476562, |
|
"loss": 0.5591, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.5585105419158936, |
|
"rewards/margins": 1.0249335765838623, |
|
"rewards/rejected": -4.583444118499756, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 5.78125, |
|
"learning_rate": 2.595923867132136e-06, |
|
"logits/chosen": -1.3874107599258423, |
|
"logits/rejected": -1.25830078125, |
|
"logps/chosen": -597.45751953125, |
|
"logps/rejected": -698.0640258789062, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.1653714179992676, |
|
"rewards/margins": 1.2528908252716064, |
|
"rewards/rejected": -4.418262481689453, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 7.625, |
|
"learning_rate": 2.5730923786680672e-06, |
|
"logits/chosen": -1.3206942081451416, |
|
"logits/rejected": -1.2852368354797363, |
|
"logps/chosen": -542.5220336914062, |
|
"logps/rejected": -643.3800048828125, |
|
"loss": 0.5358, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.9099090099334717, |
|
"rewards/margins": 0.932235836982727, |
|
"rewards/rejected": -3.842144727706909, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 9.75, |
|
"learning_rate": 2.5502547870114137e-06, |
|
"logits/chosen": -1.3924452066421509, |
|
"logits/rejected": -1.2653155326843262, |
|
"logps/chosen": -522.9793090820312, |
|
"logps/rejected": -588.5538940429688, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6759915351867676, |
|
"rewards/margins": 0.9582807421684265, |
|
"rewards/rejected": -3.634272336959839, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 11.5, |
|
"learning_rate": 2.527412999094507e-06, |
|
"logits/chosen": -1.3764816522598267, |
|
"logits/rejected": -1.2036128044128418, |
|
"logps/chosen": -582.7906494140625, |
|
"logps/rejected": -684.7365112304688, |
|
"loss": 0.4795, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.890341281890869, |
|
"rewards/margins": 1.1449689865112305, |
|
"rewards/rejected": -4.0353102684021, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 2.504568922200064e-06, |
|
"logits/chosen": -1.3423351049423218, |
|
"logits/rejected": -1.1891554594039917, |
|
"logps/chosen": -519.73388671875, |
|
"logps/rejected": -617.8820190429688, |
|
"loss": 0.5009, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.7839534282684326, |
|
"rewards/margins": 1.1430864334106445, |
|
"rewards/rejected": -3.9270401000976562, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_logits/chosen": -1.3098708391189575, |
|
"eval_logits/rejected": -1.1825140714645386, |
|
"eval_logps/chosen": -547.086669921875, |
|
"eval_logps/rejected": -630.5530395507812, |
|
"eval_loss": 0.4915066063404083, |
|
"eval_rewards/accuracies": 0.7509999871253967, |
|
"eval_rewards/chosen": -2.8243465423583984, |
|
"eval_rewards/margins": 1.0350984334945679, |
|
"eval_rewards/rejected": -3.859445095062256, |
|
"eval_runtime": 384.9833, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 7.25, |
|
"learning_rate": 2.4817244638019333e-06, |
|
"logits/chosen": -1.4199883937835693, |
|
"logits/rejected": -1.2606356143951416, |
|
"logps/chosen": -566.4400634765625, |
|
"logps/rejected": -621.7224731445312, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.906280040740967, |
|
"rewards/margins": 0.9649537205696106, |
|
"rewards/rejected": -3.8712337017059326, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 14.75, |
|
"learning_rate": 2.4588815314058155e-06, |
|
"logits/chosen": -1.3702977895736694, |
|
"logits/rejected": -1.307600736618042, |
|
"logps/chosen": -536.4708862304688, |
|
"logps/rejected": -595.4000244140625, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.909069776535034, |
|
"rewards/margins": 0.9321239590644836, |
|
"rewards/rejected": -3.841193675994873, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 2.4360420323899922e-06, |
|
"logits/chosen": -1.4435946941375732, |
|
"logits/rejected": -1.3134028911590576, |
|
"logps/chosen": -554.8719482421875, |
|
"logps/rejected": -603.7503051757812, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.7905924320220947, |
|
"rewards/margins": 0.8145803213119507, |
|
"rewards/rejected": -3.605172634124756, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 2.4132078738460585e-06, |
|
"logits/chosen": -1.4984867572784424, |
|
"logits/rejected": -1.3388562202453613, |
|
"logps/chosen": -511.22344970703125, |
|
"logps/rejected": -575.4027099609375, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.4675207138061523, |
|
"rewards/margins": 1.027344822883606, |
|
"rewards/rejected": -3.4948654174804688, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 13.8125, |
|
"learning_rate": 2.3903809624196826e-06, |
|
"logits/chosen": -1.4616576433181763, |
|
"logits/rejected": -1.3179928064346313, |
|
"logps/chosen": -468.075439453125, |
|
"logps/rejected": -520.423095703125, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.3437161445617676, |
|
"rewards/margins": 0.8483073115348816, |
|
"rewards/rejected": -3.192023515701294, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 12.375, |
|
"learning_rate": 2.3675632041513978e-06, |
|
"logits/chosen": -1.5807464122772217, |
|
"logits/rejected": -1.3334547281265259, |
|
"logps/chosen": -532.7571411132812, |
|
"logps/rejected": -571.2667846679688, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4575352668762207, |
|
"rewards/margins": 1.0318888425827026, |
|
"rewards/rejected": -3.489424228668213, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 14.125, |
|
"learning_rate": 2.3447565043174533e-06, |
|
"logits/chosen": -1.42782461643219, |
|
"logits/rejected": -1.2667099237442017, |
|
"logps/chosen": -528.2996826171875, |
|
"logps/rejected": -579.03173828125, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.6998863220214844, |
|
"rewards/margins": 0.8944419026374817, |
|
"rewards/rejected": -3.5943286418914795, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 2.321962767270724e-06, |
|
"logits/chosen": -1.4696300029754639, |
|
"logits/rejected": -1.3142036199569702, |
|
"logps/chosen": -505.57843017578125, |
|
"logps/rejected": -546.5269165039062, |
|
"loss": 0.5521, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.556256055831909, |
|
"rewards/margins": 0.7811610698699951, |
|
"rewards/rejected": -3.3374171257019043, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 14.1875, |
|
"learning_rate": 2.299183896281692e-06, |
|
"logits/chosen": -1.4029340744018555, |
|
"logits/rejected": -1.2631093263626099, |
|
"logps/chosen": -490.07208251953125, |
|
"logps/rejected": -572.7850952148438, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.3514010906219482, |
|
"rewards/margins": 0.850308895111084, |
|
"rewards/rejected": -3.2017102241516113, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 8.125, |
|
"learning_rate": 2.2764217933795297e-06, |
|
"logits/chosen": -1.497179627418518, |
|
"logits/rejected": -1.3680561780929565, |
|
"logps/chosen": -482.985595703125, |
|
"logps/rejected": -569.66552734375, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.2155346870422363, |
|
"rewards/margins": 1.0720819234848022, |
|
"rewards/rejected": -3.2876172065734863, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -1.377992033958435, |
|
"eval_logits/rejected": -1.2484394311904907, |
|
"eval_logps/chosen": -498.22637939453125, |
|
"eval_logps/rejected": -575.8182983398438, |
|
"eval_loss": 0.49137604236602783, |
|
"eval_rewards/accuracies": 0.7475000023841858, |
|
"eval_rewards/chosen": -2.3357439041137695, |
|
"eval_rewards/margins": 0.9763532876968384, |
|
"eval_rewards/rejected": -3.3120970726013184, |
|
"eval_runtime": 384.8882, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 0.65, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 2.2536783591932786e-06, |
|
"logits/chosen": -1.5414104461669922, |
|
"logits/rejected": -1.380723237991333, |
|
"logps/chosen": -521.5967407226562, |
|
"logps/rejected": -590.3738403320312, |
|
"loss": 0.5316, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.460937023162842, |
|
"rewards/margins": 0.8691738843917847, |
|
"rewards/rejected": -3.330111026763916, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 2.230955492793149e-06, |
|
"logits/chosen": -1.3435004949569702, |
|
"logits/rejected": -1.285659909248352, |
|
"logps/chosen": -540.8585815429688, |
|
"logps/rejected": -614.8424072265625, |
|
"loss": 0.591, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.6706583499908447, |
|
"rewards/margins": 0.8687313199043274, |
|
"rewards/rejected": -3.5393898487091064, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 2.208255091531947e-06, |
|
"logits/chosen": -1.3562614917755127, |
|
"logits/rejected": -1.267709493637085, |
|
"logps/chosen": -524.9986572265625, |
|
"logps/rejected": -600.77880859375, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.480012893676758, |
|
"rewards/margins": 1.107474684715271, |
|
"rewards/rejected": -3.5874874591827393, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 12.25, |
|
"learning_rate": 2.1855790508866435e-06, |
|
"logits/chosen": -1.4271819591522217, |
|
"logits/rejected": -1.3109095096588135, |
|
"logps/chosen": -548.445556640625, |
|
"logps/rejected": -636.0255126953125, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.51424241065979, |
|
"rewards/margins": 1.0523946285247803, |
|
"rewards/rejected": -3.5666370391845703, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 10.0, |
|
"learning_rate": 2.162929264300107e-06, |
|
"logits/chosen": -1.4263137578964233, |
|
"logits/rejected": -1.329329252243042, |
|
"logps/chosen": -499.01947021484375, |
|
"logps/rejected": -603.9342041015625, |
|
"loss": 0.4131, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.300555944442749, |
|
"rewards/margins": 1.2727267742156982, |
|
"rewards/rejected": -3.5732827186584473, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 12.0, |
|
"learning_rate": 2.1403076230230006e-06, |
|
"logits/chosen": -1.3517494201660156, |
|
"logits/rejected": -1.2308024168014526, |
|
"logps/chosen": -553.6866455078125, |
|
"logps/rejected": -609.8203125, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.825040817260742, |
|
"rewards/margins": 0.8394268155097961, |
|
"rewards/rejected": -3.6644675731658936, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 2.11771601595586e-06, |
|
"logits/chosen": -1.420016884803772, |
|
"logits/rejected": -1.302247405052185, |
|
"logps/chosen": -558.3585205078125, |
|
"logps/rejected": -605.810791015625, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.7631030082702637, |
|
"rewards/margins": 0.9972630739212036, |
|
"rewards/rejected": -3.760366439819336, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 2.0951563294913737e-06, |
|
"logits/chosen": -1.4174630641937256, |
|
"logits/rejected": -1.2032339572906494, |
|
"logps/chosen": -525.8551025390625, |
|
"logps/rejected": -591.8328247070312, |
|
"loss": 0.4771, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.643219232559204, |
|
"rewards/margins": 0.9647111892700195, |
|
"rewards/rejected": -3.6079304218292236, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 2.0726304473568693e-06, |
|
"logits/chosen": -1.3942341804504395, |
|
"logits/rejected": -1.2679177522659302, |
|
"logps/chosen": -523.4191284179688, |
|
"logps/rejected": -589.2734985351562, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6448018550872803, |
|
"rewards/margins": 0.9818207621574402, |
|
"rewards/rejected": -3.6266231536865234, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 2.050140250457023e-06, |
|
"logits/chosen": -1.4893324375152588, |
|
"logits/rejected": -1.2684142589569092, |
|
"logps/chosen": -571.5191650390625, |
|
"logps/rejected": -653.9939575195312, |
|
"loss": 0.4655, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.9395713806152344, |
|
"rewards/margins": 1.165346384048462, |
|
"rewards/rejected": -4.104917526245117, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": -1.2897459268569946, |
|
"eval_logits/rejected": -1.1627777814865112, |
|
"eval_logps/chosen": -571.7406616210938, |
|
"eval_logps/rejected": -672.1651000976562, |
|
"eval_loss": 0.4928034543991089, |
|
"eval_rewards/accuracies": 0.7450000047683716, |
|
"eval_rewards/chosen": -3.0708866119384766, |
|
"eval_rewards/margins": 1.2046781778335571, |
|
"eval_rewards/rejected": -4.2755656242370605, |
|
"eval_runtime": 384.9685, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 17.75, |
|
"learning_rate": 2.0276876167168042e-06, |
|
"logits/chosen": -1.2386648654937744, |
|
"logits/rejected": -1.1422879695892334, |
|
"logps/chosen": -528.5611572265625, |
|
"logps/rejected": -600.717529296875, |
|
"loss": 0.5928, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.040616273880005, |
|
"rewards/margins": 1.0473132133483887, |
|
"rewards/rejected": -4.087929725646973, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 8.875, |
|
"learning_rate": 2.0052744209246682e-06, |
|
"logits/chosen": -1.3995951414108276, |
|
"logits/rejected": -1.2767788171768188, |
|
"logps/chosen": -552.1273193359375, |
|
"logps/rejected": -622.7492065429688, |
|
"loss": 0.5088, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.9678916931152344, |
|
"rewards/margins": 1.0349812507629395, |
|
"rewards/rejected": -4.002872943878174, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 1.9829025345760127e-06, |
|
"logits/chosen": -1.405669093132019, |
|
"logits/rejected": -1.3700844049453735, |
|
"logps/chosen": -559.1365966796875, |
|
"logps/rejected": -644.623779296875, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.8013358116149902, |
|
"rewards/margins": 0.9156203269958496, |
|
"rewards/rejected": -3.7169559001922607, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 9.125, |
|
"learning_rate": 1.9605738257169115e-06, |
|
"logits/chosen": -1.3817434310913086, |
|
"logits/rejected": -1.2056105136871338, |
|
"logps/chosen": -506.63201904296875, |
|
"logps/rejected": -614.6943969726562, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.7365362644195557, |
|
"rewards/margins": 1.1604435443878174, |
|
"rewards/rejected": -3.896979808807373, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 14.5, |
|
"learning_rate": 1.9382901587881275e-06, |
|
"logits/chosen": -1.4400955438613892, |
|
"logits/rejected": -1.3102456331253052, |
|
"logps/chosen": -521.36767578125, |
|
"logps/rejected": -608.1643676757812, |
|
"loss": 0.4185, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.6261146068573, |
|
"rewards/margins": 1.1909650564193726, |
|
"rewards/rejected": -3.817080020904541, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 1.916053394469437e-06, |
|
"logits/chosen": -1.4624320268630981, |
|
"logits/rejected": -1.2485642433166504, |
|
"logps/chosen": -547.3516845703125, |
|
"logps/rejected": -640.5094604492188, |
|
"loss": 0.5228, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.8405938148498535, |
|
"rewards/margins": 1.0770297050476074, |
|
"rewards/rejected": -3.917623996734619, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 9.75, |
|
"learning_rate": 1.8938653895242604e-06, |
|
"logits/chosen": -1.4186649322509766, |
|
"logits/rejected": -1.2279117107391357, |
|
"logps/chosen": -544.6094970703125, |
|
"logps/rejected": -642.7184448242188, |
|
"loss": 0.4335, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.803039073944092, |
|
"rewards/margins": 1.269425630569458, |
|
"rewards/rejected": -4.072464942932129, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 1.8717279966446267e-06, |
|
"logits/chosen": -1.2762877941131592, |
|
"logits/rejected": -1.1935598850250244, |
|
"logps/chosen": -549.9871826171875, |
|
"logps/rejected": -655.0281982421875, |
|
"loss": 0.4571, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.014120101928711, |
|
"rewards/margins": 1.1458585262298584, |
|
"rewards/rejected": -4.15997838973999, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 1.8496430642964698e-06, |
|
"logits/chosen": -1.351825475692749, |
|
"logits/rejected": -1.2405725717544556, |
|
"logps/chosen": -560.7874145507812, |
|
"logps/rejected": -648.8487548828125, |
|
"loss": 0.4959, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.8837532997131348, |
|
"rewards/margins": 1.1171314716339111, |
|
"rewards/rejected": -4.000885009765625, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 7.40625, |
|
"learning_rate": 1.827612436565286e-06, |
|
"logits/chosen": -1.359593391418457, |
|
"logits/rejected": -1.2036197185516357, |
|
"logps/chosen": -551.798828125, |
|
"logps/rejected": -647.3968505859375, |
|
"loss": 0.47, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.88834285736084, |
|
"rewards/margins": 1.1616257429122925, |
|
"rewards/rejected": -4.049968242645264, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -1.2773113250732422, |
|
"eval_logits/rejected": -1.1517482995986938, |
|
"eval_logps/chosen": -557.9854125976562, |
|
"eval_logps/rejected": -651.6221923828125, |
|
"eval_loss": 0.4908619225025177, |
|
"eval_rewards/accuracies": 0.7409999966621399, |
|
"eval_rewards/chosen": -2.9333345890045166, |
|
"eval_rewards/margins": 1.1368014812469482, |
|
"eval_rewards/rejected": -4.070136070251465, |
|
"eval_runtime": 385.0042, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 16.5, |
|
"learning_rate": 1.8056379530021492e-06, |
|
"logits/chosen": -1.4074201583862305, |
|
"logits/rejected": -1.3206876516342163, |
|
"logps/chosen": -535.7772216796875, |
|
"logps/rejected": -602.4798583984375, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.915788173675537, |
|
"rewards/margins": 0.915139377117157, |
|
"rewards/rejected": -3.830927610397339, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 8.125, |
|
"learning_rate": 1.7837214484701154e-06, |
|
"logits/chosen": -1.4410103559494019, |
|
"logits/rejected": -1.3098431825637817, |
|
"logps/chosen": -498.352294921875, |
|
"logps/rejected": -587.9461669921875, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.460021495819092, |
|
"rewards/margins": 1.1296519041061401, |
|
"rewards/rejected": -3.5896732807159424, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 15.25, |
|
"learning_rate": 1.7618647529910043e-06, |
|
"logits/chosen": -1.4378907680511475, |
|
"logits/rejected": -1.3039597272872925, |
|
"logps/chosen": -500.2588806152344, |
|
"logps/rejected": -604.9884033203125, |
|
"loss": 0.4869, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.434854030609131, |
|
"rewards/margins": 1.166461706161499, |
|
"rewards/rejected": -3.601315975189209, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 1.7400696915925996e-06, |
|
"logits/chosen": -1.442056655883789, |
|
"logits/rejected": -1.2497450113296509, |
|
"logps/chosen": -522.6090698242188, |
|
"logps/rejected": -571.2970581054688, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.519986391067505, |
|
"rewards/margins": 1.0696182250976562, |
|
"rewards/rejected": -3.589604139328003, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 11.0, |
|
"learning_rate": 1.718338084156254e-06, |
|
"logits/chosen": -1.4057360887527466, |
|
"logits/rejected": -1.2487547397613525, |
|
"logps/chosen": -521.5921020507812, |
|
"logps/rejected": -594.0106201171875, |
|
"loss": 0.4502, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.3480098247528076, |
|
"rewards/margins": 1.1080354452133179, |
|
"rewards/rejected": -3.456045627593994, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 1.6966717452649372e-06, |
|
"logits/chosen": -1.5112414360046387, |
|
"logits/rejected": -1.3404825925827026, |
|
"logps/chosen": -511.281005859375, |
|
"logps/rejected": -574.0786743164062, |
|
"loss": 0.4402, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.34338641166687, |
|
"rewards/margins": 1.1465994119644165, |
|
"rewards/rejected": -3.489985704421997, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 8.375, |
|
"learning_rate": 1.6750724840517103e-06, |
|
"logits/chosen": -1.4542334079742432, |
|
"logits/rejected": -1.3746201992034912, |
|
"logps/chosen": -482.7185974121094, |
|
"logps/rejected": -584.7723999023438, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2991232872009277, |
|
"rewards/margins": 0.9794435501098633, |
|
"rewards/rejected": -3.278566837310791, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 1.6535421040486686e-06, |
|
"logits/chosen": -1.2772949934005737, |
|
"logits/rejected": -1.177643895149231, |
|
"logps/chosen": -497.01861572265625, |
|
"logps/rejected": -593.6337890625, |
|
"loss": 0.424, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.4366674423217773, |
|
"rewards/margins": 1.3086668252944946, |
|
"rewards/rejected": -3.7453346252441406, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 15.5, |
|
"learning_rate": 1.6320824030363458e-06, |
|
"logits/chosen": -1.351653814315796, |
|
"logits/rejected": -1.2928129434585571, |
|
"logps/chosen": -490.3855895996094, |
|
"logps/rejected": -596.65380859375, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.5555453300476074, |
|
"rewards/margins": 1.217242956161499, |
|
"rewards/rejected": -3.7727882862091064, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 14.5625, |
|
"learning_rate": 1.6106951728936028e-06, |
|
"logits/chosen": -1.4520965814590454, |
|
"logits/rejected": -1.318485975265503, |
|
"logps/chosen": -513.4244995117188, |
|
"logps/rejected": -615.5191040039062, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.5622081756591797, |
|
"rewards/margins": 1.0996941328048706, |
|
"rewards/rejected": -3.661902666091919, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -1.3239047527313232, |
|
"eval_logits/rejected": -1.1944924592971802, |
|
"eval_logps/chosen": -525.2288208007812, |
|
"eval_logps/rejected": -621.9060668945312, |
|
"eval_loss": 0.49328407645225525, |
|
"eval_rewards/accuracies": 0.7390000224113464, |
|
"eval_rewards/chosen": -2.6057679653167725, |
|
"eval_rewards/margins": 1.1672067642211914, |
|
"eval_rewards/rejected": -3.7729744911193848, |
|
"eval_runtime": 384.9389, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 1.5893821994479996e-06, |
|
"logits/chosen": -1.4442546367645264, |
|
"logits/rejected": -1.3286397457122803, |
|
"logps/chosen": -525.9605102539062, |
|
"logps/rejected": -608.4495239257812, |
|
"loss": 0.475, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.4855237007141113, |
|
"rewards/margins": 1.2105185985565186, |
|
"rewards/rejected": -3.696042537689209, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 1.5681452623266868e-06, |
|
"logits/chosen": -1.4192955493927002, |
|
"logits/rejected": -1.174787998199463, |
|
"logps/chosen": -559.93603515625, |
|
"logps/rejected": -632.15234375, |
|
"loss": 0.4838, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.7089059352874756, |
|
"rewards/margins": 1.304880976676941, |
|
"rewards/rejected": -4.013787269592285, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 1.5469861348078014e-06, |
|
"logits/chosen": -1.420716643333435, |
|
"logits/rejected": -1.2741087675094604, |
|
"logps/chosen": -522.9864501953125, |
|
"logps/rejected": -641.4890747070312, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.7621614933013916, |
|
"rewards/margins": 1.2914037704467773, |
|
"rewards/rejected": -4.05356502532959, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.5259065836724035e-06, |
|
"logits/chosen": -1.2834079265594482, |
|
"logits/rejected": -1.2232940196990967, |
|
"logps/chosen": -522.1956176757812, |
|
"logps/rejected": -655.6646728515625, |
|
"loss": 0.4116, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.773864269256592, |
|
"rewards/margins": 1.3960521221160889, |
|
"rewards/rejected": -4.16991662979126, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 19.5, |
|
"learning_rate": 1.5049083690569456e-06, |
|
"logits/chosen": -1.3350251913070679, |
|
"logits/rejected": -1.2278568744659424, |
|
"logps/chosen": -531.40380859375, |
|
"logps/rejected": -651.0161743164062, |
|
"loss": 0.5176, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.939335584640503, |
|
"rewards/margins": 1.2402127981185913, |
|
"rewards/rejected": -4.179548740386963, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 14.0625, |
|
"learning_rate": 1.4839932443063057e-06, |
|
"logits/chosen": -1.339280366897583, |
|
"logits/rejected": -1.1727148294448853, |
|
"logps/chosen": -578.1463623046875, |
|
"logps/rejected": -648.4544677734375, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.86167573928833, |
|
"rewards/margins": 1.2659341096878052, |
|
"rewards/rejected": -4.127610206604004, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 22.875, |
|
"learning_rate": 1.4631629558273803e-06, |
|
"logits/chosen": -1.3597378730773926, |
|
"logits/rejected": -1.2507550716400146, |
|
"logps/chosen": -532.3685302734375, |
|
"logps/rejected": -612.3403930664062, |
|
"loss": 0.6361, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.9064340591430664, |
|
"rewards/margins": 0.9161791801452637, |
|
"rewards/rejected": -3.822613477706909, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 1.4424192429432657e-06, |
|
"logits/chosen": -1.4379812479019165, |
|
"logits/rejected": -1.3553606271743774, |
|
"logps/chosen": -497.770263671875, |
|
"logps/rejected": -619.8333129882812, |
|
"loss": 0.4692, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.4189367294311523, |
|
"rewards/margins": 1.1874587535858154, |
|
"rewards/rejected": -3.606395721435547, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 14.9375, |
|
"learning_rate": 1.421763837748016e-06, |
|
"logits/chosen": -1.4066466093063354, |
|
"logits/rejected": -1.3101098537445068, |
|
"logps/chosen": -501.1058654785156, |
|
"logps/rejected": -613.1395263671875, |
|
"loss": 0.4538, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.5047993659973145, |
|
"rewards/margins": 1.2053807973861694, |
|
"rewards/rejected": -3.7101802825927734, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 10.125, |
|
"learning_rate": 1.401198464962021e-06, |
|
"logits/chosen": -1.4462471008300781, |
|
"logits/rejected": -1.2622243165969849, |
|
"logps/chosen": -535.8546142578125, |
|
"logps/rejected": -603.2532348632812, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.672712564468384, |
|
"rewards/margins": 1.0602437257766724, |
|
"rewards/rejected": -3.7329559326171875, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -1.3264377117156982, |
|
"eval_logits/rejected": -1.1991208791732788, |
|
"eval_logps/chosen": -532.6129760742188, |
|
"eval_logps/rejected": -628.5565795898438, |
|
"eval_loss": 0.4950037896633148, |
|
"eval_rewards/accuracies": 0.7450000047683716, |
|
"eval_rewards/chosen": -2.67961049079895, |
|
"eval_rewards/margins": 1.1598690748214722, |
|
"eval_rewards/rejected": -3.839479446411133, |
|
"eval_runtime": 384.8048, |
|
"eval_samples_per_second": 5.197, |
|
"eval_steps_per_second": 0.65, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 1.3807248417879896e-06, |
|
"logits/chosen": -1.4817050695419312, |
|
"logits/rejected": -1.368260145187378, |
|
"logps/chosen": -536.787109375, |
|
"logps/rejected": -646.0281982421875, |
|
"loss": 0.4362, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.6372110843658447, |
|
"rewards/margins": 1.2936350107192993, |
|
"rewards/rejected": -3.9308464527130127, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 22.75, |
|
"learning_rate": 1.3603446777675665e-06, |
|
"logits/chosen": -1.3277854919433594, |
|
"logits/rejected": -1.207275629043579, |
|
"logps/chosen": -556.1530151367188, |
|
"logps/rejected": -648.6631469726562, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.9295554161071777, |
|
"rewards/margins": 1.1459153890609741, |
|
"rewards/rejected": -4.075470924377441, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 1.3400596746385817e-06, |
|
"logits/chosen": -1.4622247219085693, |
|
"logits/rejected": -1.293874979019165, |
|
"logps/chosen": -548.2469482421875, |
|
"logps/rejected": -635.0680541992188, |
|
"loss": 0.5024, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.7906343936920166, |
|
"rewards/margins": 1.13905930519104, |
|
"rewards/rejected": -3.9296936988830566, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 9.125, |
|
"learning_rate": 1.3198715261929587e-06, |
|
"logits/chosen": -1.4278955459594727, |
|
"logits/rejected": -1.2781603336334229, |
|
"logps/chosen": -530.7630615234375, |
|
"logps/rejected": -637.7789306640625, |
|
"loss": 0.4257, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.943067789077759, |
|
"rewards/margins": 1.2065026760101318, |
|
"rewards/rejected": -4.149571418762207, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 1.2997819181352823e-06, |
|
"logits/chosen": -1.4591927528381348, |
|
"logits/rejected": -1.3012304306030273, |
|
"logps/chosen": -570.8213500976562, |
|
"logps/rejected": -686.0819091796875, |
|
"loss": 0.432, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.7398808002471924, |
|
"rewards/margins": 1.3976715803146362, |
|
"rewards/rejected": -4.137551784515381, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 21.375, |
|
"learning_rate": 1.2797925279420454e-06, |
|
"logits/chosen": -1.4334865808486938, |
|
"logits/rejected": -1.2914705276489258, |
|
"logps/chosen": -569.4708862304688, |
|
"logps/rejected": -681.2939453125, |
|
"loss": 0.4801, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.9758732318878174, |
|
"rewards/margins": 1.2384196519851685, |
|
"rewards/rejected": -4.214293003082275, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 13.625, |
|
"learning_rate": 1.2599050247215764e-06, |
|
"logits/chosen": -1.3719279766082764, |
|
"logits/rejected": -1.2636692523956299, |
|
"logps/chosen": -553.9847412109375, |
|
"logps/rejected": -650.590576171875, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.9605300426483154, |
|
"rewards/margins": 1.196462869644165, |
|
"rewards/rejected": -4.1569929122924805, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 13.5625, |
|
"learning_rate": 1.2401210690746705e-06, |
|
"logits/chosen": -1.407362699508667, |
|
"logits/rejected": -1.252087950706482, |
|
"logps/chosen": -551.7230834960938, |
|
"logps/rejected": -632.700439453125, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.845663547515869, |
|
"rewards/margins": 1.1418297290802002, |
|
"rewards/rejected": -3.9874930381774902, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 13.0, |
|
"learning_rate": 1.2204423129559306e-06, |
|
"logits/chosen": -1.4424464702606201, |
|
"logits/rejected": -1.3769545555114746, |
|
"logps/chosen": -529.952880859375, |
|
"logps/rejected": -642.1946411132812, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6443989276885986, |
|
"rewards/margins": 1.1635662317276, |
|
"rewards/rejected": -3.807965040206909, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 18.625, |
|
"learning_rate": 1.20087039953583e-06, |
|
"logits/chosen": -1.4672467708587646, |
|
"logits/rejected": -1.3367975950241089, |
|
"logps/chosen": -524.6241455078125, |
|
"logps/rejected": -619.1414794921875, |
|
"loss": 0.5286, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.604055881500244, |
|
"rewards/margins": 1.2088748216629028, |
|
"rewards/rejected": -3.8129310607910156, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": -1.330871820449829, |
|
"eval_logits/rejected": -1.2033063173294067, |
|
"eval_logps/chosen": -528.7828979492188, |
|
"eval_logps/rejected": -622.6273193359375, |
|
"eval_loss": 0.49607598781585693, |
|
"eval_rewards/accuracies": 0.7379999756813049, |
|
"eval_rewards/chosen": -2.6413092613220215, |
|
"eval_rewards/margins": 1.1388777494430542, |
|
"eval_rewards/rejected": -3.7801873683929443, |
|
"eval_runtime": 384.8793, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 0.65, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 1.181406963063507e-06, |
|
"logits/chosen": -1.3730335235595703, |
|
"logits/rejected": -1.3149739503860474, |
|
"logps/chosen": -519.0841064453125, |
|
"logps/rejected": -629.9993896484375, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.532372236251831, |
|
"rewards/margins": 1.1152244806289673, |
|
"rewards/rejected": -3.647596836090088, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 5.875, |
|
"learning_rate": 1.1620536287303052e-06, |
|
"logits/chosen": -1.4739983081817627, |
|
"logits/rejected": -1.3339478969573975, |
|
"logps/chosen": -543.446533203125, |
|
"logps/rejected": -608.2566528320312, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.522928476333618, |
|
"rewards/margins": 0.9496552348136902, |
|
"rewards/rejected": -3.472583770751953, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 10.5, |
|
"learning_rate": 1.1428120125340717e-06, |
|
"logits/chosen": -1.4052727222442627, |
|
"logits/rejected": -1.2576491832733154, |
|
"logps/chosen": -496.2265625, |
|
"logps/rejected": -606.283203125, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4208767414093018, |
|
"rewards/margins": 1.5151195526123047, |
|
"rewards/rejected": -3.9359962940216064, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 1.123683721144223e-06, |
|
"logits/chosen": -1.4060310125350952, |
|
"logits/rejected": -1.3006138801574707, |
|
"logps/chosen": -531.5321655273438, |
|
"logps/rejected": -636.8844604492188, |
|
"loss": 0.4255, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.4785878658294678, |
|
"rewards/margins": 1.376922845840454, |
|
"rewards/rejected": -3.85551118850708, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 7.375, |
|
"learning_rate": 1.1046703517675848e-06, |
|
"logits/chosen": -1.4380762577056885, |
|
"logits/rejected": -1.3517663478851318, |
|
"logps/chosen": -497.983154296875, |
|
"logps/rejected": -609.3760986328125, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.428668975830078, |
|
"rewards/margins": 1.0526988506317139, |
|
"rewards/rejected": -3.481367588043213, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 1.085773492015028e-06, |
|
"logits/chosen": -1.422131896018982, |
|
"logits/rejected": -1.2450854778289795, |
|
"logps/chosen": -488.3145446777344, |
|
"logps/rejected": -581.1353149414062, |
|
"loss": 0.4374, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.4193508625030518, |
|
"rewards/margins": 1.271308183670044, |
|
"rewards/rejected": -3.6906590461730957, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 1.0669947197689034e-06, |
|
"logits/chosen": -1.3928449153900146, |
|
"logits/rejected": -1.2680118083953857, |
|
"logps/chosen": -527.7369995117188, |
|
"logps/rejected": -608.68408203125, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.5510122776031494, |
|
"rewards/margins": 1.0964926481246948, |
|
"rewards/rejected": -3.6475048065185547, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 10.5, |
|
"learning_rate": 1.048335603051291e-06, |
|
"logits/chosen": -1.3895783424377441, |
|
"logits/rejected": -1.2499104738235474, |
|
"logps/chosen": -566.0472412109375, |
|
"logps/rejected": -672.4845581054688, |
|
"loss": 0.4484, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.7824342250823975, |
|
"rewards/margins": 1.3463845252990723, |
|
"rewards/rejected": -4.128818511962891, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 19.25, |
|
"learning_rate": 1.0297976998930665e-06, |
|
"logits/chosen": -1.3868653774261475, |
|
"logits/rejected": -1.2727091312408447, |
|
"logps/chosen": -521.1253662109375, |
|
"logps/rejected": -633.4533081054688, |
|
"loss": 0.44, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.605109453201294, |
|
"rewards/margins": 1.4010969400405884, |
|
"rewards/rejected": -4.006206035614014, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.0113825582038078e-06, |
|
"logits/chosen": -1.4213166236877441, |
|
"logits/rejected": -1.304223656654358, |
|
"logps/chosen": -538.0697021484375, |
|
"logps/rejected": -635.2910766601562, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.727999210357666, |
|
"rewards/margins": 1.1396801471710205, |
|
"rewards/rejected": -3.8676788806915283, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -1.3305258750915527, |
|
"eval_logits/rejected": -1.2037560939788818, |
|
"eval_logps/chosen": -532.7353515625, |
|
"eval_logps/rejected": -627.1752319335938, |
|
"eval_loss": 0.49248310923576355, |
|
"eval_rewards/accuracies": 0.7404999732971191, |
|
"eval_rewards/chosen": -2.680833578109741, |
|
"eval_rewards/margins": 1.1448326110839844, |
|
"eval_rewards/rejected": -3.8256664276123047, |
|
"eval_runtime": 384.8109, |
|
"eval_samples_per_second": 5.197, |
|
"eval_steps_per_second": 0.65, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 10.5, |
|
"learning_rate": 9.930917156425477e-07, |
|
"logits/chosen": -1.4014348983764648, |
|
"logits/rejected": -1.2874269485473633, |
|
"logps/chosen": -547.7164306640625, |
|
"logps/rejected": -650.8362426757812, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.8659284114837646, |
|
"rewards/margins": 1.0600517988204956, |
|
"rewards/rejected": -3.9259800910949707, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 23.5, |
|
"learning_rate": 9.749266994893756e-07, |
|
"logits/chosen": -1.3308923244476318, |
|
"logits/rejected": -1.20591139793396, |
|
"logps/chosen": -509.127685546875, |
|
"logps/rejected": -585.4031982421875, |
|
"loss": 0.5619, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.684295177459717, |
|
"rewards/margins": 0.8970636129379272, |
|
"rewards/rejected": -3.5813584327697754, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 9.56889026517913e-07, |
|
"logits/chosen": -1.388494610786438, |
|
"logits/rejected": -1.2768661975860596, |
|
"logps/chosen": -543.159423828125, |
|
"logps/rejected": -621.0648193359375, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.818211078643799, |
|
"rewards/margins": 1.039044737815857, |
|
"rewards/rejected": -3.857255458831787, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 9.389802028686617e-07, |
|
"logits/chosen": -1.4692569971084595, |
|
"logits/rejected": -1.3720782995224, |
|
"logps/chosen": -529.861328125, |
|
"logps/rejected": -575.4440307617188, |
|
"loss": 0.5904, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.690932035446167, |
|
"rewards/margins": 0.7855546474456787, |
|
"rewards/rejected": -3.4764866828918457, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 10.5, |
|
"learning_rate": 9.212017239232427e-07, |
|
"logits/chosen": -1.4099429845809937, |
|
"logits/rejected": -1.2403053045272827, |
|
"logps/chosen": -530.2276611328125, |
|
"logps/rejected": -631.5889892578125, |
|
"loss": 0.4731, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.5727314949035645, |
|
"rewards/margins": 1.2429524660110474, |
|
"rewards/rejected": -3.8156840801239014, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 9.625, |
|
"learning_rate": 9.03555074179533e-07, |
|
"logits/chosen": -1.3713786602020264, |
|
"logits/rejected": -1.3497127294540405, |
|
"logps/chosen": -513.812744140625, |
|
"logps/rejected": -644.496826171875, |
|
"loss": 0.4514, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.570328950881958, |
|
"rewards/margins": 1.2485467195510864, |
|
"rewards/rejected": -3.818875551223755, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 20.0, |
|
"learning_rate": 8.860417271277067e-07, |
|
"logits/chosen": -1.4884029626846313, |
|
"logits/rejected": -1.4525179862976074, |
|
"logps/chosen": -530.30712890625, |
|
"logps/rejected": -616.304931640625, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6107208728790283, |
|
"rewards/margins": 0.9398597478866577, |
|
"rewards/rejected": -3.5505805015563965, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 8.25, |
|
"learning_rate": 8.686631451272029e-07, |
|
"logits/chosen": -1.4650144577026367, |
|
"logits/rejected": -1.3026695251464844, |
|
"logps/chosen": -529.6368408203125, |
|
"logps/rejected": -623.4691162109375, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.762089252471924, |
|
"rewards/margins": 1.1926974058151245, |
|
"rewards/rejected": -3.954786777496338, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 7.03125, |
|
"learning_rate": 8.514207792846168e-07, |
|
"logits/chosen": -1.4712326526641846, |
|
"logits/rejected": -1.3452935218811035, |
|
"logps/chosen": -522.875244140625, |
|
"logps/rejected": -606.8411254882812, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.720153570175171, |
|
"rewards/margins": 1.1239979267120361, |
|
"rewards/rejected": -3.844151735305786, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 8.343160693325356e-07, |
|
"logits/chosen": -1.3627361059188843, |
|
"logits/rejected": -1.2458826303482056, |
|
"logps/chosen": -535.0531005859375, |
|
"logps/rejected": -643.9103393554688, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.795009136199951, |
|
"rewards/margins": 1.1317135095596313, |
|
"rewards/rejected": -3.926722764968872, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_logits/chosen": -1.3309776782989502, |
|
"eval_logits/rejected": -1.2046185731887817, |
|
"eval_logps/chosen": -542.6776733398438, |
|
"eval_logps/rejected": -634.599365234375, |
|
"eval_loss": 0.4903542995452881, |
|
"eval_rewards/accuracies": 0.7415000200271606, |
|
"eval_rewards/chosen": -2.7802560329437256, |
|
"eval_rewards/margins": 1.1196515560150146, |
|
"eval_rewards/rejected": -3.8999080657958984, |
|
"eval_runtime": 384.9371, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 8.173504435093174e-07, |
|
"logits/chosen": -1.3742562532424927, |
|
"logits/rejected": -1.193378210067749, |
|
"logps/chosen": -514.3562622070312, |
|
"logps/rejected": -601.44091796875, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.727877378463745, |
|
"rewards/margins": 1.1920711994171143, |
|
"rewards/rejected": -3.9199485778808594, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 8.00525318439836e-07, |
|
"logits/chosen": -1.4089447259902954, |
|
"logits/rejected": -1.2591418027877808, |
|
"logps/chosen": -555.609375, |
|
"logps/rejected": -642.9563598632812, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.7878005504608154, |
|
"rewards/margins": 0.9498197436332703, |
|
"rewards/rejected": -3.7376205921173096, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 8.625, |
|
"learning_rate": 7.838420990171927e-07, |
|
"logits/chosen": -1.4908835887908936, |
|
"logits/rejected": -1.3264166116714478, |
|
"logps/chosen": -539.0242919921875, |
|
"logps/rejected": -614.5499267578125, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.6965396404266357, |
|
"rewards/margins": 1.0111539363861084, |
|
"rewards/rejected": -3.7076938152313232, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 7.673021782854084e-07, |
|
"logits/chosen": -1.3615717887878418, |
|
"logits/rejected": -1.2126728296279907, |
|
"logps/chosen": -533.7975463867188, |
|
"logps/rejected": -619.9442138671875, |
|
"loss": 0.4664, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.7310891151428223, |
|
"rewards/margins": 1.2802739143371582, |
|
"rewards/rejected": -4.0113630294799805, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 7.509069373231039e-07, |
|
"logits/chosen": -1.364745855331421, |
|
"logits/rejected": -1.2478384971618652, |
|
"logps/chosen": -535.0496215820312, |
|
"logps/rejected": -594.5074462890625, |
|
"loss": 0.5768, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.823659658432007, |
|
"rewards/margins": 0.8424208760261536, |
|
"rewards/rejected": -3.666080951690674, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 7.25, |
|
"learning_rate": 7.346577451281822e-07, |
|
"logits/chosen": -1.3784279823303223, |
|
"logits/rejected": -1.2886309623718262, |
|
"logps/chosen": -533.7701416015625, |
|
"logps/rejected": -641.3207397460938, |
|
"loss": 0.4516, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.715505361557007, |
|
"rewards/margins": 1.3165885210037231, |
|
"rewards/rejected": -4.032094478607178, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 22.0, |
|
"learning_rate": 7.185559585035138e-07, |
|
"logits/chosen": -1.413137674331665, |
|
"logits/rejected": -1.2511926889419556, |
|
"logps/chosen": -575.2888793945312, |
|
"logps/rejected": -671.4484252929688, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.9251255989074707, |
|
"rewards/margins": 1.120355248451233, |
|
"rewards/rejected": -4.045480728149414, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 9.25, |
|
"learning_rate": 7.026029219436504e-07, |
|
"logits/chosen": -1.4352123737335205, |
|
"logits/rejected": -1.2644484043121338, |
|
"logps/chosen": -532.7044677734375, |
|
"logps/rejected": -639.6867065429688, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.804140090942383, |
|
"rewards/margins": 1.197808861732483, |
|
"rewards/rejected": -4.001949310302734, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 6.46875, |
|
"learning_rate": 6.867999675225523e-07, |
|
"logits/chosen": -1.4778783321380615, |
|
"logits/rejected": -1.3396053314208984, |
|
"logps/chosen": -498.43896484375, |
|
"logps/rejected": -601.5620727539062, |
|
"loss": 0.4659, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.675211191177368, |
|
"rewards/margins": 1.1733391284942627, |
|
"rewards/rejected": -3.8485500812530518, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 9.75, |
|
"learning_rate": 6.711484147823663e-07, |
|
"logits/chosen": -1.3854588270187378, |
|
"logits/rejected": -1.3027629852294922, |
|
"logps/chosen": -500.9271545410156, |
|
"logps/rejected": -624.5755615234375, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.667020797729492, |
|
"rewards/margins": 1.1879808902740479, |
|
"rewards/rejected": -3.855001449584961, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -1.3332931995391846, |
|
"eval_logits/rejected": -1.2066706418991089, |
|
"eval_logps/chosen": -544.357421875, |
|
"eval_logps/rejected": -633.0811157226562, |
|
"eval_loss": 0.48963090777397156, |
|
"eval_rewards/accuracies": 0.7425000071525574, |
|
"eval_rewards/chosen": -2.7970540523529053, |
|
"eval_rewards/margins": 1.0876713991165161, |
|
"eval_rewards/rejected": -3.884725332260132, |
|
"eval_runtime": 384.4736, |
|
"eval_samples_per_second": 5.202, |
|
"eval_steps_per_second": 0.65, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 12.6875, |
|
"learning_rate": 6.556495706232413e-07, |
|
"logits/chosen": -1.3909966945648193, |
|
"logits/rejected": -1.2924126386642456, |
|
"logps/chosen": -558.5319213867188, |
|
"logps/rejected": -642.2532958984375, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.8983654975891113, |
|
"rewards/margins": 1.0214240550994873, |
|
"rewards/rejected": -3.9197897911071777, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 9.75, |
|
"learning_rate": 6.403047291942057e-07, |
|
"logits/chosen": -1.316248893737793, |
|
"logits/rejected": -1.1616556644439697, |
|
"logps/chosen": -507.49603271484375, |
|
"logps/rejected": -592.3514404296875, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.8122828006744385, |
|
"rewards/margins": 1.092459797859192, |
|
"rewards/rejected": -3.904742479324341, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 13.1875, |
|
"learning_rate": 6.251151717851023e-07, |
|
"logits/chosen": -1.3809759616851807, |
|
"logits/rejected": -1.300438642501831, |
|
"logps/chosen": -501.9111328125, |
|
"logps/rejected": -600.9503784179688, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.685105323791504, |
|
"rewards/margins": 1.123300313949585, |
|
"rewards/rejected": -3.8084053993225098, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 6.100821667196041e-07, |
|
"logits/chosen": -1.5608792304992676, |
|
"logits/rejected": -1.2841638326644897, |
|
"logps/chosen": -542.8932495117188, |
|
"logps/rejected": -584.8834228515625, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.672549247741699, |
|
"rewards/margins": 1.0747697353363037, |
|
"rewards/rejected": -3.747319459915161, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 96.5, |
|
"learning_rate": 5.952069692493062e-07, |
|
"logits/chosen": -1.364072561264038, |
|
"logits/rejected": -1.2446839809417725, |
|
"logps/chosen": -493.5611267089844, |
|
"logps/rejected": -623.0722045898438, |
|
"loss": 0.4197, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.652458906173706, |
|
"rewards/margins": 1.2607505321502686, |
|
"rewards/rejected": -3.9132094383239746, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 9.25, |
|
"learning_rate": 5.80490821448918e-07, |
|
"logits/chosen": -1.3073358535766602, |
|
"logits/rejected": -1.3144903182983398, |
|
"logps/chosen": -533.8553466796875, |
|
"logps/rejected": -709.3834838867188, |
|
"loss": 0.4358, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.7133326530456543, |
|
"rewards/margins": 1.3306279182434082, |
|
"rewards/rejected": -4.043961048126221, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 9.0625, |
|
"learning_rate": 5.659349521125459e-07, |
|
"logits/chosen": -1.5068944692611694, |
|
"logits/rejected": -1.4476134777069092, |
|
"logps/chosen": -550.5355224609375, |
|
"logps/rejected": -633.8616943359375, |
|
"loss": 0.5084, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.6911702156066895, |
|
"rewards/margins": 1.0392811298370361, |
|
"rewards/rejected": -3.7304511070251465, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 8.0, |
|
"learning_rate": 5.5154057665109e-07, |
|
"logits/chosen": -1.4646342992782593, |
|
"logits/rejected": -1.306074857711792, |
|
"logps/chosen": -547.6588745117188, |
|
"logps/rejected": -648.3570556640625, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.8676917552948, |
|
"rewards/margins": 1.2712510824203491, |
|
"rewards/rejected": -4.138943672180176, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 5.373088969907586e-07, |
|
"logits/chosen": -1.4882913827896118, |
|
"logits/rejected": -1.3172584772109985, |
|
"logps/chosen": -559.1968994140625, |
|
"logps/rejected": -619.5064697265625, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.8238325119018555, |
|
"rewards/margins": 1.1052324771881104, |
|
"rewards/rejected": -3.929064989089966, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 5.23241101472709e-07, |
|
"logits/chosen": -1.4091695547103882, |
|
"logits/rejected": -1.2783794403076172, |
|
"logps/chosen": -545.93359375, |
|
"logps/rejected": -631.8021240234375, |
|
"loss": 0.4808, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.696319580078125, |
|
"rewards/margins": 1.0787622928619385, |
|
"rewards/rejected": -3.7750816345214844, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": -1.32783842086792, |
|
"eval_logits/rejected": -1.2009104490280151, |
|
"eval_logps/chosen": -546.65625, |
|
"eval_logps/rejected": -639.3413696289062, |
|
"eval_loss": 0.4900914132595062, |
|
"eval_rewards/accuracies": 0.7409999966621399, |
|
"eval_rewards/chosen": -2.820042133331299, |
|
"eval_rewards/margins": 1.1272854804992676, |
|
"eval_rewards/rejected": -3.9473278522491455, |
|
"eval_runtime": 384.996, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 8.625, |
|
"learning_rate": 5.09338364753818e-07, |
|
"logits/chosen": -1.4804376363754272, |
|
"logits/rejected": -1.3111730813980103, |
|
"logps/chosen": -562.015625, |
|
"logps/rejected": -653.310546875, |
|
"loss": 0.5286, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.7659342288970947, |
|
"rewards/margins": 1.0708248615264893, |
|
"rewards/rejected": -3.836758852005005, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 4.956018477086005e-07, |
|
"logits/chosen": -1.444879174232483, |
|
"logits/rejected": -1.2762501239776611, |
|
"logps/chosen": -557.318115234375, |
|
"logps/rejected": -641.5279541015625, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.881852626800537, |
|
"rewards/margins": 1.1156418323516846, |
|
"rewards/rejected": -3.9974944591522217, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 4.820326973322764e-07, |
|
"logits/chosen": -1.3559176921844482, |
|
"logits/rejected": -1.2783609628677368, |
|
"logps/chosen": -545.87255859375, |
|
"logps/rejected": -644.6527099609375, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.9490904808044434, |
|
"rewards/margins": 1.0479673147201538, |
|
"rewards/rejected": -3.997058153152466, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 16.0, |
|
"learning_rate": 4.686320466449981e-07, |
|
"logits/chosen": -1.3531897068023682, |
|
"logits/rejected": -1.1676143407821655, |
|
"logps/chosen": -509.6865234375, |
|
"logps/rejected": -646.2823486328125, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.704374313354492, |
|
"rewards/margins": 1.3875735998153687, |
|
"rewards/rejected": -4.091948509216309, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 7.53125, |
|
"learning_rate": 4.554010145972418e-07, |
|
"logits/chosen": -1.5110399723052979, |
|
"logits/rejected": -1.3339247703552246, |
|
"logps/chosen": -550.0721435546875, |
|
"logps/rejected": -648.1046752929688, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.862560749053955, |
|
"rewards/margins": 1.0935637950897217, |
|
"rewards/rejected": -3.956124782562256, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 10.0, |
|
"learning_rate": 4.4234070597637455e-07, |
|
"logits/chosen": -1.3611904382705688, |
|
"logits/rejected": -1.270994782447815, |
|
"logps/chosen": -550.0131225585938, |
|
"logps/rejected": -645.00341796875, |
|
"loss": 0.5164, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.744797468185425, |
|
"rewards/margins": 1.0690962076187134, |
|
"rewards/rejected": -3.8138937950134277, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 4.2945221131440783e-07, |
|
"logits/chosen": -1.3394265174865723, |
|
"logits/rejected": -1.1380027532577515, |
|
"logps/chosen": -532.6522216796875, |
|
"logps/rejected": -629.9595336914062, |
|
"loss": 0.4172, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.6694254875183105, |
|
"rewards/margins": 1.289540410041809, |
|
"rewards/rejected": -3.958966016769409, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 4.167366067969381e-07, |
|
"logits/chosen": -1.420111060142517, |
|
"logits/rejected": -1.356261134147644, |
|
"logps/chosen": -500.45135498046875, |
|
"logps/rejected": -622.8685302734375, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.775409698486328, |
|
"rewards/margins": 0.9858112335205078, |
|
"rewards/rejected": -3.7612204551696777, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 5.78125, |
|
"learning_rate": 4.041949541732826e-07, |
|
"logits/chosen": -1.4244636297225952, |
|
"logits/rejected": -1.3622827529907227, |
|
"logps/chosen": -547.3236694335938, |
|
"logps/rejected": -641.5424194335938, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.83439302444458, |
|
"rewards/margins": 1.0849192142486572, |
|
"rewards/rejected": -3.9193122386932373, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 18.5, |
|
"learning_rate": 3.9182830066782614e-07, |
|
"logits/chosen": -1.342223882675171, |
|
"logits/rejected": -1.326421856880188, |
|
"logps/chosen": -541.6400756835938, |
|
"logps/rejected": -670.80126953125, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.8389620780944824, |
|
"rewards/margins": 1.1673671007156372, |
|
"rewards/rejected": -4.006329536437988, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -1.3354921340942383, |
|
"eval_logits/rejected": -1.2087724208831787, |
|
"eval_logps/chosen": -541.2137451171875, |
|
"eval_logps/rejected": -633.5067749023438, |
|
"eval_loss": 0.48956310749053955, |
|
"eval_rewards/accuracies": 0.7440000176429749, |
|
"eval_rewards/chosen": -2.7656171321868896, |
|
"eval_rewards/margins": 1.1233649253845215, |
|
"eval_rewards/rejected": -3.8889822959899902, |
|
"eval_runtime": 384.9676, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 3.796376788925771e-07, |
|
"logits/chosen": -1.3472093343734741, |
|
"logits/rejected": -1.27553129196167, |
|
"logps/chosen": -528.8458251953125, |
|
"logps/rejected": -600.9093627929688, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.674180507659912, |
|
"rewards/margins": 0.9610416293144226, |
|
"rewards/rejected": -3.6352221965789795, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 3.676241067609465e-07, |
|
"logits/chosen": -1.4311882257461548, |
|
"logits/rejected": -1.3133292198181152, |
|
"logps/chosen": -566.4583740234375, |
|
"logps/rejected": -629.7789306640625, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.7275829315185547, |
|
"rewards/margins": 1.0640867948532104, |
|
"rewards/rejected": -3.791670322418213, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 13.3125, |
|
"learning_rate": 3.5578858740274976e-07, |
|
"logits/chosen": -1.3556554317474365, |
|
"logits/rejected": -1.2464677095413208, |
|
"logps/chosen": -546.6044311523438, |
|
"logps/rejected": -628.5745849609375, |
|
"loss": 0.5223, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.848706007003784, |
|
"rewards/margins": 0.9455618858337402, |
|
"rewards/rejected": -3.7942676544189453, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 10.875, |
|
"learning_rate": 3.44132109080447e-07, |
|
"logits/chosen": -1.5402499437332153, |
|
"logits/rejected": -1.3627904653549194, |
|
"logps/chosen": -532.7853393554688, |
|
"logps/rejected": -620.8280029296875, |
|
"loss": 0.4349, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.6756184101104736, |
|
"rewards/margins": 1.2552212476730347, |
|
"rewards/rejected": -3.9308395385742188, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 11.625, |
|
"learning_rate": 3.3265564510662344e-07, |
|
"logits/chosen": -1.4746288061141968, |
|
"logits/rejected": -1.3366063833236694, |
|
"logps/chosen": -556.5170288085938, |
|
"logps/rejected": -653.8280029296875, |
|
"loss": 0.4486, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.631749391555786, |
|
"rewards/margins": 1.22456693649292, |
|
"rewards/rejected": -3.856316328048706, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 13.4375, |
|
"learning_rate": 3.213601537627195e-07, |
|
"logits/chosen": -1.3857685327529907, |
|
"logits/rejected": -1.2827407121658325, |
|
"logps/chosen": -551.5367431640625, |
|
"logps/rejected": -642.1528930664062, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.944179058074951, |
|
"rewards/margins": 1.0771982669830322, |
|
"rewards/rejected": -4.021376609802246, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 13.1875, |
|
"learning_rate": 3.1024657821901063e-07, |
|
"logits/chosen": -1.4493802785873413, |
|
"logits/rejected": -1.3605637550354004, |
|
"logps/chosen": -506.7554626464844, |
|
"logps/rejected": -613.6812744140625, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.590136766433716, |
|
"rewards/margins": 1.2277183532714844, |
|
"rewards/rejected": -3.8178551197052, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 14.1875, |
|
"learning_rate": 2.9931584645585654e-07, |
|
"logits/chosen": -1.3915107250213623, |
|
"logits/rejected": -1.3630131483078003, |
|
"logps/chosen": -540.7659912109375, |
|
"logps/rejected": -647.734375, |
|
"loss": 0.5041, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6625149250030518, |
|
"rewards/margins": 1.0405315160751343, |
|
"rewards/rejected": -3.7030467987060547, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 5.5625, |
|
"learning_rate": 2.885688711862136e-07, |
|
"logits/chosen": -1.4039568901062012, |
|
"logits/rejected": -1.4016002416610718, |
|
"logps/chosen": -542.4031372070312, |
|
"logps/rejected": -669.4109497070312, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.813516139984131, |
|
"rewards/margins": 1.2897857427597046, |
|
"rewards/rejected": -4.103302001953125, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 13.9375, |
|
"learning_rate": 2.7800654977942486e-07, |
|
"logits/chosen": -1.3982176780700684, |
|
"logits/rejected": -1.2750059366226196, |
|
"logps/chosen": -529.8203735351562, |
|
"logps/rejected": -631.67431640625, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.7246382236480713, |
|
"rewards/margins": 1.056668996810913, |
|
"rewards/rejected": -3.7813076972961426, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": -1.3351857662200928, |
|
"eval_logits/rejected": -1.208348274230957, |
|
"eval_logps/chosen": -542.1024780273438, |
|
"eval_logps/rejected": -634.3662109375, |
|
"eval_loss": 0.4894912838935852, |
|
"eval_rewards/accuracies": 0.7434999942779541, |
|
"eval_rewards/chosen": -2.7745048999786377, |
|
"eval_rewards/margins": 1.123070478439331, |
|
"eval_rewards/rejected": -3.8975753784179688, |
|
"eval_runtime": 384.9043, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 0.65, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 14.3125, |
|
"learning_rate": 2.6762976418628797e-07, |
|
"logits/chosen": -1.434815764427185, |
|
"logits/rejected": -1.2825909852981567, |
|
"logps/chosen": -497.67559814453125, |
|
"logps/rejected": -557.65478515625, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.67899751663208, |
|
"rewards/margins": 1.0358701944351196, |
|
"rewards/rejected": -3.714867353439331, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 17.0, |
|
"learning_rate": 2.5743938086541354e-07, |
|
"logits/chosen": -1.4120391607284546, |
|
"logits/rejected": -1.282286524772644, |
|
"logps/chosen": -537.319580078125, |
|
"logps/rejected": -624.9915771484375, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.730372428894043, |
|
"rewards/margins": 1.1250317096710205, |
|
"rewards/rejected": -3.8554039001464844, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 13.125, |
|
"learning_rate": 2.4743625071087574e-07, |
|
"logits/chosen": -1.5484386682510376, |
|
"logits/rejected": -1.370973825454712, |
|
"logps/chosen": -544.7464599609375, |
|
"logps/rejected": -646.3916625976562, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6717612743377686, |
|
"rewards/margins": 1.311167597770691, |
|
"rewards/rejected": -3.982929229736328, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 10.5, |
|
"learning_rate": 2.3762120898116498e-07, |
|
"logits/chosen": -1.428130865097046, |
|
"logits/rejected": -1.3167780637741089, |
|
"logps/chosen": -561.8053588867188, |
|
"logps/rejected": -654.1575927734375, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.930124282836914, |
|
"rewards/margins": 1.0036227703094482, |
|
"rewards/rejected": -3.9337470531463623, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 2.2799507522944048e-07, |
|
"logits/chosen": -1.361971139907837, |
|
"logits/rejected": -1.2745027542114258, |
|
"logps/chosen": -532.9896240234375, |
|
"logps/rejected": -649.6702270507812, |
|
"loss": 0.4514, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.671921730041504, |
|
"rewards/margins": 1.2491633892059326, |
|
"rewards/rejected": -3.9210853576660156, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 11.375, |
|
"learning_rate": 2.1855865323510056e-07, |
|
"logits/chosen": -1.424619436264038, |
|
"logits/rejected": -1.2369648218154907, |
|
"logps/chosen": -545.8442993164062, |
|
"logps/rejected": -683.6163330078125, |
|
"loss": 0.4292, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.731658458709717, |
|
"rewards/margins": 1.4368107318878174, |
|
"rewards/rejected": -4.168468952178955, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 2.0931273093666575e-07, |
|
"logits/chosen": -1.3846908807754517, |
|
"logits/rejected": -1.2318280935287476, |
|
"logps/chosen": -527.0106201171875, |
|
"logps/rejected": -627.6875, |
|
"loss": 0.4511, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.868238925933838, |
|
"rewards/margins": 1.2093108892440796, |
|
"rewards/rejected": -4.077549934387207, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 12.0, |
|
"learning_rate": 2.002580803659873e-07, |
|
"logits/chosen": -1.3799206018447876, |
|
"logits/rejected": -1.2638591527938843, |
|
"logps/chosen": -536.2264404296875, |
|
"logps/rejected": -633.6256713867188, |
|
"loss": 0.4582, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.816713333129883, |
|
"rewards/margins": 1.155531406402588, |
|
"rewards/rejected": -3.97224497795105, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 6.125, |
|
"learning_rate": 1.913954575837826e-07, |
|
"logits/chosen": -1.455733299255371, |
|
"logits/rejected": -1.1953377723693848, |
|
"logps/chosen": -554.5827026367188, |
|
"logps/rejected": -613.561767578125, |
|
"loss": 0.4729, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.8246824741363525, |
|
"rewards/margins": 1.0867326259613037, |
|
"rewards/rejected": -3.9114151000976562, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.827256026165028e-07, |
|
"logits/chosen": -1.46336829662323, |
|
"logits/rejected": -1.2622567415237427, |
|
"logps/chosen": -575.8062744140625, |
|
"logps/rejected": -644.166748046875, |
|
"loss": 0.4526, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.6073572635650635, |
|
"rewards/margins": 1.2542911767959595, |
|
"rewards/rejected": -3.8616480827331543, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -1.3318783044815063, |
|
"eval_logits/rejected": -1.205055594444275, |
|
"eval_logps/chosen": -543.2083129882812, |
|
"eval_logps/rejected": -635.9655151367188, |
|
"eval_loss": 0.4895820915699005, |
|
"eval_rewards/accuracies": 0.7444999814033508, |
|
"eval_rewards/chosen": -2.785562753677368, |
|
"eval_rewards/margins": 1.1280065774917603, |
|
"eval_rewards/rejected": -3.913569450378418, |
|
"eval_runtime": 384.9489, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 11.4375, |
|
"learning_rate": 1.7424923939454274e-07, |
|
"logits/chosen": -1.4153268337249756, |
|
"logits/rejected": -1.235887885093689, |
|
"logps/chosen": -558.7437744140625, |
|
"logps/rejected": -640.8536376953125, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.77394437789917, |
|
"rewards/margins": 1.254831075668335, |
|
"rewards/rejected": -4.028775215148926, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 19.25, |
|
"learning_rate": 1.6596707569179304e-07, |
|
"logits/chosen": -1.4984407424926758, |
|
"logits/rejected": -1.3432929515838623, |
|
"logps/chosen": -559.1913452148438, |
|
"logps/rejected": -636.6985473632812, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.7800402641296387, |
|
"rewards/margins": 1.1263272762298584, |
|
"rewards/rejected": -3.906367540359497, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 8.75, |
|
"learning_rate": 1.578798030665385e-07, |
|
"logits/chosen": -1.4436315298080444, |
|
"logits/rejected": -1.2577316761016846, |
|
"logps/chosen": -546.3856811523438, |
|
"logps/rejected": -664.1823120117188, |
|
"loss": 0.4391, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.7168757915496826, |
|
"rewards/margins": 1.348455786705017, |
|
"rewards/rejected": -4.065331935882568, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 8.875, |
|
"learning_rate": 1.499880968037165e-07, |
|
"logits/chosen": -1.4255344867706299, |
|
"logits/rejected": -1.2893887758255005, |
|
"logps/chosen": -532.6036376953125, |
|
"logps/rejected": -603.4581298828125, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.7699015140533447, |
|
"rewards/margins": 1.071218729019165, |
|
"rewards/rejected": -3.8411202430725098, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 12.4375, |
|
"learning_rate": 1.4229261585852805e-07, |
|
"logits/chosen": -1.4486864805221558, |
|
"logits/rejected": -1.366389513015747, |
|
"logps/chosen": -536.9041748046875, |
|
"logps/rejected": -628.5606689453125, |
|
"loss": 0.4501, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.6847262382507324, |
|
"rewards/margins": 1.1747690439224243, |
|
"rewards/rejected": -3.859494686126709, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 11.5, |
|
"learning_rate": 1.3479400280141886e-07, |
|
"logits/chosen": -1.3643203973770142, |
|
"logits/rejected": -1.319645643234253, |
|
"logps/chosen": -524.4163818359375, |
|
"logps/rejected": -644.7371826171875, |
|
"loss": 0.4688, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.781571865081787, |
|
"rewards/margins": 1.223384976387024, |
|
"rewards/rejected": -4.00495719909668, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 7.25, |
|
"learning_rate": 1.2749288376442044e-07, |
|
"logits/chosen": -1.4585063457489014, |
|
"logits/rejected": -1.2669061422348022, |
|
"logps/chosen": -567.2780151367188, |
|
"logps/rejected": -623.2936401367188, |
|
"loss": 0.4761, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.70743727684021, |
|
"rewards/margins": 1.135317325592041, |
|
"rewards/rejected": -3.842755079269409, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.203898683888713e-07, |
|
"logits/chosen": -1.4662349224090576, |
|
"logits/rejected": -1.3190191984176636, |
|
"logps/chosen": -531.9495849609375, |
|
"logps/rejected": -622.1035766601562, |
|
"loss": 0.5705, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.879521369934082, |
|
"rewards/margins": 0.9325839281082153, |
|
"rewards/rejected": -3.812105178833008, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 8.875, |
|
"learning_rate": 1.1348554977451132e-07, |
|
"logits/chosen": -1.4893817901611328, |
|
"logits/rejected": -1.3383376598358154, |
|
"logps/chosen": -555.6965942382812, |
|
"logps/rejected": -626.2882690429688, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.737074613571167, |
|
"rewards/margins": 1.034942865371704, |
|
"rewards/rejected": -3.77201771736145, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 1.0678050442995802e-07, |
|
"logits/chosen": -1.444490671157837, |
|
"logits/rejected": -1.2492029666900635, |
|
"logps/chosen": -563.559326171875, |
|
"logps/rejected": -623.2242431640625, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.8276607990264893, |
|
"rewards/margins": 1.0474214553833008, |
|
"rewards/rejected": -3.875082015991211, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -1.331380009651184, |
|
"eval_logits/rejected": -1.2045425176620483, |
|
"eval_logps/chosen": -543.0230712890625, |
|
"eval_logps/rejected": -635.9038696289062, |
|
"eval_loss": 0.4895781874656677, |
|
"eval_rewards/accuracies": 0.7440000176429749, |
|
"eval_rewards/chosen": -2.783710479736328, |
|
"eval_rewards/margins": 1.1292426586151123, |
|
"eval_rewards/rejected": -3.9129531383514404, |
|
"eval_runtime": 385.1815, |
|
"eval_samples_per_second": 5.192, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 1.0027529222456755e-07, |
|
"logits/chosen": -1.423830509185791, |
|
"logits/rejected": -1.2484782934188843, |
|
"logps/chosen": -528.2448120117188, |
|
"logps/rejected": -626.2152099609375, |
|
"loss": 0.4541, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.7407870292663574, |
|
"rewards/margins": 1.155261754989624, |
|
"rewards/rejected": -3.8960487842559814, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 9.397045634168766e-08, |
|
"logits/chosen": -1.4465529918670654, |
|
"logits/rejected": -1.3796955347061157, |
|
"logps/chosen": -536.9094848632812, |
|
"logps/rejected": -664.8760375976562, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.6832659244537354, |
|
"rewards/margins": 1.277266502380371, |
|
"rewards/rejected": -3.9605324268341064, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 10.125, |
|
"learning_rate": 8.78665232332998e-08, |
|
"logits/chosen": -1.3762016296386719, |
|
"logits/rejected": -1.3040244579315186, |
|
"logps/chosen": -512.94140625, |
|
"logps/rejected": -617.2539672851562, |
|
"loss": 0.4859, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.8223133087158203, |
|
"rewards/margins": 1.0507687330245972, |
|
"rewards/rejected": -3.873081922531128, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 9.5, |
|
"learning_rate": 8.196400257606208e-08, |
|
"logits/chosen": -1.4817397594451904, |
|
"logits/rejected": -1.334993839263916, |
|
"logps/chosen": -555.3631591796875, |
|
"logps/rejected": -686.2461547851562, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.72278094291687, |
|
"rewards/margins": 1.3615996837615967, |
|
"rewards/rejected": -4.084380626678467, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 8.875, |
|
"learning_rate": 7.626338722875076e-08, |
|
"logits/chosen": -1.423555612564087, |
|
"logits/rejected": -1.3563083410263062, |
|
"logps/chosen": -526.125244140625, |
|
"logps/rejected": -638.4963989257812, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.720315933227539, |
|
"rewards/margins": 1.1140156984329224, |
|
"rewards/rejected": -3.83433198928833, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 6.125, |
|
"learning_rate": 7.076515319110688e-08, |
|
"logits/chosen": -1.42192804813385, |
|
"logits/rejected": -1.3248012065887451, |
|
"logps/chosen": -528.8367919921875, |
|
"logps/rejected": -604.7586669921875, |
|
"loss": 0.5312, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.725490093231201, |
|
"rewards/margins": 1.1749989986419678, |
|
"rewards/rejected": -3.900489091873169, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 6.54697595640899e-08, |
|
"logits/chosen": -1.4399796724319458, |
|
"logits/rejected": -1.3037444353103638, |
|
"logps/chosen": -573.2326049804688, |
|
"logps/rejected": -660.8978271484375, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.8101038932800293, |
|
"rewards/margins": 1.1283040046691895, |
|
"rewards/rejected": -3.938408374786377, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 8.625, |
|
"learning_rate": 6.037764851154426e-08, |
|
"logits/chosen": -1.425642490386963, |
|
"logits/rejected": -1.3774255514144897, |
|
"logps/chosen": -533.1048583984375, |
|
"logps/rejected": -653.04736328125, |
|
"loss": 0.4959, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.6904561519622803, |
|
"rewards/margins": 1.1518871784210205, |
|
"rewards/rejected": -3.842343807220459, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 7.5625, |
|
"learning_rate": 5.548924522327748e-08, |
|
"logits/chosen": -1.4221152067184448, |
|
"logits/rejected": -1.2739677429199219, |
|
"logps/chosen": -533.532958984375, |
|
"logps/rejected": -629.9300537109375, |
|
"loss": 0.4811, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.7177557945251465, |
|
"rewards/margins": 1.110734224319458, |
|
"rewards/rejected": -3.8284904956817627, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 10.375, |
|
"learning_rate": 5.0804957879556915e-08, |
|
"logits/chosen": -1.3408573865890503, |
|
"logits/rejected": -1.2565336227416992, |
|
"logps/chosen": -498.77581787109375, |
|
"logps/rejected": -613.9055786132812, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.732095241546631, |
|
"rewards/margins": 1.105553150177002, |
|
"rewards/rejected": -3.837648391723633, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -1.337357521057129, |
|
"eval_logits/rejected": -1.210402011871338, |
|
"eval_logps/chosen": -543.2186279296875, |
|
"eval_logps/rejected": -636.1134643554688, |
|
"eval_loss": 0.4894636869430542, |
|
"eval_rewards/accuracies": 0.7434999942779541, |
|
"eval_rewards/chosen": -2.785665988922119, |
|
"eval_rewards/margins": 1.1293821334838867, |
|
"eval_rewards/rejected": -3.915048122406006, |
|
"eval_runtime": 384.9877, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 4.632517761702815e-08, |
|
"logits/chosen": -1.3654999732971191, |
|
"logits/rejected": -1.2209546566009521, |
|
"logps/chosen": -514.5064697265625, |
|
"logps/rejected": -634.9259033203125, |
|
"loss": 0.451, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.7906320095062256, |
|
"rewards/margins": 1.32748281955719, |
|
"rewards/rejected": -4.118114948272705, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 4.205027849605359e-08, |
|
"logits/chosen": -1.3972408771514893, |
|
"logits/rejected": -1.2909516096115112, |
|
"logps/chosen": -535.4818115234375, |
|
"logps/rejected": -608.0510864257812, |
|
"loss": 0.5396, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.875584363937378, |
|
"rewards/margins": 1.0263030529022217, |
|
"rewards/rejected": -3.9018874168395996, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 9.875, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -1.5177159309387207, |
|
"logits/rejected": -1.354299783706665, |
|
"logps/chosen": -539.2265014648438, |
|
"logps/rejected": -615.7210693359375, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.750765800476074, |
|
"rewards/margins": 1.1568615436553955, |
|
"rewards/rejected": -3.907627820968628, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 10.5, |
|
"learning_rate": 3.411653435283158e-08, |
|
"logits/chosen": -1.4375641345977783, |
|
"logits/rejected": -1.2251198291778564, |
|
"logps/chosen": -544.5794677734375, |
|
"logps/rejected": -597.5071411132812, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.713228464126587, |
|
"rewards/margins": 1.0865360498428345, |
|
"rewards/rejected": -3.799764633178711, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 13.125, |
|
"learning_rate": 3.04583517959367e-08, |
|
"logits/chosen": -1.4777367115020752, |
|
"logits/rejected": -1.3228670358657837, |
|
"logps/chosen": -516.0816650390625, |
|
"logps/rejected": -592.5147094726562, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.6396498680114746, |
|
"rewards/margins": 1.0887558460235596, |
|
"rewards/rejected": -3.728405714035034, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 15.25, |
|
"learning_rate": 2.7006375255985984e-08, |
|
"logits/chosen": -1.4119120836257935, |
|
"logits/rejected": -1.3788807392120361, |
|
"logps/chosen": -553.791015625, |
|
"logps/rejected": -641.79443359375, |
|
"loss": 0.5775, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.8898892402648926, |
|
"rewards/margins": 0.8745861053466797, |
|
"rewards/rejected": -3.7644753456115723, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 10.0, |
|
"learning_rate": 2.3760892972027328e-08, |
|
"logits/chosen": -1.5255684852600098, |
|
"logits/rejected": -1.3464335203170776, |
|
"logps/chosen": -559.984375, |
|
"logps/rejected": -636.6268310546875, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.874812602996826, |
|
"rewards/margins": 1.1065670251846313, |
|
"rewards/rejected": -3.981379270553589, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 2.072217594089765e-08, |
|
"logits/chosen": -1.3876298666000366, |
|
"logits/rejected": -1.365395188331604, |
|
"logps/chosen": -542.5714721679688, |
|
"logps/rejected": -655.452880859375, |
|
"loss": 0.4209, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.8103041648864746, |
|
"rewards/margins": 1.2513482570648193, |
|
"rewards/rejected": -4.061652660369873, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 8.375, |
|
"learning_rate": 1.789047789459375e-08, |
|
"logits/chosen": -1.488208532333374, |
|
"logits/rejected": -1.2888844013214111, |
|
"logps/chosen": -593.5070190429688, |
|
"logps/rejected": -661.3151245117188, |
|
"loss": 0.5217, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.765303373336792, |
|
"rewards/margins": 1.168646216392517, |
|
"rewards/rejected": -3.9339499473571777, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 1.5266035279088708e-08, |
|
"logits/chosen": -1.322689414024353, |
|
"logits/rejected": -1.1827501058578491, |
|
"logps/chosen": -589.371826171875, |
|
"logps/rejected": -676.3383178710938, |
|
"loss": 0.4797, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.927542209625244, |
|
"rewards/margins": 1.131638765335083, |
|
"rewards/rejected": -4.059180736541748, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/chosen": -1.3342877626419067, |
|
"eval_logits/rejected": -1.2075273990631104, |
|
"eval_logps/chosen": -543.0763549804688, |
|
"eval_logps/rejected": -635.919189453125, |
|
"eval_loss": 0.4895748794078827, |
|
"eval_rewards/accuracies": 0.7434999942779541, |
|
"eval_rewards/chosen": -2.784243583679199, |
|
"eval_rewards/margins": 1.1288617849349976, |
|
"eval_rewards/rejected": -3.9131054878234863, |
|
"eval_runtime": 385.4925, |
|
"eval_samples_per_second": 5.188, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 16.0, |
|
"learning_rate": 1.2849067234584623e-08, |
|
"logits/chosen": -1.3140819072723389, |
|
"logits/rejected": -1.2478830814361572, |
|
"logps/chosen": -514.704833984375, |
|
"logps/rejected": -628.93017578125, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.7834410667419434, |
|
"rewards/margins": 1.2183729410171509, |
|
"rewards/rejected": -4.001813888549805, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 15.1875, |
|
"learning_rate": 1.0639775577218625e-08, |
|
"logits/chosen": -1.3101227283477783, |
|
"logits/rejected": -1.1396509408950806, |
|
"logps/chosen": -528.97265625, |
|
"logps/rejected": -609.4863891601562, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.7978572845458984, |
|
"rewards/margins": 1.1644933223724365, |
|
"rewards/rejected": -3.962350368499756, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 8.638344782207486e-09, |
|
"logits/chosen": -1.3434375524520874, |
|
"logits/rejected": -1.2269071340560913, |
|
"logps/chosen": -513.5137939453125, |
|
"logps/rejected": -601.6809692382812, |
|
"loss": 0.4808, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.6822597980499268, |
|
"rewards/margins": 1.1096186637878418, |
|
"rewards/rejected": -3.7918784618377686, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 9.875, |
|
"learning_rate": 6.84494196844715e-09, |
|
"logits/chosen": -1.3948211669921875, |
|
"logits/rejected": -1.2849534749984741, |
|
"logps/chosen": -548.9085083007812, |
|
"logps/rejected": -666.4553833007812, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.7680201530456543, |
|
"rewards/margins": 1.2913219928741455, |
|
"rewards/rejected": -4.059341907501221, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 8.875, |
|
"learning_rate": 5.259716884556121e-09, |
|
"logits/chosen": -1.4579029083251953, |
|
"logits/rejected": -1.3110452890396118, |
|
"logps/chosen": -538.3919067382812, |
|
"logps/rejected": -639.5615234375, |
|
"loss": 0.4595, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.739250659942627, |
|
"rewards/margins": 1.1599111557006836, |
|
"rewards/rejected": -3.8991622924804688, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 3.882801896372967e-09, |
|
"logits/chosen": -1.4429559707641602, |
|
"logits/rejected": -1.3740085363388062, |
|
"logps/chosen": -534.4335327148438, |
|
"logps/rejected": -618.3828735351562, |
|
"loss": 0.4868, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.6907172203063965, |
|
"rewards/margins": 1.1578295230865479, |
|
"rewards/rejected": -3.8485465049743652, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 8.625, |
|
"learning_rate": 2.7143119759026614e-09, |
|
"logits/chosen": -1.4640624523162842, |
|
"logits/rejected": -1.2803726196289062, |
|
"logps/chosen": -557.8724975585938, |
|
"logps/rejected": -646.456298828125, |
|
"loss": 0.4267, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.7360005378723145, |
|
"rewards/margins": 1.1408417224884033, |
|
"rewards/rejected": -3.8768420219421387, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 11.125, |
|
"learning_rate": 1.754344691717591e-09, |
|
"logits/chosen": -1.3610948324203491, |
|
"logits/rejected": -1.3128563165664673, |
|
"logps/chosen": -533.3128662109375, |
|
"logps/rejected": -645.0467529296875, |
|
"loss": 0.5359, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.835211992263794, |
|
"rewards/margins": 0.896623432636261, |
|
"rewards/rejected": -3.731835126876831, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 1.0029802008096335e-09, |
|
"logits/chosen": -1.3813669681549072, |
|
"logits/rejected": -1.2357242107391357, |
|
"logps/chosen": -551.9505615234375, |
|
"logps/rejected": -645.6619262695312, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7765860557556152, |
|
"rewards/margins": 1.1649653911590576, |
|
"rewards/rejected": -3.941551685333252, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 11.0, |
|
"learning_rate": 4.602812418974534e-10, |
|
"logits/chosen": -1.48513925075531, |
|
"logits/rejected": -1.3569138050079346, |
|
"logps/chosen": -561.6650390625, |
|
"logps/rejected": -650.4069213867188, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.7982420921325684, |
|
"rewards/margins": 1.1328747272491455, |
|
"rewards/rejected": -3.931116819381714, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -1.33402681350708, |
|
"eval_logits/rejected": -1.207356572151184, |
|
"eval_logps/chosen": -543.0327758789062, |
|
"eval_logps/rejected": -635.90625, |
|
"eval_loss": 0.48944273591041565, |
|
"eval_rewards/accuracies": 0.7444999814033508, |
|
"eval_rewards/chosen": -2.7838070392608643, |
|
"eval_rewards/margins": 1.1291695833206177, |
|
"eval_rewards/rejected": -3.9129767417907715, |
|
"eval_runtime": 385.4448, |
|
"eval_samples_per_second": 5.189, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 11.25, |
|
"learning_rate": 1.2629313018819312e-10, |
|
"logits/chosen": -1.4069092273712158, |
|
"logits/rejected": -1.28346848487854, |
|
"logps/chosen": -526.5909423828125, |
|
"logps/rejected": -611.5968017578125, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7411856651306152, |
|
"rewards/margins": 1.006593942642212, |
|
"rewards/rejected": -3.7477798461914062, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 20.25, |
|
"learning_rate": 1.0437535929996855e-12, |
|
"logits/chosen": -1.399791955947876, |
|
"logits/rejected": -1.2320655584335327, |
|
"logps/chosen": -567.6913452148438, |
|
"logps/rejected": -660.8294067382812, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.8092477321624756, |
|
"rewards/margins": 1.3779563903808594, |
|
"rewards/rejected": -4.187203884124756, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.521260229900413, |
|
"train_runtime": 42446.6624, |
|
"train_samples_per_second": 1.44, |
|
"train_steps_per_second": 0.09 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |