|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9667896678966788, |
|
"eval_steps": 40, |
|
"global_step": 201, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07380073800738007, |
|
"grad_norm": 80.02477445251274, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.7569785118103027, |
|
"logits/rejected": -2.715679883956909, |
|
"logps/chosen": -343.655517578125, |
|
"logps/rejected": -244.0912628173828, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": 0.02633141539990902, |
|
"rewards/margins": 0.006850541569292545, |
|
"rewards/rejected": 0.0194808728992939, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.14760147601476015, |
|
"grad_norm": 65.98268514011825, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.676809787750244, |
|
"logits/rejected": -2.666592836380005, |
|
"logps/chosen": -296.428955078125, |
|
"logps/rejected": -247.4902801513672, |
|
"loss": 0.6147, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.8011910319328308, |
|
"rewards/margins": 0.2567104995250702, |
|
"rewards/rejected": 0.5444804430007935, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22140221402214022, |
|
"grad_norm": 48.66427015180346, |
|
"learning_rate": 9.983100718730718e-07, |
|
"logits/chosen": -2.416226863861084, |
|
"logits/rejected": -2.3806653022766113, |
|
"logps/chosen": -316.8359069824219, |
|
"logps/rejected": -258.2687683105469, |
|
"loss": 0.6095, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.1471664905548096, |
|
"rewards/margins": 0.7412694692611694, |
|
"rewards/rejected": 1.4058969020843506, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2952029520295203, |
|
"grad_norm": 50.05057195236849, |
|
"learning_rate": 9.932517109205849e-07, |
|
"logits/chosen": -2.1923749446868896, |
|
"logits/rejected": -2.1478309631347656, |
|
"logps/chosen": -294.5142517089844, |
|
"logps/rejected": -243.7734375, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 2.3299460411071777, |
|
"rewards/margins": 1.3834998607635498, |
|
"rewards/rejected": 0.9464457631111145, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.36900369003690037, |
|
"grad_norm": 41.37624373189553, |
|
"learning_rate": 9.848591102083375e-07, |
|
"logits/chosen": -2.0363731384277344, |
|
"logits/rejected": -2.030383348464966, |
|
"logps/chosen": -282.7300720214844, |
|
"logps/rejected": -221.184326171875, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 2.7908506393432617, |
|
"rewards/margins": 1.624943494796753, |
|
"rewards/rejected": 1.1659072637557983, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.44280442804428044, |
|
"grad_norm": 43.83501071765918, |
|
"learning_rate": 9.731890013043367e-07, |
|
"logits/chosen": -2.0403037071228027, |
|
"logits/rejected": -1.9934555292129517, |
|
"logps/chosen": -325.14227294921875, |
|
"logps/rejected": -214.34542846679688, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 2.984839916229248, |
|
"rewards/margins": 1.5722445249557495, |
|
"rewards/rejected": 1.412595510482788, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5166051660516605, |
|
"grad_norm": 47.665657648113644, |
|
"learning_rate": 9.583202707897073e-07, |
|
"logits/chosen": -2.0699315071105957, |
|
"logits/rejected": -2.042548418045044, |
|
"logps/chosen": -318.35357666015625, |
|
"logps/rejected": -221.4462432861328, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 3.141150951385498, |
|
"rewards/margins": 1.8329731225967407, |
|
"rewards/rejected": 1.3081778287887573, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5904059040590406, |
|
"grad_norm": 42.1852532770112, |
|
"learning_rate": 9.403534270080829e-07, |
|
"logits/chosen": -2.1574552059173584, |
|
"logits/rejected": -2.105395555496216, |
|
"logps/chosen": -282.8706359863281, |
|
"logps/rejected": -239.42562866210938, |
|
"loss": 0.563, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 2.571629524230957, |
|
"rewards/margins": 1.7009865045547485, |
|
"rewards/rejected": 0.8706433176994324, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5904059040590406, |
|
"eval_logits/chosen": -2.179224967956543, |
|
"eval_logits/rejected": -2.15881085395813, |
|
"eval_logps/chosen": -304.3701171875, |
|
"eval_logps/rejected": -235.69309997558594, |
|
"eval_loss": 0.4594477713108063, |
|
"eval_rewards/accuracies": 0.8185483813285828, |
|
"eval_rewards/chosen": 2.485563278198242, |
|
"eval_rewards/margins": 1.8135225772857666, |
|
"eval_rewards/rejected": 0.6720407009124756, |
|
"eval_runtime": 131.0305, |
|
"eval_samples_per_second": 14.661, |
|
"eval_steps_per_second": 0.237, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6642066420664207, |
|
"grad_norm": 38.916777514219696, |
|
"learning_rate": 9.19409920658098e-07, |
|
"logits/chosen": -2.225562572479248, |
|
"logits/rejected": -2.181002378463745, |
|
"logps/chosen": -276.44537353515625, |
|
"logps/rejected": -232.626220703125, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 2.278400182723999, |
|
"rewards/margins": 1.4811707735061646, |
|
"rewards/rejected": 0.7972294092178345, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.7380073800738007, |
|
"grad_norm": 50.710250280321, |
|
"learning_rate": 8.956313238215823e-07, |
|
"logits/chosen": -2.2307848930358887, |
|
"logits/rejected": -2.1967437267303467, |
|
"logps/chosen": -313.6961364746094, |
|
"logps/rejected": -241.0548858642578, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 2.592728853225708, |
|
"rewards/margins": 1.948052167892456, |
|
"rewards/rejected": 0.6446765661239624, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8118081180811808, |
|
"grad_norm": 39.063704669645155, |
|
"learning_rate": 8.691783729769873e-07, |
|
"logits/chosen": -2.139880895614624, |
|
"logits/rejected": -2.139148712158203, |
|
"logps/chosen": -299.7575988769531, |
|
"logps/rejected": -245.935546875, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 2.354003429412842, |
|
"rewards/margins": 1.81247878074646, |
|
"rewards/rejected": 0.5415242910385132, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8856088560885609, |
|
"grad_norm": 33.2796085112328, |
|
"learning_rate": 8.402298824670029e-07, |
|
"logits/chosen": -2.0772578716278076, |
|
"logits/rejected": -2.054955005645752, |
|
"logps/chosen": -295.5028991699219, |
|
"logps/rejected": -244.0660858154297, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 2.2515780925750732, |
|
"rewards/margins": 1.5721994638442993, |
|
"rewards/rejected": 0.6793786883354187, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.959409594095941, |
|
"grad_norm": 36.14638757212613, |
|
"learning_rate": 8.089815357650089e-07, |
|
"logits/chosen": -2.0140891075134277, |
|
"logits/rejected": -1.9471585750579834, |
|
"logps/chosen": -302.58148193359375, |
|
"logps/rejected": -237.9540252685547, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 2.2041029930114746, |
|
"rewards/margins": 2.0399723052978516, |
|
"rewards/rejected": 0.16413061320781708, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.033210332103321, |
|
"grad_norm": 21.302121663013374, |
|
"learning_rate": 7.756445627110522e-07, |
|
"logits/chosen": -2.040945053100586, |
|
"logits/rejected": -2.0241832733154297, |
|
"logps/chosen": -312.1359558105469, |
|
"logps/rejected": -239.3393096923828, |
|
"loss": 0.3303, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 2.603544235229492, |
|
"rewards/margins": 2.4756617546081543, |
|
"rewards/rejected": 0.12788262963294983, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.1070110701107012, |
|
"grad_norm": 20.556094388092646, |
|
"learning_rate": 7.404443116588547e-07, |
|
"logits/chosen": -2.104165554046631, |
|
"logits/rejected": -2.059689521789551, |
|
"logps/chosen": -294.634765625, |
|
"logps/rejected": -238.32437133789062, |
|
"loss": 0.129, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.189236879348755, |
|
"rewards/margins": 3.7732062339782715, |
|
"rewards/rejected": -0.5839694142341614, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.1808118081180812, |
|
"grad_norm": 20.50931148538785, |
|
"learning_rate": 7.036187261857288e-07, |
|
"logits/chosen": -2.146726608276367, |
|
"logits/rejected": -2.1075119972229004, |
|
"logps/chosen": -297.4272155761719, |
|
"logps/rejected": -262.4473876953125, |
|
"loss": 0.154, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 3.2519805431365967, |
|
"rewards/margins": 3.6943678855895996, |
|
"rewards/rejected": -0.44238725304603577, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1808118081180812, |
|
"eval_logits/chosen": -2.13566517829895, |
|
"eval_logits/rejected": -2.110398054122925, |
|
"eval_logps/chosen": -301.3644104003906, |
|
"eval_logps/rejected": -238.48484802246094, |
|
"eval_loss": 0.46015238761901855, |
|
"eval_rewards/accuracies": 0.8427419066429138, |
|
"eval_rewards/chosen": 2.7861340045928955, |
|
"eval_rewards/margins": 2.3932666778564453, |
|
"eval_rewards/rejected": 0.3928670585155487, |
|
"eval_runtime": 129.5743, |
|
"eval_samples_per_second": 14.825, |
|
"eval_steps_per_second": 0.239, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.2546125461254611, |
|
"grad_norm": 24.33309810818949, |
|
"learning_rate": 6.654167366624008e-07, |
|
"logits/chosen": -2.142047882080078, |
|
"logits/rejected": -2.1115987300872803, |
|
"logps/chosen": -289.6197204589844, |
|
"logps/rejected": -245.8259735107422, |
|
"loss": 0.1699, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 3.555595874786377, |
|
"rewards/margins": 4.105128288269043, |
|
"rewards/rejected": -0.5495321750640869, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.3284132841328413, |
|
"grad_norm": 23.507286919588484, |
|
"learning_rate": 6.260965775552713e-07, |
|
"logits/chosen": -2.1702046394348145, |
|
"logits/rejected": -2.1256089210510254, |
|
"logps/chosen": -299.5054626464844, |
|
"logps/rejected": -242.0937042236328, |
|
"loss": 0.159, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 3.9076132774353027, |
|
"rewards/margins": 4.560946464538574, |
|
"rewards/rejected": -0.6533328890800476, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.4022140221402215, |
|
"grad_norm": 15.516195820704533, |
|
"learning_rate": 5.859240418356614e-07, |
|
"logits/chosen": -2.1203560829162598, |
|
"logits/rejected": -2.07737398147583, |
|
"logps/chosen": -270.5323791503906, |
|
"logps/rejected": -282.30242919921875, |
|
"loss": 0.1745, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.4920401573181152, |
|
"rewards/margins": 4.871523380279541, |
|
"rewards/rejected": -1.3794825077056885, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.4760147601476015, |
|
"grad_norm": 15.962268006534465, |
|
"learning_rate": 5.451706842957421e-07, |
|
"logits/chosen": -2.0756678581237793, |
|
"logits/rejected": -2.0366768836975098, |
|
"logps/chosen": -285.35400390625, |
|
"logps/rejected": -261.02069091796875, |
|
"loss": 0.1518, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 3.5108916759490967, |
|
"rewards/margins": 4.940871715545654, |
|
"rewards/rejected": -1.4299800395965576, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.5498154981549814, |
|
"grad_norm": 25.320702801914457, |
|
"learning_rate": 5.041119859162068e-07, |
|
"logits/chosen": -2.1494388580322266, |
|
"logits/rejected": -2.1103031635284424, |
|
"logps/chosen": -291.79193115234375, |
|
"logps/rejected": -242.1620635986328, |
|
"loss": 0.1927, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 3.214129686355591, |
|
"rewards/margins": 4.194614410400391, |
|
"rewards/rejected": -0.980484664440155, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.6236162361623616, |
|
"grad_norm": 24.127332932431226, |
|
"learning_rate": 4.630254916940423e-07, |
|
"logits/chosen": -2.174290180206299, |
|
"logits/rejected": -2.179755926132202, |
|
"logps/chosen": -279.0810546875, |
|
"logps/rejected": -252.66488647460938, |
|
"loss": 0.1829, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 3.157012701034546, |
|
"rewards/margins": 4.443808078765869, |
|
"rewards/rejected": -1.2867956161499023, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.6974169741697418, |
|
"grad_norm": 20.11391135642748, |
|
"learning_rate": 4.2218893451814e-07, |
|
"logits/chosen": -2.2010812759399414, |
|
"logits/rejected": -2.164829730987549, |
|
"logps/chosen": -289.4188232421875, |
|
"logps/rejected": -246.65945434570312, |
|
"loss": 0.1934, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 3.433326244354248, |
|
"rewards/margins": 4.391345977783203, |
|
"rewards/rejected": -0.9580191373825073, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.7712177121771218, |
|
"grad_norm": 20.706343509306766, |
|
"learning_rate": 3.8187835777481375e-07, |
|
"logits/chosen": -2.176086187362671, |
|
"logits/rejected": -2.1578235626220703, |
|
"logps/chosen": -281.7149353027344, |
|
"logps/rejected": -265.0261535644531, |
|
"loss": 0.2027, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.2026546001434326, |
|
"rewards/margins": 4.289515495300293, |
|
"rewards/rejected": -1.0868606567382812, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.7712177121771218, |
|
"eval_logits/chosen": -2.1726152896881104, |
|
"eval_logits/rejected": -2.146054983139038, |
|
"eval_logps/chosen": -304.15960693359375, |
|
"eval_logps/rejected": -246.97988891601562, |
|
"eval_loss": 0.48685166239738464, |
|
"eval_rewards/accuracies": 0.8548387289047241, |
|
"eval_rewards/chosen": 2.5066120624542236, |
|
"eval_rewards/margins": 2.9632484912872314, |
|
"eval_rewards/rejected": -0.4566364884376526, |
|
"eval_runtime": 129.7757, |
|
"eval_samples_per_second": 14.802, |
|
"eval_steps_per_second": 0.239, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.8450184501845017, |
|
"grad_norm": 20.871306894670933, |
|
"learning_rate": 3.423662493738687e-07, |
|
"logits/chosen": -2.180792808532715, |
|
"logits/rejected": -2.159304141998291, |
|
"logps/chosen": -301.1511535644531, |
|
"logps/rejected": -255.13919067382812, |
|
"loss": 0.1609, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 3.2595107555389404, |
|
"rewards/margins": 4.297440528869629, |
|
"rewards/rejected": -1.037929654121399, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.918819188191882, |
|
"grad_norm": 27.947861559843737, |
|
"learning_rate": 3.039196998086687e-07, |
|
"logits/chosen": -2.136273145675659, |
|
"logits/rejected": -2.1014552116394043, |
|
"logps/chosen": -286.9736022949219, |
|
"logps/rejected": -244.7154083251953, |
|
"loss": 0.1847, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 3.395556926727295, |
|
"rewards/margins": 4.3099188804626465, |
|
"rewards/rejected": -0.9143617749214172, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.992619926199262, |
|
"grad_norm": 20.821197239752305, |
|
"learning_rate": 2.667985967011878e-07, |
|
"logits/chosen": -2.1088356971740723, |
|
"logits/rejected": -2.0703465938568115, |
|
"logps/chosen": -286.96917724609375, |
|
"logps/rejected": -256.48016357421875, |
|
"loss": 0.1724, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 3.350585460662842, |
|
"rewards/margins": 4.244786262512207, |
|
"rewards/rejected": -0.8942006826400757, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.066420664206642, |
|
"grad_norm": 8.245558323252546, |
|
"learning_rate": 2.3125386803640183e-07, |
|
"logits/chosen": -2.1218690872192383, |
|
"logits/rejected": -2.0660667419433594, |
|
"logps/chosen": -284.4044494628906, |
|
"logps/rejected": -270.7417907714844, |
|
"loss": 0.0938, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.351060152053833, |
|
"rewards/margins": 4.853818416595459, |
|
"rewards/rejected": -1.5027587413787842, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.140221402214022, |
|
"grad_norm": 14.140599014287302, |
|
"learning_rate": 1.9752578596124952e-07, |
|
"logits/chosen": -2.093632936477661, |
|
"logits/rejected": -2.0502517223358154, |
|
"logps/chosen": -288.5584716796875, |
|
"logps/rejected": -256.74652099609375, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 3.4150993824005127, |
|
"rewards/margins": 4.966043949127197, |
|
"rewards/rejected": -1.5509445667266846, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.2140221402214024, |
|
"grad_norm": 7.605905759499919, |
|
"learning_rate": 1.6584234261399532e-07, |
|
"logits/chosen": -2.0875797271728516, |
|
"logits/rejected": -2.0646932125091553, |
|
"logps/chosen": -295.5018310546875, |
|
"logps/rejected": -290.001708984375, |
|
"loss": 0.0579, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 3.694823741912842, |
|
"rewards/margins": 5.430555820465088, |
|
"rewards/rejected": -1.7357313632965088, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.2878228782287824, |
|
"grad_norm": 20.873090027101682, |
|
"learning_rate": 1.3641770896292082e-07, |
|
"logits/chosen": -2.0764248371124268, |
|
"logits/rejected": -2.060342311859131, |
|
"logps/chosen": -278.5547790527344, |
|
"logps/rejected": -249.08203125, |
|
"loss": 0.0718, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.3755805492401123, |
|
"rewards/margins": 5.271130084991455, |
|
"rewards/rejected": -1.8955495357513428, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.3616236162361623, |
|
"grad_norm": 12.7807011486128, |
|
"learning_rate": 1.0945078707215221e-07, |
|
"logits/chosen": -2.073279857635498, |
|
"logits/rejected": -2.0515029430389404, |
|
"logps/chosen": -279.70892333984375, |
|
"logps/rejected": -263.677734375, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 3.862575054168701, |
|
"rewards/margins": 5.486065864562988, |
|
"rewards/rejected": -1.623490571975708, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.3616236162361623, |
|
"eval_logits/chosen": -2.0765814781188965, |
|
"eval_logits/rejected": -2.042445182800293, |
|
"eval_logps/chosen": -301.5458984375, |
|
"eval_logps/rejected": -246.53857421875, |
|
"eval_loss": 0.48189839720726013, |
|
"eval_rewards/accuracies": 0.8629032373428345, |
|
"eval_rewards/chosen": 2.7679829597473145, |
|
"eval_rewards/margins": 3.1804890632629395, |
|
"eval_rewards/rejected": -0.412506103515625, |
|
"eval_runtime": 129.9118, |
|
"eval_samples_per_second": 14.787, |
|
"eval_steps_per_second": 0.239, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.4354243542435423, |
|
"grad_norm": 16.62994387557585, |
|
"learning_rate": 8.512386558088919e-08, |
|
"logits/chosen": -2.0903940200805664, |
|
"logits/rejected": -2.0252914428710938, |
|
"logps/chosen": -286.7425842285156, |
|
"logps/rejected": -249.64614868164062, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.8004047870635986, |
|
"rewards/margins": 5.124575614929199, |
|
"rewards/rejected": -1.3241703510284424, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.5092250922509223, |
|
"grad_norm": 13.149398258549308, |
|
"learning_rate": 6.360138748461013e-08, |
|
"logits/chosen": -2.078819751739502, |
|
"logits/rejected": -2.0325751304626465, |
|
"logps/chosen": -279.3172912597656, |
|
"logps/rejected": -262.2966003417969, |
|
"loss": 0.0712, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 3.741664409637451, |
|
"rewards/margins": 5.292626857757568, |
|
"rewards/rejected": -1.5509625673294067, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.5830258302583027, |
|
"grad_norm": 15.477600906013183, |
|
"learning_rate": 4.5028838547699346e-08, |
|
"logits/chosen": -2.058854818344116, |
|
"logits/rejected": -2.045734167098999, |
|
"logps/chosen": -293.87738037109375, |
|
"logps/rejected": -277.49139404296875, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.888404369354248, |
|
"rewards/margins": 5.4004316329956055, |
|
"rewards/rejected": -1.512027382850647, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.6568265682656826, |
|
"grad_norm": 10.486814550692278, |
|
"learning_rate": 2.9531763861505964e-08, |
|
"logits/chosen": -2.057389497756958, |
|
"logits/rejected": -2.0072054862976074, |
|
"logps/chosen": -284.025634765625, |
|
"logps/rejected": -249.7481231689453, |
|
"loss": 0.0701, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 3.7061257362365723, |
|
"rewards/margins": 5.206698417663574, |
|
"rewards/rejected": -1.500572919845581, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.7306273062730626, |
|
"grad_norm": 16.604175060639175, |
|
"learning_rate": 1.7214919195619125e-08, |
|
"logits/chosen": -2.0375514030456543, |
|
"logits/rejected": -2.0372228622436523, |
|
"logps/chosen": -293.4367980957031, |
|
"logps/rejected": -243.2362823486328, |
|
"loss": 0.0833, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.817143201828003, |
|
"rewards/margins": 5.346969127655029, |
|
"rewards/rejected": -1.5298258066177368, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.804428044280443, |
|
"grad_norm": 11.656202399163227, |
|
"learning_rate": 8.161562878982398e-09, |
|
"logits/chosen": -2.064812183380127, |
|
"logits/rejected": -2.0154833793640137, |
|
"logps/chosen": -295.53033447265625, |
|
"logps/rejected": -259.0420837402344, |
|
"loss": 0.0933, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.9195189476013184, |
|
"rewards/margins": 5.322437286376953, |
|
"rewards/rejected": -1.4029181003570557, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.878228782287823, |
|
"grad_norm": 12.21325697905649, |
|
"learning_rate": 2.432892997526026e-09, |
|
"logits/chosen": -2.0528626441955566, |
|
"logits/rejected": -2.0427441596984863, |
|
"logps/chosen": -290.7054443359375, |
|
"logps/rejected": -244.73696899414062, |
|
"loss": 0.0959, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 3.4299838542938232, |
|
"rewards/margins": 5.134265899658203, |
|
"rewards/rejected": -1.7042820453643799, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.952029520295203, |
|
"grad_norm": 13.794636154783172, |
|
"learning_rate": 6.763371270035457e-11, |
|
"logits/chosen": -2.0266225337982178, |
|
"logits/rejected": -2.011596441268921, |
|
"logps/chosen": -275.36798095703125, |
|
"logps/rejected": -242.58694458007812, |
|
"loss": 0.0505, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 3.6280651092529297, |
|
"rewards/margins": 5.206905364990234, |
|
"rewards/rejected": -1.5788400173187256, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.952029520295203, |
|
"eval_logits/chosen": -2.054385185241699, |
|
"eval_logits/rejected": -2.0193707942962646, |
|
"eval_logps/chosen": -301.7057800292969, |
|
"eval_logps/rejected": -247.90260314941406, |
|
"eval_loss": 0.48475462198257446, |
|
"eval_rewards/accuracies": 0.8548387289047241, |
|
"eval_rewards/chosen": 2.75199556350708, |
|
"eval_rewards/margins": 3.300902843475342, |
|
"eval_rewards/rejected": -0.5489078760147095, |
|
"eval_runtime": 129.5144, |
|
"eval_samples_per_second": 14.832, |
|
"eval_steps_per_second": 0.239, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.9667896678966788, |
|
"step": 201, |
|
"total_flos": 2369906314051584.0, |
|
"train_loss": 0.26609369445202957, |
|
"train_runtime": 7643.0309, |
|
"train_samples_per_second": 6.784, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 201, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 40, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2369906314051584.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|