|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 936, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003205128205128205, |
|
"grad_norm": 48.20300728973734, |
|
"learning_rate": 5.3191489361702125e-09, |
|
"logits/chosen": -0.267578125, |
|
"logits/rejected": -0.462890625, |
|
"logps/chosen": -320.0, |
|
"logps/rejected": -155.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03205128205128205, |
|
"grad_norm": 40.55807354957128, |
|
"learning_rate": 5.3191489361702123e-08, |
|
"logits/chosen": -0.265625, |
|
"logits/rejected": -0.37109375, |
|
"logps/chosen": -348.0, |
|
"logps/rejected": -318.0, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.1944444477558136, |
|
"rewards/chosen": -0.0107421875, |
|
"rewards/margins": -0.0093994140625, |
|
"rewards/rejected": -0.00138092041015625, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0641025641025641, |
|
"grad_norm": 38.94632768279024, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -0.416015625, |
|
"logits/rejected": -0.384765625, |
|
"logps/chosen": -272.0, |
|
"logps/rejected": -248.0, |
|
"loss": 0.6986, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.004425048828125, |
|
"rewards/margins": -0.00811767578125, |
|
"rewards/rejected": 0.00372314453125, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09615384615384616, |
|
"grad_norm": 44.94338708626801, |
|
"learning_rate": 1.5957446808510638e-07, |
|
"logits/chosen": -0.412109375, |
|
"logits/rejected": -0.31640625, |
|
"logps/chosen": -292.0, |
|
"logps/rejected": -274.0, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": 0.00811767578125, |
|
"rewards/margins": 0.004058837890625, |
|
"rewards/rejected": 0.004058837890625, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1282051282051282, |
|
"grad_norm": 44.203343489664924, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -0.439453125, |
|
"logits/rejected": -0.353515625, |
|
"logps/chosen": -280.0, |
|
"logps/rejected": -336.0, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.23749999701976776, |
|
"rewards/chosen": 0.0106201171875, |
|
"rewards/margins": -0.006256103515625, |
|
"rewards/rejected": 0.016845703125, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16025641025641027, |
|
"grad_norm": 52.46671251233884, |
|
"learning_rate": 2.659574468085106e-07, |
|
"logits/chosen": -0.31640625, |
|
"logits/rejected": -0.35546875, |
|
"logps/chosen": -306.0, |
|
"logps/rejected": -314.0, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.03466796875, |
|
"rewards/margins": 0.047607421875, |
|
"rewards/rejected": -0.0128173828125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 52.16025133508881, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.408203125, |
|
"logits/rejected": -0.451171875, |
|
"logps/chosen": -284.0, |
|
"logps/rejected": -260.0, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.064453125, |
|
"rewards/margins": 0.034423828125, |
|
"rewards/rejected": 0.030029296875, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22435897435897437, |
|
"grad_norm": 44.198032602975644, |
|
"learning_rate": 3.7234042553191484e-07, |
|
"logits/chosen": -0.435546875, |
|
"logits/rejected": -0.390625, |
|
"logps/chosen": -364.0, |
|
"logps/rejected": -292.0, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.09130859375, |
|
"rewards/margins": 0.0888671875, |
|
"rewards/rejected": 0.00250244140625, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 40.58383899570081, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.380859375, |
|
"logits/rejected": -0.3359375, |
|
"logps/chosen": -276.0, |
|
"logps/rejected": -256.0, |
|
"loss": 0.6542, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.11083984375, |
|
"rewards/margins": 0.09130859375, |
|
"rewards/rejected": 0.02001953125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.28846153846153844, |
|
"grad_norm": 37.229119513751584, |
|
"learning_rate": 4.787234042553192e-07, |
|
"logits/chosen": -0.3359375, |
|
"logits/rejected": -0.26171875, |
|
"logps/chosen": -364.0, |
|
"logps/rejected": -308.0, |
|
"loss": 0.6369, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.19140625, |
|
"rewards/margins": 0.2001953125, |
|
"rewards/rejected": -0.0084228515625, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.32051282051282054, |
|
"grad_norm": 39.33278491363928, |
|
"learning_rate": 4.96437054631829e-07, |
|
"logits/chosen": -0.400390625, |
|
"logits/rejected": -0.2412109375, |
|
"logps/chosen": -290.0, |
|
"logps/rejected": -310.0, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.138671875, |
|
"rewards/margins": 0.23046875, |
|
"rewards/rejected": -0.091796875, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3525641025641026, |
|
"grad_norm": 43.81240463931525, |
|
"learning_rate": 4.904988123515439e-07, |
|
"logits/chosen": -0.3984375, |
|
"logits/rejected": -0.3984375, |
|
"logps/chosen": -370.0, |
|
"logps/rejected": -352.0, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.1435546875, |
|
"rewards/margins": 0.251953125, |
|
"rewards/rejected": -0.10791015625, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 39.38457584773928, |
|
"learning_rate": 4.845605700712589e-07, |
|
"logits/chosen": -0.193359375, |
|
"logits/rejected": -0.25390625, |
|
"logps/chosen": -260.0, |
|
"logps/rejected": -278.0, |
|
"loss": 0.5985, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.00640869140625, |
|
"rewards/margins": 0.1279296875, |
|
"rewards/rejected": -0.134765625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 41.00555642083936, |
|
"learning_rate": 4.786223277909738e-07, |
|
"logits/chosen": -0.44921875, |
|
"logits/rejected": -0.3203125, |
|
"logps/chosen": -302.0, |
|
"logps/rejected": -272.0, |
|
"loss": 0.5941, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.22265625, |
|
"rewards/margins": 0.345703125, |
|
"rewards/rejected": -0.123046875, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.44871794871794873, |
|
"grad_norm": 39.857994444324696, |
|
"learning_rate": 4.7268408551068883e-07, |
|
"logits/chosen": -0.421875, |
|
"logits/rejected": -0.302734375, |
|
"logps/chosen": -338.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.166015625, |
|
"rewards/margins": 0.41015625, |
|
"rewards/rejected": -0.244140625, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4807692307692308, |
|
"grad_norm": 30.401529156884564, |
|
"learning_rate": 4.667458432304038e-07, |
|
"logits/chosen": -0.3125, |
|
"logits/rejected": -0.28125, |
|
"logps/chosen": -278.0, |
|
"logps/rejected": -262.0, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.1005859375, |
|
"rewards/margins": 0.34375, |
|
"rewards/rejected": -0.2421875, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 40.47565062008821, |
|
"learning_rate": 4.6080760095011875e-07, |
|
"logits/chosen": -0.306640625, |
|
"logits/rejected": -0.22265625, |
|
"logps/chosen": -314.0, |
|
"logps/rejected": -286.0, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.09716796875, |
|
"rewards/margins": 0.466796875, |
|
"rewards/rejected": -0.369140625, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5448717948717948, |
|
"grad_norm": 47.70230858758091, |
|
"learning_rate": 4.548693586698337e-07, |
|
"logits/chosen": -0.369140625, |
|
"logits/rejected": -0.46484375, |
|
"logps/chosen": -322.0, |
|
"logps/rejected": -324.0, |
|
"loss": 0.5529, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.201171875, |
|
"rewards/margins": 0.55078125, |
|
"rewards/rejected": -0.349609375, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5769230769230769, |
|
"grad_norm": 37.59643094962871, |
|
"learning_rate": 4.4893111638954866e-07, |
|
"logits/chosen": -0.443359375, |
|
"logits/rejected": -0.4453125, |
|
"logps/chosen": -388.0, |
|
"logps/rejected": -296.0, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.1279296875, |
|
"rewards/margins": 0.49609375, |
|
"rewards/rejected": -0.3671875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6089743589743589, |
|
"grad_norm": 42.89913513576229, |
|
"learning_rate": 4.429928741092636e-07, |
|
"logits/chosen": -0.306640625, |
|
"logits/rejected": -0.3828125, |
|
"logps/chosen": -306.0, |
|
"logps/rejected": -278.0, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.05908203125, |
|
"rewards/margins": 0.53125, |
|
"rewards/rejected": -0.58984375, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6410256410256411, |
|
"grad_norm": 38.491897938435464, |
|
"learning_rate": 4.3705463182897863e-07, |
|
"logits/chosen": -0.5390625, |
|
"logits/rejected": -0.412109375, |
|
"logps/chosen": -324.0, |
|
"logps/rejected": -322.0, |
|
"loss": 0.5589, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.02294921875, |
|
"rewards/margins": 0.470703125, |
|
"rewards/rejected": -0.4921875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6730769230769231, |
|
"grad_norm": 45.42331710145845, |
|
"learning_rate": 4.311163895486936e-07, |
|
"logits/chosen": -0.326171875, |
|
"logits/rejected": -0.365234375, |
|
"logps/chosen": -320.0, |
|
"logps/rejected": -330.0, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10791015625, |
|
"rewards/margins": 0.7109375, |
|
"rewards/rejected": -0.8203125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7051282051282052, |
|
"grad_norm": 38.10849789978559, |
|
"learning_rate": 4.251781472684085e-07, |
|
"logits/chosen": -0.267578125, |
|
"logits/rejected": -0.26171875, |
|
"logps/chosen": -318.0, |
|
"logps/rejected": -298.0, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.062255859375, |
|
"rewards/margins": 0.6484375, |
|
"rewards/rejected": -0.5859375, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.7371794871794872, |
|
"grad_norm": 35.69252085216096, |
|
"learning_rate": 4.192399049881235e-07, |
|
"logits/chosen": -0.4375, |
|
"logits/rejected": -0.392578125, |
|
"logps/chosen": -296.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.5494, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.0233154296875, |
|
"rewards/margins": 0.734375, |
|
"rewards/rejected": -0.7109375, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 49.72047308664797, |
|
"learning_rate": 4.1330166270783846e-07, |
|
"logits/chosen": -0.427734375, |
|
"logits/rejected": -0.37109375, |
|
"logps/chosen": -338.0, |
|
"logps/rejected": -330.0, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.171875, |
|
"rewards/margins": 0.484375, |
|
"rewards/rejected": -0.65625, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8012820512820513, |
|
"grad_norm": 43.61282576031846, |
|
"learning_rate": 4.0736342042755347e-07, |
|
"logits/chosen": -0.3125, |
|
"logits/rejected": -0.27734375, |
|
"logps/chosen": -304.0, |
|
"logps/rejected": -326.0, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1904296875, |
|
"rewards/margins": 0.494140625, |
|
"rewards/rejected": -0.6875, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 33.83836461453226, |
|
"learning_rate": 4.0142517814726837e-07, |
|
"logits/chosen": -0.341796875, |
|
"logits/rejected": -0.330078125, |
|
"logps/chosen": -326.0, |
|
"logps/rejected": -284.0, |
|
"loss": 0.5625, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.059326171875, |
|
"rewards/margins": 0.765625, |
|
"rewards/rejected": -0.70703125, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.8653846153846154, |
|
"grad_norm": 32.404327383318645, |
|
"learning_rate": 3.9548693586698333e-07, |
|
"logits/chosen": -0.4140625, |
|
"logits/rejected": -0.359375, |
|
"logps/chosen": -300.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.087890625, |
|
"rewards/margins": 0.6328125, |
|
"rewards/rejected": -0.54296875, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.8974358974358975, |
|
"grad_norm": 45.49431428845984, |
|
"learning_rate": 3.8954869358669834e-07, |
|
"logits/chosen": -0.26953125, |
|
"logits/rejected": -0.3046875, |
|
"logps/chosen": -328.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.4453125, |
|
"rewards/rejected": -0.5546875, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.9294871794871795, |
|
"grad_norm": 40.072165310498, |
|
"learning_rate": 3.836104513064133e-07, |
|
"logits/chosen": -0.416015625, |
|
"logits/rejected": -0.306640625, |
|
"logps/chosen": -298.0, |
|
"logps/rejected": -235.0, |
|
"loss": 0.5336, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.035888671875, |
|
"rewards/margins": 0.6953125, |
|
"rewards/rejected": -0.66015625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 32.34581916400338, |
|
"learning_rate": 3.7767220902612825e-07, |
|
"logits/chosen": -0.427734375, |
|
"logits/rejected": -0.408203125, |
|
"logps/chosen": -312.0, |
|
"logps/rejected": -318.0, |
|
"loss": 0.521, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.2021484375, |
|
"rewards/margins": 0.78515625, |
|
"rewards/rejected": -0.5859375, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9935897435897436, |
|
"grad_norm": 40.41128996018611, |
|
"learning_rate": 3.717339667458432e-07, |
|
"logits/chosen": -0.2890625, |
|
"logits/rejected": -0.275390625, |
|
"logps/chosen": -286.0, |
|
"logps/rejected": -284.0, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.0751953125, |
|
"rewards/margins": 0.82421875, |
|
"rewards/rejected": -0.75, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -0.341796875, |
|
"eval_logits/rejected": -0.34375, |
|
"eval_logps/chosen": -294.0, |
|
"eval_logps/rejected": -332.0, |
|
"eval_loss": 0.5677343606948853, |
|
"eval_rewards/accuracies": 0.6071428656578064, |
|
"eval_rewards/chosen": -0.0888671875, |
|
"eval_rewards/margins": 0.44140625, |
|
"eval_rewards/rejected": -0.53125, |
|
"eval_runtime": 18.4562, |
|
"eval_samples_per_second": 10.836, |
|
"eval_steps_per_second": 0.379, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.0256410256410255, |
|
"grad_norm": 31.256761205730616, |
|
"learning_rate": 3.6579572446555817e-07, |
|
"logits/chosen": -0.353515625, |
|
"logits/rejected": -0.349609375, |
|
"logps/chosen": -286.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.3844, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.1279296875, |
|
"rewards/margins": 0.984375, |
|
"rewards/rejected": -0.859375, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0576923076923077, |
|
"grad_norm": 29.12754445870708, |
|
"learning_rate": 3.598574821852731e-07, |
|
"logits/chosen": -0.451171875, |
|
"logits/rejected": -0.47265625, |
|
"logps/chosen": -380.0, |
|
"logps/rejected": -334.0, |
|
"loss": 0.293, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.3984375, |
|
"rewards/margins": 1.53125, |
|
"rewards/rejected": -1.1328125, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.0897435897435896, |
|
"grad_norm": 23.255524440901077, |
|
"learning_rate": 3.5391923990498813e-07, |
|
"logits/chosen": -0.388671875, |
|
"logits/rejected": -0.296875, |
|
"logps/chosen": -350.0, |
|
"logps/rejected": -320.0, |
|
"loss": 0.3295, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.369140625, |
|
"rewards/margins": 1.5625, |
|
"rewards/rejected": -1.1953125, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.1217948717948718, |
|
"grad_norm": 32.212012138764614, |
|
"learning_rate": 3.479809976247031e-07, |
|
"logits/chosen": -0.228515625, |
|
"logits/rejected": -0.265625, |
|
"logps/chosen": -255.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.3649, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.07763671875, |
|
"rewards/margins": 1.375, |
|
"rewards/rejected": -1.296875, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.1538461538461537, |
|
"grad_norm": 21.919087970652495, |
|
"learning_rate": 3.42042755344418e-07, |
|
"logits/chosen": -0.412109375, |
|
"logits/rejected": -0.359375, |
|
"logps/chosen": -382.0, |
|
"logps/rejected": -326.0, |
|
"loss": 0.3203, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.279296875, |
|
"rewards/margins": 1.75, |
|
"rewards/rejected": -1.46875, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.185897435897436, |
|
"grad_norm": 24.48490434607468, |
|
"learning_rate": 3.36104513064133e-07, |
|
"logits/chosen": -0.400390625, |
|
"logits/rejected": -0.41015625, |
|
"logps/chosen": -336.0, |
|
"logps/rejected": -318.0, |
|
"loss": 0.3216, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.04052734375, |
|
"rewards/margins": 1.359375, |
|
"rewards/rejected": -1.3984375, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.217948717948718, |
|
"grad_norm": 19.39368867421837, |
|
"learning_rate": 3.3016627078384796e-07, |
|
"logits/chosen": -0.33984375, |
|
"logits/rejected": -0.359375, |
|
"logps/chosen": -334.0, |
|
"logps/rejected": -304.0, |
|
"loss": 0.3092, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.1484375, |
|
"rewards/margins": 1.484375, |
|
"rewards/rejected": -1.3359375, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 31.36015324660143, |
|
"learning_rate": 3.2422802850356297e-07, |
|
"logits/chosen": -0.369140625, |
|
"logits/rejected": -0.337890625, |
|
"logps/chosen": -260.0, |
|
"logps/rejected": -280.0, |
|
"loss": 0.3556, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.056884765625, |
|
"rewards/margins": 1.2421875, |
|
"rewards/rejected": -1.3046875, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.282051282051282, |
|
"grad_norm": 22.45941815431359, |
|
"learning_rate": 3.182897862232779e-07, |
|
"logits/chosen": -0.41796875, |
|
"logits/rejected": -0.4765625, |
|
"logps/chosen": -262.0, |
|
"logps/rejected": -332.0, |
|
"loss": 0.3101, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.08056640625, |
|
"rewards/margins": 1.4375, |
|
"rewards/rejected": -1.3515625, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.314102564102564, |
|
"grad_norm": 25.29350114221551, |
|
"learning_rate": 3.1235154394299283e-07, |
|
"logits/chosen": -0.388671875, |
|
"logits/rejected": -0.353515625, |
|
"logps/chosen": -284.0, |
|
"logps/rejected": -278.0, |
|
"loss": 0.3135, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07373046875, |
|
"rewards/margins": 1.4375, |
|
"rewards/rejected": -1.515625, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.3461538461538463, |
|
"grad_norm": 21.92318193084022, |
|
"learning_rate": 3.0641330166270784e-07, |
|
"logits/chosen": -0.384765625, |
|
"logits/rejected": -0.462890625, |
|
"logps/chosen": -390.0, |
|
"logps/rejected": -312.0, |
|
"loss": 0.2906, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.296875, |
|
"rewards/margins": 1.8203125, |
|
"rewards/rejected": -1.5234375, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.3782051282051282, |
|
"grad_norm": 23.48433321437472, |
|
"learning_rate": 3.004750593824228e-07, |
|
"logits/chosen": -0.39453125, |
|
"logits/rejected": -0.41796875, |
|
"logps/chosen": -304.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.3054, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.3203125, |
|
"rewards/margins": 1.4921875, |
|
"rewards/rejected": -1.171875, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.4102564102564101, |
|
"grad_norm": 26.26090008450148, |
|
"learning_rate": 2.9453681710213776e-07, |
|
"logits/chosen": -0.31640625, |
|
"logits/rejected": -0.4375, |
|
"logps/chosen": -342.0, |
|
"logps/rejected": -300.0, |
|
"loss": 0.2742, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.298828125, |
|
"rewards/margins": 1.734375, |
|
"rewards/rejected": -1.4296875, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.4423076923076923, |
|
"grad_norm": 23.659041497927436, |
|
"learning_rate": 2.885985748218527e-07, |
|
"logits/chosen": -0.37109375, |
|
"logits/rejected": -0.359375, |
|
"logps/chosen": -384.0, |
|
"logps/rejected": -350.0, |
|
"loss": 0.2861, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.416015625, |
|
"rewards/margins": 1.8984375, |
|
"rewards/rejected": -1.484375, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.4743589743589745, |
|
"grad_norm": 25.88824077819283, |
|
"learning_rate": 2.8266033254156767e-07, |
|
"logits/chosen": -0.32421875, |
|
"logits/rejected": -0.400390625, |
|
"logps/chosen": -306.0, |
|
"logps/rejected": -308.0, |
|
"loss": 0.2735, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.26953125, |
|
"rewards/margins": 1.6953125, |
|
"rewards/rejected": -1.421875, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.5064102564102564, |
|
"grad_norm": 39.69692113209802, |
|
"learning_rate": 2.7672209026128263e-07, |
|
"logits/chosen": -0.3828125, |
|
"logits/rejected": -0.39453125, |
|
"logps/chosen": -332.0, |
|
"logps/rejected": -326.0, |
|
"loss": 0.2837, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.39453125, |
|
"rewards/margins": 1.75, |
|
"rewards/rejected": -1.359375, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 30.346903581562415, |
|
"learning_rate": 2.7078384798099764e-07, |
|
"logits/chosen": -0.34765625, |
|
"logits/rejected": -0.447265625, |
|
"logps/chosen": -286.0, |
|
"logps/rejected": -290.0, |
|
"loss": 0.2892, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.1240234375, |
|
"rewards/margins": 1.5, |
|
"rewards/rejected": -1.3828125, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5705128205128205, |
|
"grad_norm": 26.633915775199014, |
|
"learning_rate": 2.648456057007126e-07, |
|
"logits/chosen": -0.283203125, |
|
"logits/rejected": -0.48828125, |
|
"logps/chosen": -302.0, |
|
"logps/rejected": -308.0, |
|
"loss": 0.2848, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.359375, |
|
"rewards/margins": 1.9609375, |
|
"rewards/rejected": -1.6015625, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.6025641025641026, |
|
"grad_norm": 28.260367944812693, |
|
"learning_rate": 2.589073634204275e-07, |
|
"logits/chosen": -0.435546875, |
|
"logits/rejected": -0.392578125, |
|
"logps/chosen": -324.0, |
|
"logps/rejected": -330.0, |
|
"loss": 0.2791, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.328125, |
|
"rewards/margins": 1.609375, |
|
"rewards/rejected": -1.28125, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.6346153846153846, |
|
"grad_norm": 18.511599498231647, |
|
"learning_rate": 2.529691211401425e-07, |
|
"logits/chosen": -0.376953125, |
|
"logits/rejected": -0.33984375, |
|
"logps/chosen": -294.0, |
|
"logps/rejected": -272.0, |
|
"loss": 0.2808, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.322265625, |
|
"rewards/margins": 1.4609375, |
|
"rewards/rejected": -1.140625, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 22.844798241441286, |
|
"learning_rate": 2.4703087885985747e-07, |
|
"logits/chosen": -0.376953125, |
|
"logits/rejected": -0.4296875, |
|
"logps/chosen": -312.0, |
|
"logps/rejected": -318.0, |
|
"loss": 0.2799, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.412109375, |
|
"rewards/margins": 1.5859375, |
|
"rewards/rejected": -1.171875, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.6987179487179487, |
|
"grad_norm": 22.977308639981327, |
|
"learning_rate": 2.410926365795724e-07, |
|
"logits/chosen": -0.369140625, |
|
"logits/rejected": -0.2470703125, |
|
"logps/chosen": -352.0, |
|
"logps/rejected": -340.0, |
|
"loss": 0.2788, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.37890625, |
|
"rewards/margins": 1.7734375, |
|
"rewards/rejected": -1.390625, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.7307692307692308, |
|
"grad_norm": 19.697815015591537, |
|
"learning_rate": 2.351543942992874e-07, |
|
"logits/chosen": -0.294921875, |
|
"logits/rejected": -0.34375, |
|
"logps/chosen": -234.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.3076, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0830078125, |
|
"rewards/margins": 1.8671875, |
|
"rewards/rejected": -1.78125, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.7628205128205128, |
|
"grad_norm": 16.42605287271066, |
|
"learning_rate": 2.2921615201900234e-07, |
|
"logits/chosen": -0.5625, |
|
"logits/rejected": -0.41796875, |
|
"logps/chosen": -350.0, |
|
"logps/rejected": -344.0, |
|
"loss": 0.2614, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.21875, |
|
"rewards/margins": 1.890625, |
|
"rewards/rejected": -1.671875, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.7948717948717947, |
|
"grad_norm": 21.70957132124816, |
|
"learning_rate": 2.2327790973871732e-07, |
|
"logits/chosen": -0.431640625, |
|
"logits/rejected": -0.3984375, |
|
"logps/chosen": -324.0, |
|
"logps/rejected": -272.0, |
|
"loss": 0.3165, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.0252685546875, |
|
"rewards/margins": 1.671875, |
|
"rewards/rejected": -1.6484375, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.8269230769230769, |
|
"grad_norm": 22.45992885626303, |
|
"learning_rate": 2.173396674584323e-07, |
|
"logits/chosen": -0.29296875, |
|
"logits/rejected": -0.359375, |
|
"logps/chosen": -294.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.2871, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.1748046875, |
|
"rewards/margins": 1.7578125, |
|
"rewards/rejected": -1.5859375, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.858974358974359, |
|
"grad_norm": 20.14427708154685, |
|
"learning_rate": 2.1140142517814726e-07, |
|
"logits/chosen": -0.359375, |
|
"logits/rejected": -0.359375, |
|
"logps/chosen": -324.0, |
|
"logps/rejected": -308.0, |
|
"loss": 0.2926, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.443359375, |
|
"rewards/margins": 1.6015625, |
|
"rewards/rejected": -1.15625, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.891025641025641, |
|
"grad_norm": 19.79995273385685, |
|
"learning_rate": 2.0546318289786222e-07, |
|
"logits/chosen": -0.322265625, |
|
"logits/rejected": -0.38671875, |
|
"logps/chosen": -254.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.2864, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.419921875, |
|
"rewards/margins": 1.703125, |
|
"rewards/rejected": -1.28125, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.9230769230769231, |
|
"grad_norm": 23.566386363575898, |
|
"learning_rate": 1.9952494061757718e-07, |
|
"logits/chosen": -0.306640625, |
|
"logits/rejected": -0.322265625, |
|
"logps/chosen": -282.0, |
|
"logps/rejected": -272.0, |
|
"loss": 0.2718, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.57421875, |
|
"rewards/margins": 1.78125, |
|
"rewards/rejected": -1.2109375, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9551282051282053, |
|
"grad_norm": 29.152008466264334, |
|
"learning_rate": 1.9358669833729216e-07, |
|
"logits/chosen": -0.59375, |
|
"logits/rejected": -0.447265625, |
|
"logps/chosen": -308.0, |
|
"logps/rejected": -282.0, |
|
"loss": 0.2682, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.234375, |
|
"rewards/margins": 1.7421875, |
|
"rewards/rejected": -1.5078125, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.9871794871794872, |
|
"grad_norm": 22.58745268335563, |
|
"learning_rate": 1.876484560570071e-07, |
|
"logits/chosen": -0.451171875, |
|
"logits/rejected": -0.470703125, |
|
"logps/chosen": -360.0, |
|
"logps/rejected": -300.0, |
|
"loss": 0.2825, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.33984375, |
|
"rewards/margins": 1.703125, |
|
"rewards/rejected": -1.3671875, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -0.3828125, |
|
"eval_logits/rejected": -0.375, |
|
"eval_logps/chosen": -298.0, |
|
"eval_logps/rejected": -336.0, |
|
"eval_loss": 0.5634375214576721, |
|
"eval_rewards/accuracies": 0.625, |
|
"eval_rewards/chosen": -0.345703125, |
|
"eval_rewards/margins": 0.625, |
|
"eval_rewards/rejected": -0.96875, |
|
"eval_runtime": 18.4069, |
|
"eval_samples_per_second": 10.865, |
|
"eval_steps_per_second": 0.38, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 2.019230769230769, |
|
"grad_norm": 10.223029148094557, |
|
"learning_rate": 1.8171021377672207e-07, |
|
"logits/chosen": -0.453125, |
|
"logits/rejected": -0.37109375, |
|
"logps/chosen": -272.0, |
|
"logps/rejected": -296.0, |
|
"loss": 0.2027, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.1767578125, |
|
"rewards/margins": 1.9921875, |
|
"rewards/rejected": -1.8125, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.051282051282051, |
|
"grad_norm": 16.341314788264306, |
|
"learning_rate": 1.7577197149643706e-07, |
|
"logits/chosen": -0.345703125, |
|
"logits/rejected": -0.369140625, |
|
"logps/chosen": -356.0, |
|
"logps/rejected": -330.0, |
|
"loss": 0.166, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.486328125, |
|
"rewards/margins": 2.4375, |
|
"rewards/rejected": -1.9453125, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 12.61817837923421, |
|
"learning_rate": 1.6983372921615202e-07, |
|
"logits/chosen": -0.44140625, |
|
"logits/rejected": -0.337890625, |
|
"logps/chosen": -332.0, |
|
"logps/rejected": -304.0, |
|
"loss": 0.1739, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.28125, |
|
"rewards/margins": 2.171875, |
|
"rewards/rejected": -1.8984375, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.1153846153846154, |
|
"grad_norm": 15.916788215543724, |
|
"learning_rate": 1.6389548693586697e-07, |
|
"logits/chosen": -0.41015625, |
|
"logits/rejected": -0.328125, |
|
"logps/chosen": -330.0, |
|
"logps/rejected": -308.0, |
|
"loss": 0.1616, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.376953125, |
|
"rewards/margins": 2.328125, |
|
"rewards/rejected": -1.953125, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.1474358974358974, |
|
"grad_norm": 16.057171731434803, |
|
"learning_rate": 1.5795724465558193e-07, |
|
"logits/chosen": -0.33984375, |
|
"logits/rejected": -0.3828125, |
|
"logps/chosen": -362.0, |
|
"logps/rejected": -324.0, |
|
"loss": 0.1857, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.1650390625, |
|
"rewards/margins": 2.515625, |
|
"rewards/rejected": -2.34375, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.1794871794871793, |
|
"grad_norm": 11.186100214252932, |
|
"learning_rate": 1.520190023752969e-07, |
|
"logits/chosen": -0.51171875, |
|
"logits/rejected": -0.51171875, |
|
"logps/chosen": -328.0, |
|
"logps/rejected": -338.0, |
|
"loss": 0.1692, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.33203125, |
|
"rewards/margins": 2.421875, |
|
"rewards/rejected": -2.09375, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.2115384615384617, |
|
"grad_norm": 11.075777990973583, |
|
"learning_rate": 1.4608076009501184e-07, |
|
"logits/chosen": -0.314453125, |
|
"logits/rejected": -0.2265625, |
|
"logps/chosen": -268.0, |
|
"logps/rejected": -256.0, |
|
"loss": 0.1701, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.130859375, |
|
"rewards/margins": 2.40625, |
|
"rewards/rejected": -2.265625, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.2435897435897436, |
|
"grad_norm": 12.634117376853869, |
|
"learning_rate": 1.4014251781472683e-07, |
|
"logits/chosen": -0.455078125, |
|
"logits/rejected": -0.47265625, |
|
"logps/chosen": -322.0, |
|
"logps/rejected": -276.0, |
|
"loss": 0.1625, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.04833984375, |
|
"rewards/margins": 2.359375, |
|
"rewards/rejected": -2.40625, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.2756410256410255, |
|
"grad_norm": 13.097076165721171, |
|
"learning_rate": 1.342042755344418e-07, |
|
"logits/chosen": -0.36328125, |
|
"logits/rejected": -0.337890625, |
|
"logps/chosen": -304.0, |
|
"logps/rejected": -270.0, |
|
"loss": 0.1739, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.0115966796875, |
|
"rewards/margins": 2.484375, |
|
"rewards/rejected": -2.484375, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 13.327447391626707, |
|
"learning_rate": 1.2826603325415677e-07, |
|
"logits/chosen": -0.4296875, |
|
"logits/rejected": -0.361328125, |
|
"logps/chosen": -316.0, |
|
"logps/rejected": -318.0, |
|
"loss": 0.1492, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.11865234375, |
|
"rewards/margins": 2.640625, |
|
"rewards/rejected": -2.515625, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.33974358974359, |
|
"grad_norm": 14.846779798952685, |
|
"learning_rate": 1.2232779097387173e-07, |
|
"logits/chosen": -0.439453125, |
|
"logits/rejected": -0.578125, |
|
"logps/chosen": -316.0, |
|
"logps/rejected": -316.0, |
|
"loss": 0.147, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1640625, |
|
"rewards/margins": 2.6875, |
|
"rewards/rejected": -2.53125, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.371794871794872, |
|
"grad_norm": 12.996323206405766, |
|
"learning_rate": 1.163895486935867e-07, |
|
"logits/chosen": -0.4296875, |
|
"logits/rejected": -0.458984375, |
|
"logps/chosen": -326.0, |
|
"logps/rejected": -322.0, |
|
"loss": 0.1632, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2099609375, |
|
"rewards/margins": 2.515625, |
|
"rewards/rejected": -2.296875, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.4038461538461537, |
|
"grad_norm": 12.50397204158465, |
|
"learning_rate": 1.1045130641330165e-07, |
|
"logits/chosen": -0.287109375, |
|
"logits/rejected": -0.32421875, |
|
"logps/chosen": -302.0, |
|
"logps/rejected": -356.0, |
|
"loss": 0.1662, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.484375, |
|
"rewards/margins": 2.9375, |
|
"rewards/rejected": -2.453125, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.435897435897436, |
|
"grad_norm": 19.850141789886788, |
|
"learning_rate": 1.0451306413301662e-07, |
|
"logits/chosen": -0.470703125, |
|
"logits/rejected": -0.40625, |
|
"logps/chosen": -344.0, |
|
"logps/rejected": -314.0, |
|
"loss": 0.1404, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.412109375, |
|
"rewards/margins": 2.6875, |
|
"rewards/rejected": -2.28125, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.467948717948718, |
|
"grad_norm": 14.142532630035193, |
|
"learning_rate": 9.857482185273158e-08, |
|
"logits/chosen": -0.35546875, |
|
"logits/rejected": -0.341796875, |
|
"logps/chosen": -294.0, |
|
"logps/rejected": -286.0, |
|
"loss": 0.1377, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1552734375, |
|
"rewards/margins": 2.5625, |
|
"rewards/rejected": -2.40625, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 10.362780550901535, |
|
"learning_rate": 9.263657957244655e-08, |
|
"logits/chosen": -0.4375, |
|
"logits/rejected": -0.466796875, |
|
"logps/chosen": -338.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.1545, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.07177734375, |
|
"rewards/margins": 2.703125, |
|
"rewards/rejected": -2.78125, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.532051282051282, |
|
"grad_norm": 14.983339217810357, |
|
"learning_rate": 8.669833729216151e-08, |
|
"logits/chosen": -0.416015625, |
|
"logits/rejected": -0.40625, |
|
"logps/chosen": -328.0, |
|
"logps/rejected": -342.0, |
|
"loss": 0.1724, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.08251953125, |
|
"rewards/margins": 2.8125, |
|
"rewards/rejected": -2.71875, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.564102564102564, |
|
"grad_norm": 11.403661448100282, |
|
"learning_rate": 8.076009501187649e-08, |
|
"logits/chosen": -0.361328125, |
|
"logits/rejected": -0.2578125, |
|
"logps/chosen": -274.0, |
|
"logps/rejected": -316.0, |
|
"loss": 0.1329, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.0247802734375, |
|
"rewards/margins": 2.5625, |
|
"rewards/rejected": -2.546875, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.5961538461538463, |
|
"grad_norm": 12.098366455488087, |
|
"learning_rate": 7.482185273159145e-08, |
|
"logits/chosen": -0.474609375, |
|
"logits/rejected": -0.4609375, |
|
"logps/chosen": -304.0, |
|
"logps/rejected": -296.0, |
|
"loss": 0.1386, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1943359375, |
|
"rewards/margins": 2.703125, |
|
"rewards/rejected": -2.5, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.628205128205128, |
|
"grad_norm": 19.559170964417323, |
|
"learning_rate": 6.88836104513064e-08, |
|
"logits/chosen": -0.4296875, |
|
"logits/rejected": -0.4453125, |
|
"logps/chosen": -314.0, |
|
"logps/rejected": -350.0, |
|
"loss": 0.1804, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.0341796875, |
|
"rewards/margins": 2.609375, |
|
"rewards/rejected": -2.578125, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.66025641025641, |
|
"grad_norm": 13.553714513714917, |
|
"learning_rate": 6.294536817102138e-08, |
|
"logits/chosen": -0.53515625, |
|
"logits/rejected": -0.52734375, |
|
"logps/chosen": -342.0, |
|
"logps/rejected": -288.0, |
|
"loss": 0.1646, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.126953125, |
|
"rewards/margins": 2.4375, |
|
"rewards/rejected": -2.3125, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.6923076923076925, |
|
"grad_norm": 13.769525791929578, |
|
"learning_rate": 5.700712589073634e-08, |
|
"logits/chosen": -0.322265625, |
|
"logits/rejected": -0.41015625, |
|
"logps/chosen": -300.0, |
|
"logps/rejected": -294.0, |
|
"loss": 0.1677, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.0732421875, |
|
"rewards/margins": 2.625, |
|
"rewards/rejected": -2.546875, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.7243589743589745, |
|
"grad_norm": 19.35910853506724, |
|
"learning_rate": 5.10688836104513e-08, |
|
"logits/chosen": -0.35546875, |
|
"logits/rejected": -0.48046875, |
|
"logps/chosen": -326.0, |
|
"logps/rejected": -334.0, |
|
"loss": 0.1733, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.1259765625, |
|
"rewards/margins": 2.53125, |
|
"rewards/rejected": -2.40625, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.7564102564102564, |
|
"grad_norm": 13.618581779013738, |
|
"learning_rate": 4.5130641330166267e-08, |
|
"logits/chosen": -0.3984375, |
|
"logits/rejected": -0.404296875, |
|
"logps/chosen": -324.0, |
|
"logps/rejected": -326.0, |
|
"loss": 0.1658, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.09521484375, |
|
"rewards/margins": 2.796875, |
|
"rewards/rejected": -2.890625, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.7884615384615383, |
|
"grad_norm": 8.922554312926742, |
|
"learning_rate": 3.919239904988123e-08, |
|
"logits/chosen": -0.41796875, |
|
"logits/rejected": -0.453125, |
|
"logps/chosen": -264.0, |
|
"logps/rejected": -306.0, |
|
"loss": 0.185, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.052734375, |
|
"rewards/margins": 2.328125, |
|
"rewards/rejected": -2.265625, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.8205128205128203, |
|
"grad_norm": 15.61605037939187, |
|
"learning_rate": 3.32541567695962e-08, |
|
"logits/chosen": -0.357421875, |
|
"logits/rejected": -0.43359375, |
|
"logps/chosen": -316.0, |
|
"logps/rejected": -316.0, |
|
"loss": 0.1618, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.00665283203125, |
|
"rewards/margins": 2.515625, |
|
"rewards/rejected": -2.5, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.8525641025641026, |
|
"grad_norm": 14.041233392814654, |
|
"learning_rate": 2.7315914489311164e-08, |
|
"logits/chosen": -0.333984375, |
|
"logits/rejected": -0.396484375, |
|
"logps/chosen": -346.0, |
|
"logps/rejected": -336.0, |
|
"loss": 0.1652, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.1982421875, |
|
"rewards/margins": 2.703125, |
|
"rewards/rejected": -2.515625, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.8846153846153846, |
|
"grad_norm": 14.603314234579116, |
|
"learning_rate": 2.1377672209026125e-08, |
|
"logits/chosen": -0.375, |
|
"logits/rejected": -0.341796875, |
|
"logps/chosen": -278.0, |
|
"logps/rejected": -302.0, |
|
"loss": 0.1562, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.05419921875, |
|
"rewards/margins": 2.46875, |
|
"rewards/rejected": -2.421875, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.9166666666666665, |
|
"grad_norm": 13.067382331418449, |
|
"learning_rate": 1.5439429928741092e-08, |
|
"logits/chosen": -0.359375, |
|
"logits/rejected": -0.416015625, |
|
"logps/chosen": -298.0, |
|
"logps/rejected": -300.0, |
|
"loss": 0.1925, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0152587890625, |
|
"rewards/margins": 2.5625, |
|
"rewards/rejected": -2.546875, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.948717948717949, |
|
"grad_norm": 10.391263156117287, |
|
"learning_rate": 9.501187648456057e-09, |
|
"logits/chosen": -0.443359375, |
|
"logits/rejected": -0.373046875, |
|
"logps/chosen": -288.0, |
|
"logps/rejected": -320.0, |
|
"loss": 0.142, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.031982421875, |
|
"rewards/margins": 2.453125, |
|
"rewards/rejected": -2.484375, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.980769230769231, |
|
"grad_norm": 10.1448021646423, |
|
"learning_rate": 3.562945368171021e-09, |
|
"logits/chosen": -0.435546875, |
|
"logits/rejected": -0.298828125, |
|
"logps/chosen": -245.0, |
|
"logps/rejected": -278.0, |
|
"loss": 0.136, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.0036163330078125, |
|
"rewards/margins": 2.59375, |
|
"rewards/rejected": -2.59375, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -0.40625, |
|
"eval_logits/rejected": -0.39453125, |
|
"eval_logps/chosen": -302.0, |
|
"eval_logps/rejected": -344.0, |
|
"eval_loss": 0.5834765434265137, |
|
"eval_rewards/accuracies": 0.6607142686843872, |
|
"eval_rewards/chosen": -0.8671875, |
|
"eval_rewards/margins": 0.83203125, |
|
"eval_rewards/rejected": -1.703125, |
|
"eval_runtime": 18.4869, |
|
"eval_samples_per_second": 10.818, |
|
"eval_steps_per_second": 0.379, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 936, |
|
"total_flos": 0.0, |
|
"train_loss": 0.350877452100444, |
|
"train_runtime": 7216.813, |
|
"train_samples_per_second": 4.15, |
|
"train_steps_per_second": 0.13 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 936, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|