{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.997867803837953,
  "eval_steps": 100,
  "global_step": 117,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio": 0.0,
      "completion_length": 627.8802337646484,
      "epoch": 0.008528784648187633,
      "grad_norm": 0.35644883946602257,
      "kl": 0.0,
      "learning_rate": 2.5e-07,
      "loss": -0.0129,
      "reward": 0.6093750298023224,
      "reward_std": 0.3860909380018711,
      "rewards/accuracy_reward": 0.6093750298023224,
      "rewards/format_reward": 0.0,
      "step": 1
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 587.183611869812,
      "epoch": 0.042643923240938165,
      "grad_norm": 0.34939166374507424,
      "kl": 0.00016704201698303223,
      "learning_rate": 1.25e-06,
      "loss": 0.0135,
      "reward": 0.6184895997866988,
      "reward_std": 0.34997194120660424,
      "rewards/accuracy_reward": 0.6184895997866988,
      "rewards/format_reward": 0.0,
      "step": 5
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 611.967724609375,
      "epoch": 0.08528784648187633,
      "grad_norm": 31.88849856902946,
      "kl": 0.0043338298797607425,
      "learning_rate": 2.5e-06,
      "loss": 0.0188,
      "reward": 0.6291666887700558,
      "reward_std": 0.32357291094958784,
      "rewards/accuracy_reward": 0.6291666887700558,
      "rewards/format_reward": 0.0,
      "step": 10
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 616.8541862487793,
      "epoch": 0.1279317697228145,
      "grad_norm": 0.3815627956846007,
      "kl": 0.010546112060546875,
      "learning_rate": 2.993961440992859e-06,
      "loss": 0.0578,
      "reward": 0.6953125193715095,
      "reward_std": 0.2850981580093503,
      "rewards/accuracy_reward": 0.6953125193715095,
      "rewards/format_reward": 0.0,
      "step": 15
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 618.81460647583,
      "epoch": 0.17057569296375266,
      "grad_norm": 0.21300730475167035,
      "kl": 0.0043548583984375,
      "learning_rate": 2.957235057439301e-06,
      "loss": 0.0758,
      "reward": 0.7281250193715095,
      "reward_std": 0.23656688714399934,
      "rewards/accuracy_reward": 0.7281250193715095,
      "rewards/format_reward": 0.0,
      "step": 20
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 591.3015823364258,
      "epoch": 0.21321961620469082,
      "grad_norm": 0.12113342234838173,
      "kl": 0.015129280090332032,
      "learning_rate": 2.887956450710995e-06,
      "loss": 0.0443,
      "reward": 0.7828125208616257,
      "reward_std": 0.177566824760288,
      "rewards/accuracy_reward": 0.7828125208616257,
      "rewards/format_reward": 0.0,
      "step": 25
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 579.9239768981934,
      "epoch": 0.255863539445629,
      "grad_norm": 0.14438849219292743,
      "kl": 0.005391120910644531,
      "learning_rate": 2.7876731904027993e-06,
      "loss": 0.0371,
      "reward": 0.7609375171363354,
      "reward_std": 0.16949560260400176,
      "rewards/accuracy_reward": 0.7609375171363354,
      "rewards/format_reward": 0.0,
      "step": 30
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 571.2515838623046,
      "epoch": 0.29850746268656714,
      "grad_norm": 0.15658618448015763,
      "kl": 0.004360771179199219,
      "learning_rate": 2.6586254388368995e-06,
      "loss": 0.0415,
      "reward": 0.7854166865348816,
      "reward_std": 0.17419785326346754,
      "rewards/accuracy_reward": 0.7854166865348816,
      "rewards/format_reward": 0.0,
      "step": 35
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 579.2099113464355,
      "epoch": 0.3411513859275053,
      "grad_norm": 0.6009157958287555,
      "kl": 0.026328277587890626,
      "learning_rate": 2.5036959095382875e-06,
      "loss": 0.0321,
      "reward": 0.7708333596587181,
      "reward_std": 0.17964822258800267,
      "rewards/accuracy_reward": 0.7708333596587181,
      "rewards/format_reward": 0.0,
      "step": 40
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 586.2880401611328,
      "epoch": 0.3837953091684435,
      "grad_norm": 0.46893633502563103,
      "kl": 0.015601730346679688,
      "learning_rate": 2.3263454721781537e-06,
      "loss": 0.0288,
      "reward": 0.7869791895151138,
      "reward_std": 0.17324934136122466,
      "rewards/accuracy_reward": 0.7869791895151138,
      "rewards/format_reward": 0.0,
      "step": 45
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 577.1146011352539,
      "epoch": 0.42643923240938164,
      "grad_norm": 0.14115724397125742,
      "kl": 0.00496826171875,
      "learning_rate": 2.1305358424643485e-06,
      "loss": 0.0306,
      "reward": 0.7510416850447654,
      "reward_std": 0.1911184054799378,
      "rewards/accuracy_reward": 0.7510416850447654,
      "rewards/format_reward": 0.0,
      "step": 50
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 579.0744941711425,
      "epoch": 0.4690831556503198,
      "grad_norm": 3.183002278410084,
      "kl": 0.018677902221679688,
      "learning_rate": 1.9206410839590043e-06,
      "loss": 0.0246,
      "reward": 0.7661458477377892,
      "reward_std": 0.19202441712841392,
      "rewards/accuracy_reward": 0.7661458477377892,
      "rewards/format_reward": 0.0,
      "step": 55
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 601.4166862487793,
      "epoch": 0.511727078891258,
      "grad_norm": 0.12757187329087855,
      "kl": 0.006385040283203125,
      "learning_rate": 1.7013498987264833e-06,
      "loss": 0.0345,
      "reward": 0.7364583522081375,
      "reward_std": 0.19316800702363252,
      "rewards/accuracy_reward": 0.7364583522081375,
      "rewards/format_reward": 0.0,
      "step": 60
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 581.6609550476074,
      "epoch": 0.5543710021321961,
      "grad_norm": 0.17404128901148935,
      "kl": 0.007928085327148438,
      "learning_rate": 1.4775608894771048e-06,
      "loss": 0.0328,
      "reward": 0.7505208536982536,
      "reward_std": 0.2075295069254935,
      "rewards/accuracy_reward": 0.7505208536982536,
      "rewards/format_reward": 0.0,
      "step": 65
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 596.1479362487793,
      "epoch": 0.5970149253731343,
      "grad_norm": 0.2944207023714681,
      "kl": 0.006090927124023438,
      "learning_rate": 1.2542731328772936e-06,
      "loss": 0.0339,
      "reward": 0.7265625208616256,
      "reward_std": 0.19276394164189697,
      "rewards/accuracy_reward": 0.7265625208616256,
      "rewards/format_reward": 0.0,
      "step": 70
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 565.5979385375977,
      "epoch": 0.6396588486140725,
      "grad_norm": 0.18840980741585026,
      "kl": 0.0075702667236328125,
      "learning_rate": 1.036474508437579e-06,
      "loss": 0.0368,
      "reward": 0.7677083507180213,
      "reward_std": 0.18664944088086485,
      "rewards/accuracy_reward": 0.7677083507180213,
      "rewards/format_reward": 0.0,
      "step": 75
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 580.122933959961,
      "epoch": 0.6823027718550106,
      "grad_norm": 0.11471347531056907,
      "kl": 0.006945037841796875,
      "learning_rate": 8.290302775265509e-07,
      "loss": 0.0341,
      "reward": 0.7583333522081375,
      "reward_std": 0.1750888627022505,
      "rewards/accuracy_reward": 0.7583333522081375,
      "rewards/format_reward": 0.0,
      "step": 80
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 592.2614776611329,
      "epoch": 0.7249466950959488,
      "grad_norm": 0.22116249522463094,
      "kl": 0.009731292724609375,
      "learning_rate": 6.3657440147149e-07,
      "loss": 0.0333,
      "reward": 0.7656250178813935,
      "reward_std": 0.21401627436280252,
      "rewards/accuracy_reward": 0.7656250178813935,
      "rewards/format_reward": 0.0,
      "step": 85
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 579.4776222229004,
      "epoch": 0.767590618336887,
      "grad_norm": 0.2030484454112882,
      "kl": 0.010486984252929687,
      "learning_rate": 4.63406026519703e-07,
      "loss": 0.0328,
      "reward": 0.7552083522081375,
      "reward_std": 0.16649889973923565,
      "rewards/accuracy_reward": 0.7552083522081375,
      "rewards/format_reward": 0.0,
      "step": 90
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 586.7932479858398,
      "epoch": 0.8102345415778252,
      "grad_norm": 0.11451963582699543,
      "kl": 0.006160736083984375,
      "learning_rate": 3.133934480154885e-07,
      "loss": 0.0294,
      "reward": 0.7520833507180213,
      "reward_std": 0.18174212109297513,
      "rewards/accuracy_reward": 0.7520833507180213,
      "rewards/format_reward": 0.0,
      "step": 95
    },
    {
      "epoch": 0.8528784648187633,
      "grad_norm": 0.1403395507058327,
      "learning_rate": 1.8988769907430552e-07,
      "loss": 0.0343,
      "step": 100
    },
    {
      "epoch": 0.8528784648187633,
      "eval_clip_ratio": 0.0,
      "eval_completion_length": 571.9227185058594,
      "eval_kl": 0.0098244873046875,
      "eval_loss": 0.019065221771597862,
      "eval_reward": 0.6759333529949189,
      "eval_reward_std": 0.2166214306771755,
      "eval_rewards/accuracy_reward": 0.6759333529949189,
      "eval_rewards/format_reward": 0.0,
      "eval_runtime": 11129.6267,
      "eval_samples_per_second": 0.449,
      "eval_steps_per_second": 0.009,
      "step": 100
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 583.2151233673096,
      "epoch": 0.8955223880597015,
      "grad_norm": 0.1561949193212127,
      "kl": 0.005991172790527344,
      "learning_rate": 9.564769404039419e-08,
      "loss": 0.0236,
      "reward": 0.7598958529531956,
      "reward_std": 0.1863211216405034,
      "rewards/accuracy_reward": 0.7598958529531956,
      "rewards/format_reward": 0.0,
      "step": 105
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 586.4302299499511,
      "epoch": 0.9381663113006397,
      "grad_norm": 5.136938540934993,
      "kl": 0.008953857421875,
      "learning_rate": 3.277859889929147e-08,
      "loss": 0.0333,
      "reward": 0.7864583492279053,
      "reward_std": 0.1887844305485487,
      "rewards/accuracy_reward": 0.7864583492279053,
      "rewards/format_reward": 0.0,
      "step": 110
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 569.3703300476075,
      "epoch": 0.9808102345415778,
      "grad_norm": 0.147469744170478,
      "kl": 0.00705413818359375,
      "learning_rate": 2.684805348397268e-09,
      "loss": 0.0296,
      "reward": 0.7828125163912774,
      "reward_std": 0.16681436980143188,
      "rewards/accuracy_reward": 0.7828125163912774,
      "rewards/format_reward": 0.0,
      "step": 115
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 583.3411636352539,
      "epoch": 0.997867803837953,
      "kl": 0.007679939270019531,
      "reward": 0.7473958507180214,
      "reward_std": 0.18170781643129885,
      "rewards/accuracy_reward": 0.7473958507180214,
      "rewards/format_reward": 0.0,
      "step": 117,
      "total_flos": 0.0,
      "train_loss": 0.034317180164094664,
      "train_runtime": 31905.6637,
      "train_samples_per_second": 0.235,
      "train_steps_per_second": 0.004
    }
  ],
  "logging_steps": 5,
  "max_steps": 117,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}