{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9905956112852664,
"eval_steps": 500,
"global_step": 79,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 6.25e-08,
"logits/chosen": -2.8962948322296143,
"logits/rejected": -2.8726210594177246,
"logps/chosen": -309.3533935546875,
"logps/pi_response": -77.43819427490234,
"logps/ref_response": -77.43819427490234,
"logps/rejected": -128.35205078125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.13,
"learning_rate": 4.990217055187362e-07,
"logits/chosen": -2.780714988708496,
"logits/rejected": -2.7658631801605225,
"logps/chosen": -218.54730224609375,
"logps/pi_response": -74.19415283203125,
"logps/ref_response": -73.22980499267578,
"logps/rejected": -116.16886138916016,
"loss": 0.6877,
"rewards/accuracies": 0.5798611044883728,
"rewards/chosen": 0.002589114010334015,
"rewards/margins": 0.007948823273181915,
"rewards/rejected": -0.005359708331525326,
"step": 10
},
{
"epoch": 0.25,
"learning_rate": 4.655786431300069e-07,
"logits/chosen": -2.681969165802002,
"logits/rejected": -2.654804229736328,
"logps/chosen": -246.5304412841797,
"logps/pi_response": -92.14692687988281,
"logps/ref_response": -71.16036224365234,
"logps/rejected": -125.13505554199219,
"loss": 0.6353,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.01587364450097084,
"rewards/margins": 0.15707838535308838,
"rewards/rejected": -0.17295202612876892,
"step": 20
},
{
"epoch": 0.38,
"learning_rate": 3.9061232191019517e-07,
"logits/chosen": -2.5660078525543213,
"logits/rejected": -2.5342659950256348,
"logps/chosen": -237.02120971679688,
"logps/pi_response": -124.30322265625,
"logps/ref_response": -68.76017761230469,
"logps/rejected": -154.5250244140625,
"loss": 0.6022,
"rewards/accuracies": 0.7718750238418579,
"rewards/chosen": -0.1920473277568817,
"rewards/margins": 0.27519237995147705,
"rewards/rejected": -0.4672396779060364,
"step": 30
},
{
"epoch": 0.5,
"learning_rate": 2.8856223324132555e-07,
"logits/chosen": -2.5599398612976074,
"logits/rejected": -2.53330135345459,
"logps/chosen": -255.10220336914062,
"logps/pi_response": -167.27362060546875,
"logps/ref_response": -74.82241821289062,
"logps/rejected": -186.9361114501953,
"loss": 0.5685,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.3857182264328003,
"rewards/margins": 0.41731375455856323,
"rewards/rejected": -0.8030319213867188,
"step": 40
},
{
"epoch": 0.63,
"learning_rate": 1.7908455541642582e-07,
"logits/chosen": -2.575723171234131,
"logits/rejected": -2.5395498275756836,
"logps/chosen": -292.4133605957031,
"logps/pi_response": -174.6892547607422,
"logps/ref_response": -73.51496124267578,
"logps/rejected": -193.74050903320312,
"loss": 0.5498,
"rewards/accuracies": 0.778124988079071,
"rewards/chosen": -0.393137127161026,
"rewards/margins": 0.5005661249160767,
"rewards/rejected": -0.893703281879425,
"step": 50
},
{
"epoch": 0.75,
"learning_rate": 8.32661172908373e-08,
"logits/chosen": -2.5728707313537598,
"logits/rejected": -2.539797782897949,
"logps/chosen": -246.9348602294922,
"logps/pi_response": -170.87734985351562,
"logps/ref_response": -65.44251251220703,
"logps/rejected": -194.9793701171875,
"loss": 0.546,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.40198835730552673,
"rewards/margins": 0.5459630489349365,
"rewards/rejected": -0.9479514360427856,
"step": 60
},
{
"epoch": 0.88,
"learning_rate": 1.956279997278043e-08,
"logits/chosen": -2.576380729675293,
"logits/rejected": -2.5463321208953857,
"logps/chosen": -285.2043151855469,
"logps/pi_response": -187.4281005859375,
"logps/ref_response": -77.24393463134766,
"logps/rejected": -208.2620391845703,
"loss": 0.5361,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.3984689712524414,
"rewards/margins": 0.5768924355506897,
"rewards/rejected": -0.9753614664077759,
"step": 70
},
{
"epoch": 0.99,
"step": 79,
"total_flos": 0.0,
"train_loss": 0.5799291465855851,
"train_runtime": 4369.6481,
"train_samples_per_second": 4.664,
"train_steps_per_second": 0.018
}
],
"logging_steps": 10,
"max_steps": 79,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
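
A minimal sketch (not part of the original repository) of one way to inspect this trainer_state.json with Python's standard library: it walks log_history and prints the loss, reward margin, and accuracy logged every 10 steps, then the final run summary. The file path "trainer_state.json" is an assumption; only keys present in the file above are used.

import json

# Load the state file written by the Hugging Face Trainer during training.
# NOTE: the path is an assumption for this sketch.
with open("trainer_state.json") as f:
    state = json.load(f)

# Intermediate log_history entries carry per-step metrics ("loss", "rewards/..."),
# while the last entry summarizes the whole run ("train_loss", "train_runtime", ...).
for entry in state["log_history"]:
    if "loss" in entry:
        print(
            f"step {entry['step']:>3} | "
            f"loss {entry['loss']:.4f} | "
            f"reward margin {entry['rewards/margins']:.4f} | "
            f"accuracy {entry['rewards/accuracies']:.3f}"
        )
    else:
        print(f"final: train_loss {entry['train_loss']:.4f} after {entry['step']} steps")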