{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9905956112852664,
  "eval_steps": 500,
  "global_step": 79,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 6.25e-08,
      "logits/chosen": -2.8962948322296143,
      "logits/rejected": -2.8726210594177246,
      "logps/chosen": -309.3533935546875,
      "logps/pi_response": -77.43819427490234,
      "logps/ref_response": -77.43819427490234,
      "logps/rejected": -128.35205078125,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.990217055187362e-07,
      "logits/chosen": -2.780714988708496,
      "logits/rejected": -2.7658631801605225,
      "logps/chosen": -218.54730224609375,
      "logps/pi_response": -74.19415283203125,
      "logps/ref_response": -73.22980499267578,
      "logps/rejected": -116.16886138916016,
      "loss": 0.6877,
      "rewards/accuracies": 0.5798611044883728,
      "rewards/chosen": 0.002589114010334015,
      "rewards/margins": 0.007948823273181915,
      "rewards/rejected": -0.005359708331525326,
      "step": 10
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.655786431300069e-07,
      "logits/chosen": -2.681969165802002,
      "logits/rejected": -2.654804229736328,
      "logps/chosen": -246.5304412841797,
      "logps/pi_response": -92.14692687988281,
      "logps/ref_response": -71.16036224365234,
      "logps/rejected": -125.13505554199219,
      "loss": 0.6353,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": -0.01587364450097084,
      "rewards/margins": 0.15707838535308838,
      "rewards/rejected": -0.17295202612876892,
      "step": 20
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.9061232191019517e-07,
      "logits/chosen": -2.5660078525543213,
      "logits/rejected": -2.5342659950256348,
      "logps/chosen": -237.02120971679688,
      "logps/pi_response": -124.30322265625,
      "logps/ref_response": -68.76017761230469,
      "logps/rejected": -154.5250244140625,
      "loss": 0.6022,
      "rewards/accuracies": 0.7718750238418579,
      "rewards/chosen": -0.1920473277568817,
      "rewards/margins": 0.27519237995147705,
      "rewards/rejected": -0.4672396779060364,
      "step": 30
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.8856223324132555e-07,
      "logits/chosen": -2.5599398612976074,
      "logits/rejected": -2.53330135345459,
      "logps/chosen": -255.10220336914062,
      "logps/pi_response": -167.27362060546875,
      "logps/ref_response": -74.82241821289062,
      "logps/rejected": -186.9361114501953,
      "loss": 0.5685,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.3857182264328003,
      "rewards/margins": 0.41731375455856323,
      "rewards/rejected": -0.8030319213867188,
      "step": 40
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.7908455541642582e-07,
      "logits/chosen": -2.575723171234131,
      "logits/rejected": -2.5395498275756836,
      "logps/chosen": -292.4133605957031,
      "logps/pi_response": -174.6892547607422,
      "logps/ref_response": -73.51496124267578,
      "logps/rejected": -193.74050903320312,
      "loss": 0.5498,
      "rewards/accuracies": 0.778124988079071,
      "rewards/chosen": -0.393137127161026,
      "rewards/margins": 0.5005661249160767,
      "rewards/rejected": -0.893703281879425,
      "step": 50
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.32661172908373e-08,
      "logits/chosen": -2.5728707313537598,
      "logits/rejected": -2.539797782897949,
      "logps/chosen": -246.9348602294922,
      "logps/pi_response": -170.87734985351562,
      "logps/ref_response": -65.44251251220703,
      "logps/rejected": -194.9793701171875,
      "loss": 0.546,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.40198835730552673,
      "rewards/margins": 0.5459630489349365,
      "rewards/rejected": -0.9479514360427856,
      "step": 60
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.956279997278043e-08,
      "logits/chosen": -2.576380729675293,
      "logits/rejected": -2.5463321208953857,
      "logps/chosen": -285.2043151855469,
      "logps/pi_response": -187.4281005859375,
      "logps/ref_response": -77.24393463134766,
      "logps/rejected": -208.2620391845703,
      "loss": 0.5361,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.3984689712524414,
      "rewards/margins": 0.5768924355506897,
      "rewards/rejected": -0.9753614664077759,
      "step": 70
    },
    {
      "epoch": 0.99,
      "step": 79,
      "total_flos": 0.0,
      "train_loss": 0.5799291465855851,
      "train_runtime": 4369.6481,
      "train_samples_per_second": 4.664,
      "train_steps_per_second": 0.018
    }
  ],
  "logging_steps": 10,
  "max_steps": 79,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}