{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9905956112852664,
  "eval_steps": 500,
  "global_step": 79,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 6.25e-08,
      "logits/chosen": -1.9257383346557617,
      "logits/rejected": -1.8281910419464111,
      "logps/chosen": -284.4794006347656,
      "logps/pi_response": -150.5237579345703,
      "logps/ref_response": -150.5237579345703,
      "logps/rejected": -405.0362548828125,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.990217055187362e-07,
      "logits/chosen": -2.212477445602417,
      "logits/rejected": -2.1497442722320557,
      "logps/chosen": -276.08154296875,
      "logps/pi_response": -147.98231506347656,
      "logps/ref_response": -147.419921875,
      "logps/rejected": -487.8414611816406,
      "loss": 0.6391,
      "rewards/accuracies": 0.6805555820465088,
      "rewards/chosen": -0.1082373857498169,
      "rewards/margins": 0.1503264605998993,
      "rewards/rejected": -0.2585638761520386,
      "step": 10
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.655786431300069e-07,
      "logits/chosen": -1.8299214839935303,
      "logits/rejected": -1.7438690662384033,
      "logps/chosen": -359.1844177246094,
      "logps/pi_response": -191.56143188476562,
      "logps/ref_response": -139.44454956054688,
      "logps/rejected": -768.1581420898438,
      "loss": 0.5296,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.2584470510482788,
      "rewards/margins": 1.8408260345458984,
      "rewards/rejected": -3.099273204803467,
      "step": 20
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.9061232191019517e-07,
      "logits/chosen": -1.8008167743682861,
      "logits/rejected": -1.6909490823745728,
      "logps/chosen": -325.8211669921875,
      "logps/pi_response": -155.09640502929688,
      "logps/ref_response": -142.6388702392578,
      "logps/rejected": -639.9616088867188,
      "loss": 0.491,
      "rewards/accuracies": 0.7718750238418579,
      "rewards/chosen": -0.7706281542778015,
      "rewards/margins": 1.2716760635375977,
      "rewards/rejected": -2.042304277420044,
      "step": 30
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.8856223324132555e-07,
      "logits/chosen": -1.623008131980896,
      "logits/rejected": -1.4080731868743896,
      "logps/chosen": -278.64862060546875,
      "logps/pi_response": -127.5015869140625,
      "logps/ref_response": -131.16574096679688,
      "logps/rejected": -564.2521362304688,
      "loss": 0.4488,
      "rewards/accuracies": 0.815625011920929,
      "rewards/chosen": -0.38128334283828735,
      "rewards/margins": 0.9983797073364258,
      "rewards/rejected": -1.3796632289886475,
      "step": 40
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.7908455541642582e-07,
      "logits/chosen": -1.3091672658920288,
      "logits/rejected": -1.1413580179214478,
      "logps/chosen": -334.6408386230469,
      "logps/pi_response": -144.52418518066406,
      "logps/ref_response": -135.2247314453125,
      "logps/rejected": -641.8518676757812,
      "loss": 0.4267,
      "rewards/accuracies": 0.796875,
      "rewards/chosen": -0.6444205045700073,
      "rewards/margins": 1.1075389385223389,
      "rewards/rejected": -1.751959204673767,
      "step": 50
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.32661172908373e-08,
      "logits/chosen": -1.032797932624817,
      "logits/rejected": -0.8024501800537109,
      "logps/chosen": -304.02117919921875,
      "logps/pi_response": -153.7646026611328,
      "logps/ref_response": -135.7929229736328,
      "logps/rejected": -670.0108642578125,
      "loss": 0.4092,
      "rewards/accuracies": 0.8031250238418579,
      "rewards/chosen": -0.6491419076919556,
      "rewards/margins": 1.4545631408691406,
      "rewards/rejected": -2.1037049293518066,
      "step": 60
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.956279997278043e-08,
      "logits/chosen": -1.020077109336853,
      "logits/rejected": -0.7239805459976196,
      "logps/chosen": -318.3223571777344,
      "logps/pi_response": -173.13136291503906,
      "logps/ref_response": -145.44541931152344,
      "logps/rejected": -716.7947998046875,
      "loss": 0.3997,
      "rewards/accuracies": 0.828125,
      "rewards/chosen": -0.6952292323112488,
      "rewards/margins": 1.7436481714248657,
      "rewards/rejected": -2.4388773441314697,
      "step": 70
    },
    {
      "epoch": 0.99,
      "step": 79,
      "total_flos": 0.0,
      "train_loss": 0.46834129019628595,
      "train_runtime": 4610.1097,
      "train_samples_per_second": 4.42,
      "train_steps_per_second": 0.017
    }
  ],
  "logging_steps": 10,
  "max_steps": 79,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}