|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9874476987447699, |
|
"eval_steps": 500, |
|
"global_step": 59, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016736401673640166, |
|
"grad_norm": 5.772511137926767, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -2.801664352416992, |
|
"logits/rejected": -2.8308780193328857, |
|
"logps/chosen": -137.20802307128906, |
|
"logps/pi_response": -133.3135223388672, |
|
"logps/ref_response": -133.3135223388672, |
|
"logps/rejected": -141.6568145751953, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16736401673640167, |
|
"grad_norm": 5.866571559196428, |
|
"learning_rate": 4.930057285201027e-07, |
|
"logits/chosen": -2.7963476181030273, |
|
"logits/rejected": -2.7924842834472656, |
|
"logps/chosen": -155.5491943359375, |
|
"logps/pi_response": -153.5810089111328, |
|
"logps/ref_response": -153.69000244140625, |
|
"logps/rejected": -155.2954559326172, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4340277910232544, |
|
"rewards/chosen": 0.0005564650637097657, |
|
"rewards/margins": -0.00014017036301083863, |
|
"rewards/rejected": 0.0006966353976167738, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.33472803347280333, |
|
"grad_norm": 5.991267980339142, |
|
"learning_rate": 4.187457503795526e-07, |
|
"logits/chosen": -2.828132152557373, |
|
"logits/rejected": -2.8157670497894287, |
|
"logps/chosen": -149.9945526123047, |
|
"logps/pi_response": -149.78407287597656, |
|
"logps/ref_response": -147.89337158203125, |
|
"logps/rejected": -152.16629028320312, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.4906249940395355, |
|
"rewards/chosen": -0.019678335636854172, |
|
"rewards/margins": -8.539492409909144e-05, |
|
"rewards/rejected": -0.01959294266998768, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"grad_norm": 6.102937202185575, |
|
"learning_rate": 2.8691164100062034e-07, |
|
"logits/chosen": -2.784358263015747, |
|
"logits/rejected": -2.7720556259155273, |
|
"logps/chosen": -156.15835571289062, |
|
"logps/pi_response": -155.635009765625, |
|
"logps/ref_response": -141.50277709960938, |
|
"logps/rejected": -156.25270080566406, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": -0.13776005804538727, |
|
"rewards/margins": 0.00394659535959363, |
|
"rewards/rejected": -0.1417066603899002, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6694560669456067, |
|
"grad_norm": 6.597531175861638, |
|
"learning_rate": 1.4248369943086995e-07, |
|
"logits/chosen": -2.7550230026245117, |
|
"logits/rejected": -2.753791093826294, |
|
"logps/chosen": -157.1768341064453, |
|
"logps/pi_response": -156.76754760742188, |
|
"logps/ref_response": -143.01797485351562, |
|
"logps/rejected": -155.65341186523438, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.13175955414772034, |
|
"rewards/margins": 0.008598078042268753, |
|
"rewards/rejected": -0.1403576284646988, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 6.081555901091694, |
|
"learning_rate": 3.473909705816111e-08, |
|
"logits/chosen": -2.7671895027160645, |
|
"logits/rejected": -2.767414093017578, |
|
"logps/chosen": -149.20730590820312, |
|
"logps/pi_response": -149.70562744140625, |
|
"logps/ref_response": -138.1813201904297, |
|
"logps/rejected": -146.49392700195312, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.11548925936222076, |
|
"rewards/margins": 0.0021973345428705215, |
|
"rewards/rejected": -0.11768659204244614, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9874476987447699, |
|
"step": 59, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6921672336125778, |
|
"train_runtime": 2751.1622, |
|
"train_samples_per_second": 5.555, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 59, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|