|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 5, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.3972699642181396, |
|
"logits/rejected": -2.39332914352417, |
|
"logps/chosen": -153.26783752441406, |
|
"logps/rejected": -146.77935791015625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": -2.1716835498809814, |
|
"eval_logits/rejected": -2.2157046794891357, |
|
"eval_logps/chosen": -103.76641845703125, |
|
"eval_logps/rejected": -101.35842895507812, |
|
"eval_loss": 0.6931473016738892, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 6.5465, |
|
"eval_samples_per_second": 4.583, |
|
"eval_steps_per_second": 0.153, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_logits/chosen": -2.1715452671051025, |
|
"eval_logits/rejected": -2.2146592140197754, |
|
"eval_logps/chosen": -103.4699935913086, |
|
"eval_logps/rejected": -101.27898406982422, |
|
"eval_loss": 0.694814920425415, |
|
"eval_rewards/accuracies": 0.5, |
|
"eval_rewards/chosen": 0.02964324876666069, |
|
"eval_rewards/margins": 0.021698763594031334, |
|
"eval_rewards/rejected": 0.007944487035274506, |
|
"eval_runtime": 6.0506, |
|
"eval_samples_per_second": 4.958, |
|
"eval_steps_per_second": 0.165, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_logits/chosen": -2.170850992202759, |
|
"eval_logits/rejected": -2.215348482131958, |
|
"eval_logps/chosen": -103.48912811279297, |
|
"eval_logps/rejected": -101.26887512207031, |
|
"eval_loss": 0.6913403868675232, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": 0.027730178087949753, |
|
"eval_rewards/margins": 0.018774602562189102, |
|
"eval_rewards/rejected": 0.00895557552576065, |
|
"eval_runtime": 6.0481, |
|
"eval_samples_per_second": 4.96, |
|
"eval_steps_per_second": 0.165, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_logits/chosen": -2.1724095344543457, |
|
"eval_logits/rejected": -2.2160496711730957, |
|
"eval_logps/chosen": -102.96302795410156, |
|
"eval_logps/rejected": -101.06044006347656, |
|
"eval_loss": 0.6873850226402283, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.08033924549818039, |
|
"eval_rewards/margins": 0.05054035410284996, |
|
"eval_rewards/rejected": 0.02979888767004013, |
|
"eval_runtime": 6.0751, |
|
"eval_samples_per_second": 4.938, |
|
"eval_steps_per_second": 0.165, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 5, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6922631859779358, |
|
"train_runtime": 883.8621, |
|
"train_samples_per_second": 3.541, |
|
"train_steps_per_second": 0.006 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|