llama3.1-cpo-full-0911 / all_results.json
jbjeong91's picture
End of training
6c56647 verified
raw
history blame contribute delete
828 Bytes
{
"epoch": 2.9956659924877203,
"eval_logits/chosen": -0.34079235792160034,
"eval_logits/rejected": -0.31415677070617676,
"eval_logps/chosen": -143.9447784423828,
"eval_logps/rejected": -155.83648681640625,
"eval_loss": 1.5984355211257935,
"eval_nll_loss": 0.3937048017978668,
"eval_rewards/accuracies": 0.6304348111152649,
"eval_rewards/chosen": -14.394478797912598,
"eval_rewards/margins": 1.1891697645187378,
"eval_rewards/rejected": -15.583648681640625,
"eval_runtime": 73.3871,
"eval_samples": 1826,
"eval_samples_per_second": 24.882,
"eval_steps_per_second": 1.567,
"total_flos": 0.0,
"train_loss": 1.025652496903031,
"train_runtime": 27913.7357,
"train_samples": 55376,
"train_samples_per_second": 5.951,
"train_steps_per_second": 0.046
}