llama-3.2-3b-dpo / all_results.json
tanliboy's picture
End of training
107b694 verified
raw
history blame contribute delete
765 Bytes
{
"epoch": 3.0,
"eval_logits/chosen": 0.3730663061141968,
"eval_logits/rejected": 0.4475269019603729,
"eval_logps/chosen": -338.3392028808594,
"eval_logps/rejected": -370.232666015625,
"eval_loss": 0.6289177536964417,
"eval_rewards/accuracies": 0.7405063509941101,
"eval_rewards/chosen": 0.7478683590888977,
"eval_rewards/margins": 4.585729122161865,
"eval_rewards/rejected": -3.8378612995147705,
"eval_runtime": 70.1775,
"eval_samples": 2500,
"eval_samples_per_second": 35.624,
"eval_steps_per_second": 1.126,
"total_flos": 0.0,
"train_loss": 0.5009220597491634,
"train_runtime": 6227.6413,
"train_samples": 26990,
"train_samples_per_second": 13.002,
"train_steps_per_second": 0.102
}