{ "epoch": 2.983957219251337, "eval_logits/chosen": -1.9690638780593872, "eval_logits/rejected": -1.9223570823669434, "eval_logps/chosen": -233.67343139648438, "eval_logps/rejected": -227.1739044189453, "eval_loss": 0.5413760542869568, "eval_rewards/accuracies": 0.7767857313156128, "eval_rewards/chosen": 0.5871802568435669, "eval_rewards/margins": 2.1379942893981934, "eval_rewards/rejected": -1.550813913345337, "eval_runtime": 176.7918, "eval_samples_per_second": 15.046, "eval_steps_per_second": 0.238, "total_flos": 3289753017384960.0, "train_loss": 0.3334879027045329, "train_runtime": 10390.4498, "train_samples_per_second": 6.91, "train_steps_per_second": 0.027 }