{ "epoch": 3.0, "eval_logits/chosen": -2.0658671855926514, "eval_logits/rejected": -1.9411793947219849, "eval_logps/chosen": -266.13037109375, "eval_logps/rejected": -228.2694091796875, "eval_loss": 0.5265706777572632, "eval_rewards/accuracies": 0.7459999918937683, "eval_rewards/chosen": -0.1469534933567047, "eval_rewards/margins": 0.7515553832054138, "eval_rewards/rejected": -0.8985088467597961, "eval_runtime": 453.4186, "eval_samples": 2000, "eval_samples_per_second": 4.411, "eval_steps_per_second": 0.276 }