{ "epoch": 4.0, "eval_logits/chosen": -2.1724095344543457, "eval_logits/rejected": -2.2160496711730957, "eval_logps/chosen": -102.96302795410156, "eval_logps/rejected": -101.06044006347656, "eval_loss": 0.6873850226402283, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.08033924549818039, "eval_rewards/margins": 0.05054035410284996, "eval_rewards/rejected": 0.02979888767004013, "eval_runtime": 6.0716, "eval_samples": 30, "eval_samples_per_second": 4.941, "eval_steps_per_second": 0.165, "train_loss": 0.6922631859779358, "train_runtime": 883.8621, "train_samples": 626, "train_samples_per_second": 3.541, "train_steps_per_second": 0.006 }