{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 100, "global_step": 5, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "learning_rate": 5e-07, "logits/chosen": -2.3972699642181396, "logits/rejected": -2.39332914352417, "logps/chosen": -153.26783752441406, "logps/rejected": -146.77935791015625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.8, "eval_logits/chosen": -2.1716835498809814, "eval_logits/rejected": -2.2157046794891357, "eval_logps/chosen": -103.76641845703125, "eval_logps/rejected": -101.35842895507812, "eval_loss": 0.6931473016738892, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": 0.0, "eval_rewards/margins": 0.0, "eval_rewards/rejected": 0.0, "eval_runtime": 6.5465, "eval_samples_per_second": 4.583, "eval_steps_per_second": 0.153, "step": 1 }, { "epoch": 1.6, "eval_logits/chosen": -2.1715452671051025, "eval_logits/rejected": -2.2146592140197754, "eval_logps/chosen": -103.4699935913086, "eval_logps/rejected": -101.27898406982422, "eval_loss": 0.694814920425415, "eval_rewards/accuracies": 0.5, "eval_rewards/chosen": 0.02964324876666069, "eval_rewards/margins": 0.021698763594031334, "eval_rewards/rejected": 0.007944487035274506, "eval_runtime": 6.0506, "eval_samples_per_second": 4.958, "eval_steps_per_second": 0.165, "step": 2 }, { "epoch": 2.4, "eval_logits/chosen": -2.170850992202759, "eval_logits/rejected": -2.215348482131958, "eval_logps/chosen": -103.48912811279297, "eval_logps/rejected": -101.26887512207031, "eval_loss": 0.6913403868675232, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": 0.027730178087949753, "eval_rewards/margins": 0.018774602562189102, "eval_rewards/rejected": 0.00895557552576065, "eval_runtime": 6.0481, "eval_samples_per_second": 4.96, "eval_steps_per_second": 0.165, "step": 3 }, { "epoch": 4.0, "eval_logits/chosen": -2.1724095344543457, "eval_logits/rejected": -2.2160496711730957, "eval_logps/chosen": -102.96302795410156, "eval_logps/rejected": -101.06044006347656, "eval_loss": 0.6873850226402283, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.08033924549818039, "eval_rewards/margins": 0.05054035410284996, "eval_rewards/rejected": 0.02979888767004013, "eval_runtime": 6.0751, "eval_samples_per_second": 4.938, "eval_steps_per_second": 0.165, "step": 5 }, { "epoch": 4.0, "step": 5, "total_flos": 0.0, "train_loss": 0.6922631859779358, "train_runtime": 883.8621, "train_samples_per_second": 3.541, "train_steps_per_second": 0.006 } ], "logging_steps": 10, "max_steps": 5, "num_train_epochs": 5, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }