{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.053304904051172705, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010660980810234541, "grad_norm": 0.051327500492334366, "learning_rate": 4.999451708687114e-06, "logits/chosen": 14.755006790161133, "logits/rejected": 14.735244750976562, "logps/chosen": -0.29377540946006775, "logps/rejected": -0.30969956517219543, "loss": 0.952, "rewards/accuracies": 0.4375, "rewards/chosen": -0.44066309928894043, "rewards/margins": 0.023886267095804214, "rewards/rejected": -0.46454939246177673, "step": 10 }, { "epoch": 0.021321961620469083, "grad_norm": 0.04346882924437523, "learning_rate": 4.997807075247147e-06, "logits/chosen": 14.513801574707031, "logits/rejected": 14.946454048156738, "logps/chosen": -0.27995699644088745, "logps/rejected": -0.30138006806373596, "loss": 0.9726, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.4199354648590088, "rewards/margins": 0.03213457390666008, "rewards/rejected": -0.45207005739212036, "step": 20 }, { "epoch": 0.031982942430703626, "grad_norm": 0.05228634551167488, "learning_rate": 4.9950668210706795e-06, "logits/chosen": 14.266324043273926, "logits/rejected": 14.423965454101562, "logps/chosen": -0.2919609546661377, "logps/rejected": -0.32358455657958984, "loss": 0.9622, "rewards/accuracies": 0.5, "rewards/chosen": -0.43794146180152893, "rewards/margins": 0.047435395419597626, "rewards/rejected": -0.48537683486938477, "step": 30 }, { "epoch": 0.042643923240938165, "grad_norm": 0.05487598106265068, "learning_rate": 4.9912321481237616e-06, "logits/chosen": 14.965211868286133, "logits/rejected": 15.058088302612305, "logps/chosen": -0.277716726064682, "logps/rejected": -0.3055034577846527, "loss": 0.9403, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.4165751039981842, "rewards/margins": 0.04168009012937546, "rewards/rejected": -0.4582551419734955, "step": 40 }, { "epoch": 0.053304904051172705, "grad_norm": 0.057255037128925323, "learning_rate": 4.986304738420684e-06, "logits/chosen": 14.539288520812988, "logits/rejected": 15.174041748046875, "logps/chosen": -0.26362231373786926, "logps/rejected": -0.3325727581977844, "loss": 0.9588, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.3954334557056427, "rewards/margins": 0.10342560708522797, "rewards/rejected": -0.49885907769203186, "step": 50 }, { "epoch": 0.053304904051172705, "eval_logits/chosen": 14.618952751159668, "eval_logits/rejected": 15.176809310913086, "eval_logps/chosen": -0.2685677409172058, "eval_logps/rejected": -0.3283654451370239, "eval_loss": 0.9551004767417908, "eval_rewards/accuracies": 0.5131579041481018, "eval_rewards/chosen": -0.4028516113758087, "eval_rewards/margins": 0.08969658613204956, "eval_rewards/rejected": -0.4925481975078583, "eval_runtime": 21.4453, "eval_samples_per_second": 28.305, "eval_steps_per_second": 3.544, "step": 50 } ], "logging_steps": 10, "max_steps": 1500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.207668074044457e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }