{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.06349206349206349, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012698412698412698, "grad_norm": 0.04716634005308151, "learning_rate": 4.999451708687114e-06, "logits/chosen": 14.883691787719727, "logits/rejected": 15.016583442687988, "logps/chosen": -0.29512909054756165, "logps/rejected": -0.30033987760543823, "loss": 1.0007, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -0.4426936209201813, "rewards/margins": 0.007816222496330738, "rewards/rejected": -0.45050984621047974, "step": 10 }, { "epoch": 0.025396825396825397, "grad_norm": 0.04392225295305252, "learning_rate": 4.997807075247147e-06, "logits/chosen": 14.705224990844727, "logits/rejected": 14.737253189086914, "logps/chosen": -0.3201700747013092, "logps/rejected": -0.280050128698349, "loss": 0.9984, "rewards/accuracies": 0.26249998807907104, "rewards/chosen": -0.4802550673484802, "rewards/margins": -0.060179851949214935, "rewards/rejected": -0.4200752377510071, "step": 20 }, { "epoch": 0.0380952380952381, "grad_norm": 0.04989476501941681, "learning_rate": 4.9950668210706795e-06, "logits/chosen": 15.147977828979492, "logits/rejected": 15.080279350280762, "logps/chosen": -0.3157380223274231, "logps/rejected": -0.2997627556324005, "loss": 0.9991, "rewards/accuracies": 0.2874999940395355, "rewards/chosen": -0.47360706329345703, "rewards/margins": -0.023962898179888725, "rewards/rejected": -0.44964417815208435, "step": 30 }, { "epoch": 0.050793650793650794, "grad_norm": 0.054729390889406204, "learning_rate": 4.9912321481237616e-06, "logits/chosen": 15.301165580749512, "logits/rejected": 15.276555061340332, "logps/chosen": -0.30470195412635803, "logps/rejected": -0.29767152667045593, "loss": 0.9849, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -0.45705294609069824, "rewards/margins": -0.010545584373176098, "rewards/rejected": -0.44650736451148987, "step": 40 }, { "epoch": 0.06349206349206349, "grad_norm": 0.06035450100898743, "learning_rate": 4.986304738420684e-06, "logits/chosen": 14.600168228149414, "logits/rejected": 14.7944917678833, "logps/chosen": -0.32278841733932495, "logps/rejected": -0.3014402687549591, "loss": 0.9991, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.48418259620666504, "rewards/margins": -0.03202226758003235, "rewards/rejected": -0.4521603584289551, "step": 50 }, { "epoch": 0.06349206349206349, "eval_logits/chosen": 15.261337280273438, "eval_logits/rejected": 15.51547908782959, "eval_logps/chosen": -0.3022651970386505, "eval_logps/rejected": -0.3061661124229431, "eval_loss": 0.9846106171607971, "eval_rewards/accuracies": 0.40625, "eval_rewards/chosen": -0.4533977508544922, "eval_rewards/margins": 0.005851435009390116, "eval_rewards/rejected": -0.45924919843673706, "eval_runtime": 18.4033, "eval_samples_per_second": 27.712, "eval_steps_per_second": 3.478, "step": 50 } ], "logging_steps": 10, "max_steps": 1500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.195225510838272e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }