ppo / trainer_state.json
nileshmalpeddi's picture
End of training
69de038 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"episode": 2,
"epoch": 6.55694708543702e-05,
"eval_steps": 500,
"global_step": 1,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"episode": 2,
"epoch": 6.55694708543702e-05,
"eps": 0,
"loss/policy_avg": 0.21204827725887299,
"loss/value_avg": 33.20888137817383,
"lr": 3e-06,
"objective/entropy": 70.37702941894531,
"objective/kl": 427.1629638671875,
"objective/non_score_reward": -21.3581485748291,
"objective/rlhf_reward": -17.6393985748291,
"objective/scores": 3.71875,
"policy/approxkl_avg": 0.31922289729118347,
"policy/clipfrac_avg": 0.1320754736661911,
"policy/entropy_avg": 1.321390151977539,
"step": 1,
"val/clipfrac_avg": 0.0,
"val/num_eos_tokens": 1,
"val/ratio": 1.5207229852676392,
"val/ratio_var": 0.5327765941619873
}
],
"logging_steps": 500,
"max_steps": 1,
"num_input_tokens_seen": 0,
"num_train_epochs": 3.27847354271851e-05,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0,
"train_batch_size": null,
"trial_name": null,
"trial_params": null
}