phi3m0128-wds-0.3-kendall-onof-ofif-corr-max-2-simpo-max1500-default
/
checkpoint-50
/trainer_state.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 0.06349206349206349, | |
"eval_steps": 50, | |
"global_step": 50, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.012698412698412698, | |
"grad_norm": 0.04716634005308151, | |
"learning_rate": 4.999451708687114e-06, | |
"logits/chosen": 14.883691787719727, | |
"logits/rejected": 15.016583442687988, | |
"logps/chosen": -0.29512909054756165, | |
"logps/rejected": -0.30033987760543823, | |
"loss": 1.0007, | |
"rewards/accuracies": 0.36250001192092896, | |
"rewards/chosen": -0.4426936209201813, | |
"rewards/margins": 0.007816222496330738, | |
"rewards/rejected": -0.45050984621047974, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.025396825396825397, | |
"grad_norm": 0.04392225295305252, | |
"learning_rate": 4.997807075247147e-06, | |
"logits/chosen": 14.705224990844727, | |
"logits/rejected": 14.737253189086914, | |
"logps/chosen": -0.3201700747013092, | |
"logps/rejected": -0.280050128698349, | |
"loss": 0.9984, | |
"rewards/accuracies": 0.26249998807907104, | |
"rewards/chosen": -0.4802550673484802, | |
"rewards/margins": -0.060179851949214935, | |
"rewards/rejected": -0.4200752377510071, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.0380952380952381, | |
"grad_norm": 0.04989476501941681, | |
"learning_rate": 4.9950668210706795e-06, | |
"logits/chosen": 15.147977828979492, | |
"logits/rejected": 15.080279350280762, | |
"logps/chosen": -0.3157380223274231, | |
"logps/rejected": -0.2997627556324005, | |
"loss": 0.9991, | |
"rewards/accuracies": 0.2874999940395355, | |
"rewards/chosen": -0.47360706329345703, | |
"rewards/margins": -0.023962898179888725, | |
"rewards/rejected": -0.44964417815208435, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.050793650793650794, | |
"grad_norm": 0.054729390889406204, | |
"learning_rate": 4.9912321481237616e-06, | |
"logits/chosen": 15.301165580749512, | |
"logits/rejected": 15.276555061340332, | |
"logps/chosen": -0.30470195412635803, | |
"logps/rejected": -0.29767152667045593, | |
"loss": 0.9849, | |
"rewards/accuracies": 0.36250001192092896, | |
"rewards/chosen": -0.45705294609069824, | |
"rewards/margins": -0.010545584373176098, | |
"rewards/rejected": -0.44650736451148987, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.06349206349206349, | |
"grad_norm": 0.06035450100898743, | |
"learning_rate": 4.986304738420684e-06, | |
"logits/chosen": 14.600168228149414, | |
"logits/rejected": 14.7944917678833, | |
"logps/chosen": -0.32278841733932495, | |
"logps/rejected": -0.3014402687549591, | |
"loss": 0.9991, | |
"rewards/accuracies": 0.3499999940395355, | |
"rewards/chosen": -0.48418259620666504, | |
"rewards/margins": -0.03202226758003235, | |
"rewards/rejected": -0.4521603584289551, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.06349206349206349, | |
"eval_logits/chosen": 15.261337280273438, | |
"eval_logits/rejected": 15.51547908782959, | |
"eval_logps/chosen": -0.3022651970386505, | |
"eval_logps/rejected": -0.3061661124229431, | |
"eval_loss": 0.9846106171607971, | |
"eval_rewards/accuracies": 0.40625, | |
"eval_rewards/chosen": -0.4533977508544922, | |
"eval_rewards/margins": 0.005851435009390116, | |
"eval_rewards/rejected": -0.45924919843673706, | |
"eval_runtime": 18.4033, | |
"eval_samples_per_second": 27.712, | |
"eval_steps_per_second": 3.478, | |
"step": 50 | |
} | |
], | |
"logging_steps": 10, | |
"max_steps": 1500, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 50, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 1.195225510838272e+17, | |
"train_batch_size": 1, | |
"trial_name": null, | |
"trial_params": null | |
} | |