|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9936305732484076, |
|
"eval_steps": 500, |
|
"global_step": 78, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012738853503184714, |
|
"grad_norm": 11.358307592517615, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.9414963722229004, |
|
"logits/rejected": -2.8714659214019775, |
|
"logps/chosen": -311.84521484375, |
|
"logps/pi_response": -74.39646911621094, |
|
"logps/ref_response": -74.39646911621094, |
|
"logps/rejected": -137.14251708984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.12738853503184713, |
|
"grad_norm": 9.998455044790436, |
|
"learning_rate": 4.989935734988097e-07, |
|
"logits/chosen": -2.8198399543762207, |
|
"logits/rejected": -2.7871909141540527, |
|
"logps/chosen": -243.38845825195312, |
|
"logps/pi_response": -64.48577117919922, |
|
"logps/ref_response": -64.41886138916016, |
|
"logps/rejected": -162.31851196289062, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.006698554381728172, |
|
"rewards/margins": 0.002677548211067915, |
|
"rewards/rejected": 0.00402100570499897, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.25477707006369427, |
|
"grad_norm": 8.569494020475565, |
|
"learning_rate": 4.646121984004665e-07, |
|
"logits/chosen": -2.7100276947021484, |
|
"logits/rejected": -2.675428867340088, |
|
"logps/chosen": -269.69061279296875, |
|
"logps/pi_response": -83.09251403808594, |
|
"logps/ref_response": -78.46118927001953, |
|
"logps/rejected": -180.74826049804688, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.06145774573087692, |
|
"rewards/margins": 0.06889880448579788, |
|
"rewards/rejected": -0.007441061083227396, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3821656050955414, |
|
"grad_norm": 9.060999242111171, |
|
"learning_rate": 3.877242453630256e-07, |
|
"logits/chosen": -2.6871438026428223, |
|
"logits/rejected": -2.6598358154296875, |
|
"logps/chosen": -257.09381103515625, |
|
"logps/pi_response": -102.15229797363281, |
|
"logps/ref_response": -74.38651275634766, |
|
"logps/rejected": -186.90274047851562, |
|
"loss": 0.6379, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.028406163677573204, |
|
"rewards/margins": 0.16392305493354797, |
|
"rewards/rejected": -0.19232919812202454, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5095541401273885, |
|
"grad_norm": 9.618564789622928, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"logits/chosen": -2.63045072555542, |
|
"logits/rejected": -2.6070001125335693, |
|
"logps/chosen": -235.0849151611328, |
|
"logps/pi_response": -111.06126403808594, |
|
"logps/ref_response": -67.13166046142578, |
|
"logps/rejected": -217.4446563720703, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.15396903455257416, |
|
"rewards/margins": 0.16582393646240234, |
|
"rewards/rejected": -0.3197929263114929, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6369426751592356, |
|
"grad_norm": 11.295831887724951, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"logits/chosen": -2.6650638580322266, |
|
"logits/rejected": -2.6218314170837402, |
|
"logps/chosen": -272.8438720703125, |
|
"logps/pi_response": -133.91879272460938, |
|
"logps/ref_response": -70.64263916015625, |
|
"logps/rejected": -217.2205352783203, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20129744708538055, |
|
"rewards/margins": 0.3560473322868347, |
|
"rewards/rejected": -0.5573447346687317, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7643312101910829, |
|
"grad_norm": 11.795692669450544, |
|
"learning_rate": 7.723433775328384e-08, |
|
"logits/chosen": -2.7076563835144043, |
|
"logits/rejected": -2.654421329498291, |
|
"logps/chosen": -280.95416259765625, |
|
"logps/pi_response": -140.62283325195312, |
|
"logps/ref_response": -70.89532470703125, |
|
"logps/rejected": -252.49368286132812, |
|
"loss": 0.5608, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2872604727745056, |
|
"rewards/margins": 0.3657943606376648, |
|
"rewards/rejected": -0.6530548334121704, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.89171974522293, |
|
"grad_norm": 13.957707294493547, |
|
"learning_rate": 1.5941282340065697e-08, |
|
"logits/chosen": -2.670067310333252, |
|
"logits/rejected": -2.6604971885681152, |
|
"logps/chosen": -266.5218200683594, |
|
"logps/pi_response": -149.20486450195312, |
|
"logps/ref_response": -73.49095153808594, |
|
"logps/rejected": -272.1815185546875, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.34963458776474, |
|
"rewards/margins": 0.4036545753479004, |
|
"rewards/rejected": -0.7532891035079956, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9936305732484076, |
|
"step": 78, |
|
"total_flos": 0.0, |
|
"train_loss": 0.604345291088789, |
|
"train_runtime": 1779.6676, |
|
"train_samples_per_second": 5.619, |
|
"train_steps_per_second": 0.044 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 78, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|