|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9724770642201834, |
|
"eval_steps": 40, |
|
"global_step": 81, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1834862385321101, |
|
"grad_norm": 87.98338604183625, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.7504944801330566, |
|
"logits/rejected": -2.7376608848571777, |
|
"logps/chosen": -366.7567138671875, |
|
"logps/rejected": -269.1701965332031, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": 0.036848217248916626, |
|
"rewards/margins": 0.02083454094827175, |
|
"rewards/rejected": 0.016013674437999725, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 56.51853547131025, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.680494785308838, |
|
"logits/rejected": -2.666748285293579, |
|
"logps/chosen": -349.07464599609375, |
|
"logps/rejected": -251.41049194335938, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.2032493352890015, |
|
"rewards/margins": 0.6572977900505066, |
|
"rewards/rejected": 0.5459514260292053, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.5504587155963303, |
|
"grad_norm": 50.68732493997359, |
|
"learning_rate": 9.878131657762535e-07, |
|
"logits/chosen": -2.505305290222168, |
|
"logits/rejected": -2.4723613262176514, |
|
"logps/chosen": -344.2197265625, |
|
"logps/rejected": -244.26724243164062, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 3.3816330432891846, |
|
"rewards/margins": 1.7416467666625977, |
|
"rewards/rejected": 1.639986276626587, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 48.68084600724118, |
|
"learning_rate": 9.518467388186019e-07, |
|
"logits/chosen": -2.3610739707946777, |
|
"logits/rejected": -2.312293529510498, |
|
"logps/chosen": -348.7062072753906, |
|
"logps/rejected": -251.2678985595703, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 3.990943431854248, |
|
"rewards/margins": 2.4283714294433594, |
|
"rewards/rejected": 1.5625723600387573, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 43.576923617976306, |
|
"learning_rate": 8.938539866588592e-07, |
|
"logits/chosen": -2.161365509033203, |
|
"logits/rejected": -2.169524669647217, |
|
"logps/chosen": -337.71240234375, |
|
"logps/rejected": -223.12796020507812, |
|
"loss": 0.4429, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 3.8454651832580566, |
|
"rewards/margins": 2.679399013519287, |
|
"rewards/rejected": 1.16606605052948, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.1009174311926606, |
|
"grad_norm": 15.165089558011688, |
|
"learning_rate": 8.166619015240235e-07, |
|
"logits/chosen": -2.103114366531372, |
|
"logits/rejected": -2.0902843475341797, |
|
"logps/chosen": -333.57958984375, |
|
"logps/rejected": -255.9213409423828, |
|
"loss": 0.2402, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 4.685088157653809, |
|
"rewards/margins": 3.5494351387023926, |
|
"rewards/rejected": 1.135652780532837, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.2844036697247707, |
|
"grad_norm": 16.2176567972852, |
|
"learning_rate": 7.240333919937892e-07, |
|
"logits/chosen": -2.1111814975738525, |
|
"logits/rejected": -2.107719898223877, |
|
"logps/chosen": -336.36669921875, |
|
"logps/rejected": -258.0069885253906, |
|
"loss": 0.0996, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 4.797186374664307, |
|
"rewards/margins": 4.8145318031311035, |
|
"rewards/rejected": -0.01734566129744053, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.4678899082568808, |
|
"grad_norm": 23.35951964253066, |
|
"learning_rate": 6.204838512283071e-07, |
|
"logits/chosen": -2.1685638427734375, |
|
"logits/rejected": -2.134986400604248, |
|
"logps/chosen": -321.1785583496094, |
|
"logps/rejected": -274.7965087890625, |
|
"loss": 0.1046, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 4.402096271514893, |
|
"rewards/margins": 4.899113178253174, |
|
"rewards/rejected": -0.4970162808895111, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.4678899082568808, |
|
"eval_logits/chosen": -2.157338857650757, |
|
"eval_logits/rejected": -2.1405954360961914, |
|
"eval_logps/chosen": -318.1454772949219, |
|
"eval_logps/rejected": -250.12286376953125, |
|
"eval_loss": 0.41041094064712524, |
|
"eval_rewards/accuracies": 0.8557692170143127, |
|
"eval_rewards/chosen": 3.198086977005005, |
|
"eval_rewards/margins": 2.8902511596679688, |
|
"eval_rewards/rejected": 0.30783578753471375, |
|
"eval_runtime": 54.4376, |
|
"eval_samples_per_second": 14.126, |
|
"eval_steps_per_second": 0.239, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.6513761467889907, |
|
"grad_norm": 15.026762247253014, |
|
"learning_rate": 5.110610435765934e-07, |
|
"logits/chosen": -2.1868062019348145, |
|
"logits/rejected": -2.1655430793762207, |
|
"logps/chosen": -328.25323486328125, |
|
"logps/rejected": -256.1419677734375, |
|
"loss": 0.1147, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 5.091620445251465, |
|
"rewards/margins": 5.604047775268555, |
|
"rewards/rejected": -0.5124271512031555, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"grad_norm": 20.207846152861585, |
|
"learning_rate": 4.010990395072413e-07, |
|
"logits/chosen": -2.1973319053649902, |
|
"logits/rejected": -2.182528018951416, |
|
"logps/chosen": -318.63800048828125, |
|
"logps/rejected": -282.4230041503906, |
|
"loss": 0.1138, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.645673751831055, |
|
"rewards/margins": 5.373934268951416, |
|
"rewards/rejected": -0.7282606959342957, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.018348623853211, |
|
"grad_norm": 11.21625290323588, |
|
"learning_rate": 2.9595819387826747e-07, |
|
"logits/chosen": -2.2084720134735107, |
|
"logits/rejected": -2.195996046066284, |
|
"logps/chosen": -319.896484375, |
|
"logps/rejected": -269.0523986816406, |
|
"loss": 0.0766, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 5.313421726226807, |
|
"rewards/margins": 5.936707496643066, |
|
"rewards/rejected": -0.6232857704162598, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.2018348623853212, |
|
"grad_norm": 12.136370494834754, |
|
"learning_rate": 2.0076384291297133e-07, |
|
"logits/chosen": -2.221501588821411, |
|
"logits/rejected": -2.2167458534240723, |
|
"logps/chosen": -319.49652099609375, |
|
"logps/rejected": -252.19302368164062, |
|
"loss": 0.032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.2415971755981445, |
|
"rewards/margins": 5.633230686187744, |
|
"rewards/rejected": -0.39163410663604736, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.385321100917431, |
|
"grad_norm": 12.64835328707971, |
|
"learning_rate": 1.2015645770835764e-07, |
|
"logits/chosen": -2.235281467437744, |
|
"logits/rejected": -2.1987690925598145, |
|
"logps/chosen": -336.44158935546875, |
|
"logps/rejected": -248.538330078125, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 5.285120487213135, |
|
"rewards/margins": 5.766313552856445, |
|
"rewards/rejected": -0.48119330406188965, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.5688073394495414, |
|
"grad_norm": 9.209595589190794, |
|
"learning_rate": 5.806543362721944e-08, |
|
"logits/chosen": -2.2242534160614014, |
|
"logits/rejected": -2.2229883670806885, |
|
"logps/chosen": -326.62701416015625, |
|
"logps/rejected": -266.9122619628906, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.088566780090332, |
|
"rewards/margins": 5.991884708404541, |
|
"rewards/rejected": -0.9033180475234985, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.7522935779816513, |
|
"grad_norm": 6.119217367247741, |
|
"learning_rate": 1.751754273859507e-08, |
|
"logits/chosen": -2.2509617805480957, |
|
"logits/rejected": -2.230938673019409, |
|
"logps/chosen": -326.35186767578125, |
|
"logps/rejected": -290.31903076171875, |
|
"loss": 0.0466, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 5.807272911071777, |
|
"rewards/margins": 6.667996883392334, |
|
"rewards/rejected": -0.8607242703437805, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.9357798165137616, |
|
"grad_norm": 13.26496056644377, |
|
"learning_rate": 4.893867400131979e-10, |
|
"logits/chosen": -2.244044780731201, |
|
"logits/rejected": -2.239701986312866, |
|
"logps/chosen": -341.17584228515625, |
|
"logps/rejected": -274.3210144042969, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.888747215270996, |
|
"rewards/margins": 6.674208641052246, |
|
"rewards/rejected": -0.7854617238044739, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.9357798165137616, |
|
"eval_logits/chosen": -2.2088987827301025, |
|
"eval_logits/rejected": -2.196852445602417, |
|
"eval_logps/chosen": -315.8323669433594, |
|
"eval_logps/rejected": -252.4415740966797, |
|
"eval_loss": 0.4136447310447693, |
|
"eval_rewards/accuracies": 0.8557692170143127, |
|
"eval_rewards/chosen": 3.4293932914733887, |
|
"eval_rewards/margins": 3.3534319400787354, |
|
"eval_rewards/rejected": 0.07596174627542496, |
|
"eval_runtime": 54.1579, |
|
"eval_samples_per_second": 14.199, |
|
"eval_steps_per_second": 0.24, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.9724770642201834, |
|
"step": 81, |
|
"total_flos": 954757539692544.0, |
|
"train_loss": 0.21779433532077588, |
|
"train_runtime": 2944.3137, |
|
"train_samples_per_second": 7.049, |
|
"train_steps_per_second": 0.028 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 81, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 40, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 954757539692544.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|