|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9968652037617555, |
|
"eval_steps": 500, |
|
"global_step": 159, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006269592476489028, |
|
"grad_norm": 18.55070569719312, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.554591178894043, |
|
"logits/rejected": -2.434877395629883, |
|
"logps/chosen": -323.11175537109375, |
|
"logps/pi_response": -94.49095153808594, |
|
"logps/ref_response": -94.49095153808594, |
|
"logps/rejected": -208.82200622558594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06269592476489028, |
|
"grad_norm": 17.53145291009543, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.4522640705108643, |
|
"logits/rejected": -2.350480079650879, |
|
"logps/chosen": -193.23435974121094, |
|
"logps/pi_response": -86.61455535888672, |
|
"logps/ref_response": -86.81534576416016, |
|
"logps/rejected": -259.4031066894531, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5763888955116272, |
|
"rewards/chosen": -0.0016433527925983071, |
|
"rewards/margins": 0.0050659943372011185, |
|
"rewards/rejected": -0.0067093465477228165, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12539184952978055, |
|
"grad_norm": 23.589101524345324, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -2.4247384071350098, |
|
"logits/rejected": -2.4102516174316406, |
|
"logps/chosen": -214.6630096435547, |
|
"logps/pi_response": -80.90936279296875, |
|
"logps/ref_response": -82.33551788330078, |
|
"logps/rejected": -277.02093505859375, |
|
"loss": 0.6494, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.09652389585971832, |
|
"rewards/margins": 0.16998200118541718, |
|
"rewards/rejected": -0.2665058970451355, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18808777429467086, |
|
"grad_norm": 34.67188819309952, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -2.363760471343994, |
|
"logits/rejected": -2.2931735515594482, |
|
"logps/chosen": -265.7935485839844, |
|
"logps/pi_response": -118.18830871582031, |
|
"logps/ref_response": -89.96348571777344, |
|
"logps/rejected": -357.9932556152344, |
|
"loss": 0.5909, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6331563591957092, |
|
"rewards/margins": 0.4989984929561615, |
|
"rewards/rejected": -1.132154941558838, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2507836990595611, |
|
"grad_norm": 24.39087061903397, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -2.3351082801818848, |
|
"logits/rejected": -2.240893602371216, |
|
"logps/chosen": -282.7699890136719, |
|
"logps/pi_response": -137.54034423828125, |
|
"logps/ref_response": -89.04374694824219, |
|
"logps/rejected": -359.68048095703125, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6459968686103821, |
|
"rewards/margins": 0.5398008227348328, |
|
"rewards/rejected": -1.1857976913452148, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.31347962382445144, |
|
"grad_norm": 26.791698094673826, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -2.3063526153564453, |
|
"logits/rejected": -2.2795004844665527, |
|
"logps/chosen": -300.38531494140625, |
|
"logps/pi_response": -154.4958953857422, |
|
"logps/ref_response": -83.67357635498047, |
|
"logps/rejected": -391.9969177246094, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7796777486801147, |
|
"rewards/margins": 0.6542943120002747, |
|
"rewards/rejected": -1.4339721202850342, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3761755485893417, |
|
"grad_norm": 38.46368190958448, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": -2.1805338859558105, |
|
"logits/rejected": -2.1266684532165527, |
|
"logps/chosen": -285.82476806640625, |
|
"logps/pi_response": -186.3391876220703, |
|
"logps/ref_response": -86.60485076904297, |
|
"logps/rejected": -381.95941162109375, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9567915201187134, |
|
"rewards/margins": 0.6549071073532104, |
|
"rewards/rejected": -1.6116985082626343, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.438871473354232, |
|
"grad_norm": 33.61675096777802, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": -2.225067138671875, |
|
"logits/rejected": -2.197097063064575, |
|
"logps/chosen": -311.00189208984375, |
|
"logps/pi_response": -195.21932983398438, |
|
"logps/ref_response": -87.1160659790039, |
|
"logps/rejected": -443.4674377441406, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.0373988151550293, |
|
"rewards/margins": 0.9388235211372375, |
|
"rewards/rejected": -1.976222276687622, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5015673981191222, |
|
"grad_norm": 32.26496956211775, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": -2.2063775062561035, |
|
"logits/rejected": -2.1627447605133057, |
|
"logps/chosen": -366.4414367675781, |
|
"logps/pi_response": -222.93441772460938, |
|
"logps/ref_response": -94.23348236083984, |
|
"logps/rejected": -445.9207458496094, |
|
"loss": 0.4839, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.322712779045105, |
|
"rewards/margins": 0.9108338356018066, |
|
"rewards/rejected": -2.233546495437622, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5642633228840125, |
|
"grad_norm": 26.726350157115625, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": -2.0913848876953125, |
|
"logits/rejected": -2.097029209136963, |
|
"logps/chosen": -338.84674072265625, |
|
"logps/pi_response": -222.7672119140625, |
|
"logps/ref_response": -87.508056640625, |
|
"logps/rejected": -487.66766357421875, |
|
"loss": 0.4588, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2909362316131592, |
|
"rewards/margins": 0.9323557019233704, |
|
"rewards/rejected": -2.2232918739318848, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6269592476489029, |
|
"grad_norm": 36.897937868099504, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": -2.0451536178588867, |
|
"logits/rejected": -2.017054319381714, |
|
"logps/chosen": -363.85626220703125, |
|
"logps/pi_response": -238.09249877929688, |
|
"logps/ref_response": -86.1615219116211, |
|
"logps/rejected": -467.4317321777344, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4615881443023682, |
|
"rewards/margins": 0.8330486416816711, |
|
"rewards/rejected": -2.2946369647979736, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 36.50147029957021, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": -1.8684625625610352, |
|
"logits/rejected": -1.8239994049072266, |
|
"logps/chosen": -318.9664001464844, |
|
"logps/pi_response": -232.65066528320312, |
|
"logps/ref_response": -80.07207489013672, |
|
"logps/rejected": -486.52655029296875, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.443070650100708, |
|
"rewards/margins": 1.0364656448364258, |
|
"rewards/rejected": -2.479536294937134, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7523510971786834, |
|
"grad_norm": 32.80757403172816, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": -1.8891398906707764, |
|
"logits/rejected": -1.7928073406219482, |
|
"logps/chosen": -345.98785400390625, |
|
"logps/pi_response": -232.44729614257812, |
|
"logps/ref_response": -78.10755920410156, |
|
"logps/rejected": -474.845703125, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.4001835584640503, |
|
"rewards/margins": 1.0585042238235474, |
|
"rewards/rejected": -2.4586877822875977, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8150470219435737, |
|
"grad_norm": 31.6821642347826, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": -1.808752417564392, |
|
"logits/rejected": -1.694044828414917, |
|
"logps/chosen": -374.7220764160156, |
|
"logps/pi_response": -225.9062957763672, |
|
"logps/ref_response": -80.09992218017578, |
|
"logps/rejected": -484.637451171875, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4728864431381226, |
|
"rewards/margins": 0.967255711555481, |
|
"rewards/rejected": -2.4401423931121826, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.877742946708464, |
|
"grad_norm": 33.378981761704864, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": -1.7705777883529663, |
|
"logits/rejected": -1.6775932312011719, |
|
"logps/chosen": -368.3900146484375, |
|
"logps/pi_response": -242.3651580810547, |
|
"logps/ref_response": -80.24095916748047, |
|
"logps/rejected": -515.4364624023438, |
|
"loss": 0.4255, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5394630432128906, |
|
"rewards/margins": 1.2102196216583252, |
|
"rewards/rejected": -2.7496824264526367, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9404388714733543, |
|
"grad_norm": 32.52480870828172, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": -1.8091413974761963, |
|
"logits/rejected": -1.7029527425765991, |
|
"logps/chosen": -412.6189880371094, |
|
"logps/pi_response": -255.9366455078125, |
|
"logps/ref_response": -90.39550018310547, |
|
"logps/rejected": -488.56060791015625, |
|
"loss": 0.437, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6722618341445923, |
|
"rewards/margins": 0.8418585062026978, |
|
"rewards/rejected": -2.514120101928711, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9968652037617555, |
|
"step": 159, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5087880788359253, |
|
"train_runtime": 3966.6039, |
|
"train_samples_per_second": 5.137, |
|
"train_steps_per_second": 0.04 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 159, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|