{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.1902587519025875,
  "eval_steps": 50,
  "global_step": 250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0076103500761035,
      "grad_norm": 0.058339186012744904,
      "learning_rate": 4.999451708687114e-06,
      "logits/chosen": 14.268467903137207,
      "logits/rejected": 14.600369453430176,
      "logps/chosen": -0.2669850289821625,
      "logps/rejected": -0.3412467837333679,
      "loss": 0.9049,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.4004775583744049,
      "rewards/margins": 0.11139259487390518,
      "rewards/rejected": -0.5118702054023743,
      "step": 10
    },
    {
      "epoch": 0.015220700152207,
      "grad_norm": 0.049545690417289734,
      "learning_rate": 4.997807075247147e-06,
      "logits/chosen": 14.14539623260498,
      "logits/rejected": 15.191584587097168,
      "logps/chosen": -0.25579872727394104,
      "logps/rejected": -0.3931494653224945,
      "loss": 0.8989,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.38369807600975037,
      "rewards/margins": 0.2060261219739914,
      "rewards/rejected": -0.5897241830825806,
      "step": 20
    },
    {
      "epoch": 0.0228310502283105,
      "grad_norm": 0.061699289828538895,
      "learning_rate": 4.9950668210706795e-06,
      "logits/chosen": 14.284139633178711,
      "logits/rejected": 15.006326675415039,
      "logps/chosen": -0.275672048330307,
      "logps/rejected": -0.3603581488132477,
      "loss": 0.9004,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.4135080873966217,
      "rewards/margins": 0.12702910602092743,
      "rewards/rejected": -0.5405372381210327,
      "step": 30
    },
    {
      "epoch": 0.030441400304414,
      "grad_norm": 0.05706426501274109,
      "learning_rate": 4.9912321481237616e-06,
      "logits/chosen": 14.275796890258789,
      "logits/rejected": 14.935521125793457,
      "logps/chosen": -0.2802076041698456,
      "logps/rejected": -0.38278770446777344,
      "loss": 0.9138,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.42031145095825195,
      "rewards/margins": 0.15387018024921417,
      "rewards/rejected": -0.5741815567016602,
      "step": 40
    },
    {
      "epoch": 0.0380517503805175,
      "grad_norm": 0.05318514257669449,
      "learning_rate": 4.986304738420684e-06,
      "logits/chosen": 14.433627128601074,
      "logits/rejected": 15.458297729492188,
      "logps/chosen": -0.2581387162208557,
      "logps/rejected": -0.38208404183387756,
      "loss": 0.914,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.38720807433128357,
      "rewards/margins": 0.18591801822185516,
      "rewards/rejected": -0.5731261372566223,
      "step": 50
    },
    {
      "epoch": 0.0380517503805175,
      "eval_logits/chosen": 14.396967887878418,
      "eval_logits/rejected": 15.221076965332031,
      "eval_logps/chosen": -0.27519574761390686,
      "eval_logps/rejected": -0.3709692656993866,
      "eval_loss": 0.9084128141403198,
      "eval_rewards/accuracies": 0.5981308221817017,
      "eval_rewards/chosen": -0.4127936065196991,
      "eval_rewards/margins": 0.14366032183170319,
      "eval_rewards/rejected": -0.5564539432525635,
      "eval_runtime": 30.773,
      "eval_samples_per_second": 27.622,
      "eval_steps_per_second": 3.477,
      "step": 50
    },
    {
      "epoch": 0.045662100456621,
      "grad_norm": 0.06310460716485977,
      "learning_rate": 4.980286753286196e-06,
      "logits/chosen": 14.548416137695312,
      "logits/rejected": 15.526041030883789,
      "logps/chosen": -0.29403647780418396,
      "logps/rejected": -0.40682005882263184,
      "loss": 0.9082,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.44105473160743713,
      "rewards/margins": 0.1691754311323166,
      "rewards/rejected": -0.6102300882339478,
      "step": 60
    },
    {
      "epoch": 0.0532724505327245,
      "grad_norm": 0.1258806735277176,
      "learning_rate": 4.973180832407471e-06,
      "logits/chosen": 14.390210151672363,
      "logits/rejected": 14.817584037780762,
      "logps/chosen": -0.25258123874664307,
      "logps/rejected": -0.36392712593078613,
      "loss": 0.896,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.3788718581199646,
      "rewards/margins": 0.1670188158750534,
      "rewards/rejected": -0.5458906888961792,
      "step": 70
    },
    {
      "epoch": 0.060882800608828,
      "grad_norm": 0.09006265550851822,
      "learning_rate": 4.964990092676263e-06,
      "logits/chosen": 13.844560623168945,
      "logits/rejected": 14.811120986938477,
      "logps/chosen": -0.2630843222141266,
      "logps/rejected": -0.3794577717781067,
      "loss": 0.8977,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.3946264684200287,
      "rewards/margins": 0.17456015944480896,
      "rewards/rejected": -0.5691865682601929,
      "step": 80
    },
    {
      "epoch": 0.0684931506849315,
      "grad_norm": 0.07123688608407974,
      "learning_rate": 4.9557181268217225e-06,
      "logits/chosen": 13.927327156066895,
      "logits/rejected": 14.746416091918945,
      "logps/chosen": -0.25282323360443115,
      "logps/rejected": -0.3279832601547241,
      "loss": 0.9092,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.37923485040664673,
      "rewards/margins": 0.11274002492427826,
      "rewards/rejected": -0.4919748902320862,
      "step": 90
    },
    {
      "epoch": 0.076103500761035,
      "grad_norm": 0.08333446085453033,
      "learning_rate": 4.9453690018345144e-06,
      "logits/chosen": 14.406118392944336,
      "logits/rejected": 14.770090103149414,
      "logps/chosen": -0.28569403290748596,
      "logps/rejected": -0.3596845269203186,
      "loss": 0.8932,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.42854103446006775,
      "rewards/margins": 0.11098580062389374,
      "rewards/rejected": -0.5395268201828003,
      "step": 100
    },
    {
      "epoch": 0.076103500761035,
      "eval_logits/chosen": 13.925265312194824,
      "eval_logits/rejected": 14.808513641357422,
      "eval_logps/chosen": -0.2667020559310913,
      "eval_logps/rejected": -0.3739235997200012,
      "eval_loss": 0.8984279036521912,
      "eval_rewards/accuracies": 0.5981308221817017,
      "eval_rewards/chosen": -0.40005311369895935,
      "eval_rewards/margins": 0.16083234548568726,
      "eval_rewards/rejected": -0.5608854293823242,
      "eval_runtime": 30.7791,
      "eval_samples_per_second": 27.616,
      "eval_steps_per_second": 3.476,
      "step": 100
    },
    {
      "epoch": 0.0837138508371385,
      "grad_norm": 0.08474570512771606,
      "learning_rate": 4.933947257182901e-06,
      "logits/chosen": 13.641456604003906,
      "logits/rejected": 14.799921035766602,
      "logps/chosen": -0.2721528708934784,
      "logps/rejected": -0.38378894329071045,
      "loss": 0.8995,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.40822935104370117,
      "rewards/margins": 0.1674540936946869,
      "rewards/rejected": -0.5756834149360657,
      "step": 110
    },
    {
      "epoch": 0.091324200913242,
      "grad_norm": 0.1004580408334732,
      "learning_rate": 4.921457902821578e-06,
      "logits/chosen": 13.835454940795898,
      "logits/rejected": 14.882522583007812,
      "logps/chosen": -0.28507837653160095,
      "logps/rejected": -0.39737468957901,
      "loss": 0.8795,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.42761754989624023,
      "rewards/margins": 0.16844449937343597,
      "rewards/rejected": -0.5960620641708374,
      "step": 120
    },
    {
      "epoch": 0.0989345509893455,
      "grad_norm": 0.09537151455879211,
      "learning_rate": 4.907906416994146e-06,
      "logits/chosen": 13.607874870300293,
      "logits/rejected": 14.091131210327148,
      "logps/chosen": -0.2739318013191223,
      "logps/rejected": -0.36800479888916016,
      "loss": 0.8912,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.4108976721763611,
      "rewards/margins": 0.14110951125621796,
      "rewards/rejected": -0.5520071983337402,
      "step": 130
    },
    {
      "epoch": 0.106544901065449,
      "grad_norm": 0.10281535238027573,
      "learning_rate": 4.893298743830168e-06,
      "logits/chosen": 12.017224311828613,
      "logits/rejected": 13.04835319519043,
      "logps/chosen": -0.24072685837745667,
      "logps/rejected": -0.36906492710113525,
      "loss": 0.8908,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.3610902428627014,
      "rewards/margins": 0.19250717759132385,
      "rewards/rejected": -0.5535974502563477,
      "step": 140
    },
    {
      "epoch": 0.1141552511415525,
      "grad_norm": 0.707987368106842,
      "learning_rate": 4.8776412907378845e-06,
      "logits/chosen": 12.522550582885742,
      "logits/rejected": 13.272679328918457,
      "logps/chosen": -0.2583540081977844,
      "logps/rejected": -0.3796755075454712,
      "loss": 0.8867,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.38753098249435425,
      "rewards/margins": 0.18198221921920776,
      "rewards/rejected": -0.569513201713562,
      "step": 150
    },
    {
      "epoch": 0.1141552511415525,
      "eval_logits/chosen": 11.989100456237793,
      "eval_logits/rejected": 12.92872142791748,
      "eval_logps/chosen": -0.27158522605895996,
      "eval_logps/rejected": -0.40521273016929626,
      "eval_loss": 0.8765817284584045,
      "eval_rewards/accuracies": 0.5981308221817017,
      "eval_rewards/chosen": -0.40737783908843994,
      "eval_rewards/margins": 0.20044119656085968,
      "eval_rewards/rejected": -0.6078190803527832,
      "eval_runtime": 30.7739,
      "eval_samples_per_second": 27.621,
      "eval_steps_per_second": 3.477,
      "step": 150
    },
    {
      "epoch": 0.121765601217656,
      "grad_norm": 0.19342070817947388,
      "learning_rate": 4.860940925593703e-06,
      "logits/chosen": 11.095940589904785,
      "logits/rejected": 12.351040840148926,
      "logps/chosen": -0.24749942123889923,
      "logps/rejected": -0.43422192335128784,
      "loss": 0.8762,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.37124913930892944,
      "rewards/margins": 0.2800838053226471,
      "rewards/rejected": -0.6513329744338989,
      "step": 160
    },
    {
      "epoch": 0.1293759512937595,
      "grad_norm": 0.19374576210975647,
      "learning_rate": 4.84320497372973e-06,
      "logits/chosen": 10.510068893432617,
      "logits/rejected": 11.507593154907227,
      "logps/chosen": -0.26223134994506836,
      "logps/rejected": -0.43635931611061096,
      "loss": 0.8581,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.39334696531295776,
      "rewards/margins": 0.2611919641494751,
      "rewards/rejected": -0.6545389294624329,
      "step": 170
    },
    {
      "epoch": 0.136986301369863,
      "grad_norm": 0.20330430567264557,
      "learning_rate": 4.824441214720629e-06,
      "logits/chosen": 9.89570140838623,
      "logits/rejected": 10.669364929199219,
      "logps/chosen": -0.3143860101699829,
      "logps/rejected": -0.46989941596984863,
      "loss": 0.8558,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.47157901525497437,
      "rewards/margins": 0.23327013850212097,
      "rewards/rejected": -0.704849123954773,
      "step": 180
    },
    {
      "epoch": 0.1445966514459665,
      "grad_norm": 0.22942212224006653,
      "learning_rate": 4.804657878971252e-06,
      "logits/chosen": 8.887057304382324,
      "logits/rejected": 9.542157173156738,
      "logps/chosen": -0.2906036972999573,
      "logps/rejected": -0.4810206890106201,
      "loss": 0.8554,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.4359055459499359,
      "rewards/margins": 0.28562551736831665,
      "rewards/rejected": -0.7215310335159302,
      "step": 190
    },
    {
      "epoch": 0.15220700152207,
      "grad_norm": 0.29071903228759766,
      "learning_rate": 4.783863644106502e-06,
      "logits/chosen": 6.791537284851074,
      "logits/rejected": 7.366445064544678,
      "logps/chosen": -0.31382033228874207,
      "logps/rejected": -0.5417486429214478,
      "loss": 0.838,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.4707304835319519,
      "rewards/margins": 0.34189245104789734,
      "rewards/rejected": -0.8126228451728821,
      "step": 200
    },
    {
      "epoch": 0.15220700152207,
      "eval_logits/chosen": 7.050150394439697,
      "eval_logits/rejected": 7.516275405883789,
      "eval_logps/chosen": -0.3289315402507782,
      "eval_logps/rejected": -0.5481724143028259,
      "eval_loss": 0.813983678817749,
      "eval_rewards/accuracies": 0.6168224215507507,
      "eval_rewards/chosen": -0.4933973252773285,
      "eval_rewards/margins": 0.3288613557815552,
      "eval_rewards/rejected": -0.8222586512565613,
      "eval_runtime": 30.7734,
      "eval_samples_per_second": 27.621,
      "eval_steps_per_second": 3.477,
      "step": 200
    },
    {
      "epoch": 0.1598173515981735,
      "grad_norm": 0.23101097345352173,
      "learning_rate": 4.762067631165049e-06,
      "logits/chosen": 5.132790565490723,
      "logits/rejected": 5.848537445068359,
      "logps/chosen": -0.33372369408607483,
      "logps/rejected": -0.5993582010269165,
      "loss": 0.8212,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.5005855560302734,
      "rewards/margins": 0.3984517455101013,
      "rewards/rejected": -0.8990373611450195,
      "step": 210
    },
    {
      "epoch": 0.167427701674277,
      "grad_norm": 0.5136363506317139,
      "learning_rate": 4.7392794005985324e-06,
      "logits/chosen": 3.807554244995117,
      "logits/rejected": 4.600871562957764,
      "logps/chosen": -0.32092416286468506,
      "logps/rejected": -0.651642918586731,
      "loss": 0.7851,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.4813862442970276,
      "rewards/margins": 0.4960783123970032,
      "rewards/rejected": -0.977464497089386,
      "step": 220
    },
    {
      "epoch": 0.1750380517503805,
      "grad_norm": 0.4106898009777069,
      "learning_rate": 4.715508948078037e-06,
      "logits/chosen": 2.760650396347046,
      "logits/rejected": 2.1608071327209473,
      "logps/chosen": -0.43665003776550293,
      "logps/rejected": -0.8352751731872559,
      "loss": 0.7685,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.6549750566482544,
      "rewards/margins": 0.5979377627372742,
      "rewards/rejected": -1.2529128789901733,
      "step": 230
    },
    {
      "epoch": 0.182648401826484,
      "grad_norm": 0.4719419479370117,
      "learning_rate": 4.690766700109659e-06,
      "logits/chosen": 3.1216347217559814,
      "logits/rejected": 2.7202537059783936,
      "logps/chosen": -0.444007933139801,
      "logps/rejected": -0.7697597742080688,
      "loss": 0.7474,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.6660118699073792,
      "rewards/margins": 0.4886276125907898,
      "rewards/rejected": -1.154639482498169,
      "step": 240
    },
    {
      "epoch": 0.1902587519025875,
      "grad_norm": 0.548523485660553,
      "learning_rate": 4.665063509461098e-06,
      "logits/chosen": 1.3678622245788574,
      "logits/rejected": 0.46835970878601074,
      "logps/chosen": -0.48227253556251526,
      "logps/rejected": -0.997289776802063,
      "loss": 0.7017,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.7234088182449341,
      "rewards/margins": 0.7725256681442261,
      "rewards/rejected": -1.4959346055984497,
      "step": 250
    },
    {
      "epoch": 0.1902587519025875,
      "eval_logits/chosen": 2.1362831592559814,
      "eval_logits/rejected": 1.1932121515274048,
      "eval_logps/chosen": -0.500978946685791,
      "eval_logps/rejected": -1.0073517560958862,
      "eval_loss": 0.6914573907852173,
      "eval_rewards/accuracies": 0.6542056202888489,
      "eval_rewards/chosen": -0.7514683604240417,
      "eval_rewards/margins": 0.7595593929290771,
      "eval_rewards/rejected": -1.5110276937484741,
      "eval_runtime": 30.7706,
      "eval_samples_per_second": 27.624,
      "eval_steps_per_second": 3.477,
      "step": 250
    }
  ],
  "logging_steps": 10,
  "max_steps": 1500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.825059660171837e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}