{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.425531914893618, "eval_steps": 300, "global_step": 720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9078014184397163, "grad_norm": 132.1505126953125, "learning_rate": 2.222222222222222e-07, "log_odds_chosen": 0.05492939054965973, "log_odds_ratio": -0.7323614954948425, "logits/chosen": -4.740067958831787, "logits/rejected": -4.963461399078369, "logps/chosen": -2.374514579772949, "logps/rejected": -2.4533467292785645, "loss": 2.8785, "nll_loss": 2.7699854373931885, "rewards/accuracies": 0.48828125, "rewards/chosen": -0.3561772108078003, "rewards/margins": 0.011824802495539188, "rewards/rejected": -0.3680019676685333, "step": 32 }, { "epoch": 1.8156028368794326, "grad_norm": 45.387813568115234, "learning_rate": 4.444444444444444e-07, "log_odds_chosen": 0.22124934196472168, "log_odds_ratio": -0.6725601553916931, "logits/chosen": -5.0936760902404785, "logits/rejected": -5.33966588973999, "logps/chosen": -1.732269287109375, "logps/rejected": -1.9329001903533936, "loss": 2.2151, "nll_loss": 2.172783851623535, "rewards/accuracies": 0.6015625, "rewards/chosen": -0.2598403990268707, "rewards/margins": 0.030094601213932037, "rewards/rejected": -0.28993502259254456, "step": 64 }, { "epoch": 2.723404255319149, "grad_norm": 19.709226608276367, "learning_rate": 4.983095894354857e-07, "log_odds_chosen": 0.2306685447692871, "log_odds_ratio": -0.658535361289978, "logits/chosen": -4.936949729919434, "logits/rejected": -5.155893802642822, "logps/chosen": -1.4097586870193481, "logps/rejected": -1.5965328216552734, "loss": 1.8908, "nll_loss": 1.8126921653747559, "rewards/accuracies": 0.6171875, "rewards/chosen": -0.2114638090133667, "rewards/margins": 0.028016118332743645, "rewards/rejected": -0.2394799143075943, "step": 96 }, { "epoch": 3.631205673758865, "grad_norm": 20.5742130279541, "learning_rate": 4.908427196539701e-07, "log_odds_chosen": 0.3416966497898102, "log_odds_ratio": -0.5978461503982544, "logits/chosen": -4.871417999267578, "logits/rejected": -5.006246566772461, "logps/chosen": -1.3338335752487183, "logps/rejected": -1.595802903175354, "loss": 1.7492, "nll_loss": 1.6234831809997559, "rewards/accuracies": 0.66015625, "rewards/chosen": -0.20007506012916565, "rewards/margins": 0.039295390248298645, "rewards/rejected": -0.2393704503774643, "step": 128 }, { "epoch": 4.539007092198582, "grad_norm": 19.219451904296875, "learning_rate": 4.775907352415367e-07, "log_odds_chosen": 0.4098852872848511, "log_odds_ratio": -0.5668885111808777, "logits/chosen": -4.7395429611206055, "logits/rejected": -4.919832229614258, "logps/chosen": -1.251634955406189, "logps/rejected": -1.5617362260818481, "loss": 1.6591, "nll_loss": 1.5759321451187134, "rewards/accuracies": 0.72265625, "rewards/chosen": -0.18774525821208954, "rewards/margins": 0.04651518166065216, "rewards/rejected": -0.2342604398727417, "step": 160 }, { "epoch": 5.446808510638298, "grad_norm": 25.844369888305664, "learning_rate": 4.588719528532341e-07, "log_odds_chosen": 0.4376165568828583, "log_odds_ratio": -0.5644897222518921, "logits/chosen": -4.674585342407227, "logits/rejected": -4.810555934906006, "logps/chosen": -1.2456402778625488, "logps/rejected": -1.5732855796813965, "loss": 1.5996, "nll_loss": 1.4974051713943481, "rewards/accuracies": 0.73828125, "rewards/chosen": -0.1868460476398468, "rewards/margins": 0.04914678633213043, "rewards/rejected": -0.23599283397197723, "step": 192 }, { "epoch": 6.3546099290780145, "grad_norm": 23.09563636779785, "learning_rate": 4.3513600327725117e-07, "log_odds_chosen": 0.3738464415073395, "log_odds_ratio": -0.5867234468460083, "logits/chosen": -4.663087844848633, "logits/rejected": -4.844013214111328, "logps/chosen": -1.3138737678527832, "logps/rejected": -1.5835403203964233, "loss": 1.5411, "nll_loss": 1.4685286283493042, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.197081059217453, "rewards/margins": 0.04045000299811363, "rewards/rejected": -0.23753106594085693, "step": 224 }, { "epoch": 7.26241134751773, "grad_norm": 23.646638870239258, "learning_rate": 4.0695303116802467e-07, "log_odds_chosen": 0.46367794275283813, "log_odds_ratio": -0.547984778881073, "logits/chosen": -4.620482921600342, "logits/rejected": -4.765042781829834, "logps/chosen": -1.307213544845581, "logps/rejected": -1.65544593334198, "loss": 1.498, "nll_loss": 1.4461973905563354, "rewards/accuracies": 0.75390625, "rewards/chosen": -0.19608205556869507, "rewards/margins": 0.05223485454916954, "rewards/rejected": -0.2483169138431549, "step": 256 }, { "epoch": 8.170212765957446, "grad_norm": 23.585857391357422, "learning_rate": 3.75e-07, "log_odds_chosen": 0.4542897343635559, "log_odds_ratio": -0.5773134827613831, "logits/chosen": -4.633105278015137, "logits/rejected": -4.810471057891846, "logps/chosen": -1.3886733055114746, "logps/rejected": -1.7219102382659912, "loss": 1.4686, "nll_loss": 1.3969916105270386, "rewards/accuracies": 0.7734375, "rewards/chosen": -0.20830100774765015, "rewards/margins": 0.049985550343990326, "rewards/rejected": -0.2582865357398987, "step": 288 }, { "epoch": 8.51063829787234, "eval_log_odds_chosen": 1.6898525953292847, "eval_log_odds_ratio": -0.19230316579341888, "eval_logits/chosen": -4.930749416351318, "eval_logits/rejected": -4.758046627044678, "eval_logps/chosen": -1.4714246988296509, "eval_logps/rejected": -2.9621574878692627, "eval_loss": 1.3246647119522095, "eval_nll_loss": 1.4632530212402344, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.22071371972560883, "eval_rewards/margins": 0.22360996901988983, "eval_rewards/rejected": -0.44432368874549866, "eval_runtime": 0.6144, "eval_samples_per_second": 222.965, "eval_steps_per_second": 4.882, "step": 300 }, { "epoch": 9.078014184397164, "grad_norm": 20.154146194458008, "learning_rate": 3.400444312011776e-07, "log_odds_chosen": 0.409349262714386, "log_odds_ratio": -0.5844379663467407, "logits/chosen": -4.599703311920166, "logits/rejected": -4.766429901123047, "logps/chosen": -1.3831363916397095, "logps/rejected": -1.6733564138412476, "loss": 1.4397, "nll_loss": 1.4065345525741577, "rewards/accuracies": 0.74609375, "rewards/chosen": -0.20747046172618866, "rewards/margins": 0.04353303089737892, "rewards/rejected": -0.2510034739971161, "step": 320 }, { "epoch": 9.98581560283688, "grad_norm": 23.09050750732422, "learning_rate": 3.029259680573527e-07, "log_odds_chosen": 0.43665847182273865, "log_odds_ratio": -0.5905143618583679, "logits/chosen": -4.58922815322876, "logits/rejected": -4.685288906097412, "logps/chosen": -1.458475112915039, "logps/rejected": -1.7894960641860962, "loss": 1.4285, "nll_loss": 1.3732693195343018, "rewards/accuracies": 0.73046875, "rewards/chosen": -0.21877126395702362, "rewards/margins": 0.04965316504240036, "rewards/rejected": -0.26842445135116577, "step": 352 }, { "epoch": 10.893617021276595, "grad_norm": 35.926055908203125, "learning_rate": 2.6453620722761895e-07, "log_odds_chosen": 0.6511461138725281, "log_odds_ratio": -0.49195483326911926, "logits/chosen": -4.608173370361328, "logits/rejected": -4.685794830322266, "logps/chosen": -1.3694053888320923, "logps/rejected": -1.8711962699890137, "loss": 1.4144, "nll_loss": 1.374709129333496, "rewards/accuracies": 0.8359375, "rewards/chosen": -0.20541077852249146, "rewards/margins": 0.07526866346597672, "rewards/rejected": -0.28067946434020996, "step": 384 }, { "epoch": 11.801418439716311, "grad_norm": 33.8105583190918, "learning_rate": 2.2579728232420523e-07, "log_odds_chosen": 0.5499828457832336, "log_odds_ratio": -0.5233615040779114, "logits/chosen": -4.537787437438965, "logits/rejected": -4.662774085998535, "logps/chosen": -1.3898181915283203, "logps/rejected": -1.7920804023742676, "loss": 1.4016, "nll_loss": 1.3631547689437866, "rewards/accuracies": 0.7890625, "rewards/chosen": -0.20847272872924805, "rewards/margins": 0.06033932417631149, "rewards/rejected": -0.26881206035614014, "step": 416 }, { "epoch": 12.709219858156029, "grad_norm": 25.557348251342773, "learning_rate": 1.8763971398550467e-07, "log_odds_chosen": 0.5377756357192993, "log_odds_ratio": -0.5508320927619934, "logits/chosen": -4.532352447509766, "logits/rejected": -4.629130840301514, "logps/chosen": -1.3977127075195312, "logps/rejected": -1.7851612567901611, "loss": 1.3943, "nll_loss": 1.3304414749145508, "rewards/accuracies": 0.77734375, "rewards/chosen": -0.20965692400932312, "rewards/margins": 0.05811727046966553, "rewards/rejected": -0.26777422428131104, "step": 448 }, { "epoch": 13.617021276595745, "grad_norm": 28.700815200805664, "learning_rate": 1.5098005849021078e-07, "log_odds_chosen": 0.5411101579666138, "log_odds_ratio": -0.5445564985275269, "logits/chosen": -4.501680850982666, "logits/rejected": -4.677550315856934, "logps/chosen": -1.3654242753982544, "logps/rejected": -1.7541980743408203, "loss": 1.401, "nll_loss": 1.2766036987304688, "rewards/accuracies": 0.8046875, "rewards/chosen": -0.2048136293888092, "rewards/margins": 0.05831605941057205, "rewards/rejected": -0.26312971115112305, "step": 480 }, { "epoch": 14.52482269503546, "grad_norm": 35.40031814575195, "learning_rate": 1.1669889179957723e-07, "log_odds_chosen": 0.7372524738311768, "log_odds_ratio": -0.46363916993141174, "logits/chosen": -4.513700485229492, "logits/rejected": -4.619227886199951, "logps/chosen": -1.3301138877868652, "logps/rejected": -1.8859204053878784, "loss": 1.3839, "nll_loss": 1.219886302947998, "rewards/accuracies": 0.87109375, "rewards/chosen": -0.19951710104942322, "rewards/margins": 0.08337096124887466, "rewards/rejected": -0.2828880548477173, "step": 512 }, { "epoch": 15.432624113475176, "grad_norm": 62.16829299926758, "learning_rate": 8.561965785773412e-08, "log_odds_chosen": 0.661382794380188, "log_odds_ratio": -0.4891131520271301, "logits/chosen": -4.506048202514648, "logits/rejected": -4.587852478027344, "logps/chosen": -1.3864898681640625, "logps/rejected": -1.8775601387023926, "loss": 1.3876, "nll_loss": 1.2974672317504883, "rewards/accuracies": 0.8359375, "rewards/chosen": -0.20797351002693176, "rewards/margins": 0.07366053014993668, "rewards/rejected": -0.28163403272628784, "step": 544 }, { "epoch": 16.340425531914892, "grad_norm": 29.107358932495117, "learning_rate": 5.848888922025552e-08, "log_odds_chosen": 0.6269708275794983, "log_odds_ratio": -0.49757176637649536, "logits/chosen": -4.460994243621826, "logits/rejected": -4.661521911621094, "logps/chosen": -1.3339214324951172, "logps/rejected": -1.7924858331680298, "loss": 1.3923, "nll_loss": 1.2958626747131348, "rewards/accuracies": 0.8125, "rewards/chosen": -0.200088232755661, "rewards/margins": 0.06878463923931122, "rewards/rejected": -0.26887285709381104, "step": 576 }, { "epoch": 17.02127659574468, "eval_log_odds_chosen": 1.7557824850082397, "eval_log_odds_ratio": -0.18494771420955658, "eval_logits/chosen": -4.790639400482178, "eval_logits/rejected": -4.577674865722656, "eval_logps/chosen": -1.626247763633728, "eval_logps/rejected": -3.2115631103515625, "eval_loss": 1.2619013786315918, "eval_nll_loss": 1.4078196287155151, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.2439371794462204, "eval_rewards/margins": 0.2377973347902298, "eval_rewards/rejected": -0.4817345142364502, "eval_runtime": 0.6115, "eval_samples_per_second": 224.05, "eval_steps_per_second": 4.906, "step": 600 }, { "epoch": 17.24822695035461, "grad_norm": 29.23589515686035, "learning_rate": 3.5958275117433404e-08, "log_odds_chosen": 0.5763309001922607, "log_odds_ratio": -0.5261004567146301, "logits/chosen": -4.398637294769287, "logits/rejected": -4.560643672943115, "logps/chosen": -1.3885968923568726, "logps/rejected": -1.8019691705703735, "loss": 1.3886, "nll_loss": 1.3023698329925537, "rewards/accuracies": 0.8046875, "rewards/chosen": -0.20828954875469208, "rewards/margins": 0.06200582906603813, "rewards/rejected": -0.2702953815460205, "step": 608 }, { "epoch": 18.156028368794328, "grad_norm": 27.693330764770508, "learning_rate": 1.8569007682777415e-08, "log_odds_chosen": 0.7424343824386597, "log_odds_ratio": -0.46295538544654846, "logits/chosen": -4.579552173614502, "logits/rejected": -4.691650390625, "logps/chosen": -1.3507909774780273, "logps/rejected": -1.9150110483169556, "loss": 1.3865, "nll_loss": 1.3111711740493774, "rewards/accuracies": 0.84375, "rewards/chosen": -0.20261868834495544, "rewards/margins": 0.084633007645607, "rewards/rejected": -0.28725165128707886, "step": 640 }, { "epoch": 19.06382978723404, "grad_norm": 37.925621032714844, "learning_rate": 6.738782355044048e-09, "log_odds_chosen": 0.6857459545135498, "log_odds_ratio": -0.4916977882385254, "logits/chosen": -4.52652645111084, "logits/rejected": -4.689857482910156, "logps/chosen": -1.341786503791809, "logps/rejected": -1.8565285205841064, "loss": 1.3794, "nll_loss": 1.2754034996032715, "rewards/accuracies": 0.8046875, "rewards/chosen": -0.20126797258853912, "rewards/margins": 0.07721129059791565, "rewards/rejected": -0.27847927808761597, "step": 672 }, { "epoch": 19.97163120567376, "grad_norm": 24.041799545288086, "learning_rate": 7.51764708051994e-10, "log_odds_chosen": 0.6411248445510864, "log_odds_ratio": -0.5100895762443542, "logits/chosen": -4.384097099304199, "logits/rejected": -4.515219688415527, "logps/chosen": -1.3920109272003174, "logps/rejected": -1.8711614608764648, "loss": 1.3805, "nll_loss": 1.2700397968292236, "rewards/accuracies": 0.8203125, "rewards/chosen": -0.20880162715911865, "rewards/margins": 0.07187257707118988, "rewards/rejected": -0.2806742191314697, "step": 704 }, { "epoch": 20.425531914893618, "grad_norm": 28.226720809936523, "learning_rate": 0.0, "log_odds_chosen": 0.6200518608093262, "log_odds_ratio": -0.529932975769043, "logits/chosen": -4.434691905975342, "logits/rejected": -4.575813293457031, "logps/chosen": -1.416117548942566, "logps/rejected": -1.8715832233428955, "loss": 1.3893, "nll_loss": 1.2817054986953735, "rewards/accuracies": 0.78125, "rewards/chosen": -0.2124176323413849, "rewards/margins": 0.0683198943734169, "rewards/rejected": -0.2807375192642212, "step": 720 }, { "epoch": 20.425531914893618, "eval_log_odds_chosen": 1.7479673624038696, "eval_log_odds_ratio": -0.1867920309305191, "eval_logits/chosen": -4.75565767288208, "eval_logits/rejected": -4.538194179534912, "eval_logps/chosen": -1.6300764083862305, "eval_logps/rejected": -3.2087719440460205, "eval_loss": 1.2522811889648438, "eval_nll_loss": 1.4028778076171875, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.24451148509979248, "eval_rewards/margins": 0.23680436611175537, "eval_rewards/rejected": -0.48131585121154785, "eval_runtime": 0.615, "eval_samples_per_second": 222.778, "eval_steps_per_second": 4.878, "step": 720 } ], "logging_steps": 32, "max_steps": 720, "num_input_tokens_seen": 0, "num_train_epochs": 21, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }