|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"eta": 0.004999999888241291, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.745321273803711, |
|
"logits/rejected": -2.661250352859497, |
|
"logps/chosen": -321.0613098144531, |
|
"logps/rejected": -271.1681823730469, |
|
"loss": 0.7079, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.781073570251465, |
|
"logits/rejected": -2.7723324298858643, |
|
"logps/chosen": -242.78675842285156, |
|
"logps/rejected": -208.898193359375, |
|
"loss": 0.7064, |
|
"rewards/accuracies": 0.5069444179534912, |
|
"rewards/chosen": 0.00046142542851157486, |
|
"rewards/margins": 0.0004825991054531187, |
|
"rewards/rejected": -2.1173778804950416e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.7764596939086914, |
|
"logits/rejected": -2.76332426071167, |
|
"logps/chosen": -268.7160339355469, |
|
"logps/rejected": -246.1806182861328, |
|
"loss": 0.7059, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0012417829129844904, |
|
"rewards/margins": 0.0018564596539363265, |
|
"rewards/rejected": -0.0006146768573671579, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.829183578491211, |
|
"logits/rejected": -2.789580821990967, |
|
"logps/chosen": -279.50506591796875, |
|
"logps/rejected": -272.59332275390625, |
|
"loss": 0.703, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.001455739140510559, |
|
"rewards/margins": 0.005600649863481522, |
|
"rewards/rejected": -0.0041449107229709625, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.7342190742492676, |
|
"logits/rejected": -2.7025771141052246, |
|
"logps/chosen": -287.4779968261719, |
|
"logps/rejected": -262.9581298828125, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.00810016505420208, |
|
"rewards/margins": 0.03219769150018692, |
|
"rewards/rejected": -0.02409752830862999, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.70398211479187, |
|
"logits/rejected": -2.678131580352783, |
|
"logps/chosen": -306.2682189941406, |
|
"logps/rejected": -304.858642578125, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.010384158231317997, |
|
"rewards/margins": 0.06137201189994812, |
|
"rewards/rejected": -0.07175617665052414, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.6752824783325195, |
|
"logits/rejected": -2.6444764137268066, |
|
"logps/chosen": -260.00921630859375, |
|
"logps/rejected": -241.9503173828125, |
|
"loss": 0.66, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07034246623516083, |
|
"rewards/margins": 0.10934233665466309, |
|
"rewards/rejected": -0.17968478798866272, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.7276742458343506, |
|
"logits/rejected": -2.7039637565612793, |
|
"logps/chosen": -294.48309326171875, |
|
"logps/rejected": -287.7945251464844, |
|
"loss": 0.6352, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.21306617558002472, |
|
"rewards/margins": 0.20508523285388947, |
|
"rewards/rejected": -0.4181514382362366, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -2.7324066162109375, |
|
"logits/rejected": -2.7214457988739014, |
|
"logps/chosen": -303.2037048339844, |
|
"logps/rejected": -328.5882873535156, |
|
"loss": 0.6282, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.38951486349105835, |
|
"rewards/margins": 0.1954750418663025, |
|
"rewards/rejected": -0.5849899053573608, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -2.732830286026001, |
|
"logits/rejected": -2.6980254650115967, |
|
"logps/chosen": -297.2022399902344, |
|
"logps/rejected": -315.86553955078125, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3561047911643982, |
|
"rewards/margins": 0.31715089082717896, |
|
"rewards/rejected": -0.6732556819915771, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -2.7799582481384277, |
|
"logits/rejected": -2.734489679336548, |
|
"logps/chosen": -322.6822204589844, |
|
"logps/rejected": -346.29632568359375, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.4515351355075836, |
|
"rewards/margins": 0.430880069732666, |
|
"rewards/rejected": -0.8824151158332825, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -2.737361431121826, |
|
"logits/rejected": -2.7130186557769775, |
|
"logps/chosen": -354.4649353027344, |
|
"logps/rejected": -358.72894287109375, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.42983478307724, |
|
"rewards/margins": 0.48764386773109436, |
|
"rewards/rejected": -0.9174786806106567, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -2.5539088249206543, |
|
"logits/rejected": -2.5288946628570557, |
|
"logps/chosen": -332.5165710449219, |
|
"logps/rejected": -339.154296875, |
|
"loss": 0.5704, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5978292226791382, |
|
"rewards/margins": 0.5392987728118896, |
|
"rewards/rejected": -1.1371279954910278, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -2.4799418449401855, |
|
"logits/rejected": -2.463563919067383, |
|
"logps/chosen": -309.03570556640625, |
|
"logps/rejected": -393.3725280761719, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5238430500030518, |
|
"rewards/margins": 0.5551779866218567, |
|
"rewards/rejected": -1.0790210962295532, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -2.4601731300354004, |
|
"logits/rejected": -2.4274346828460693, |
|
"logps/chosen": -369.95086669921875, |
|
"logps/rejected": -402.4490661621094, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8267312049865723, |
|
"rewards/margins": 0.6819665431976318, |
|
"rewards/rejected": -1.508697748184204, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -2.4591126441955566, |
|
"logits/rejected": -2.4192230701446533, |
|
"logps/chosen": -338.8128356933594, |
|
"logps/rejected": -341.96185302734375, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.55790776014328, |
|
"rewards/margins": 0.5498847961425781, |
|
"rewards/rejected": -1.107792615890503, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": -2.324352979660034, |
|
"logits/rejected": -2.297412872314453, |
|
"logps/chosen": -333.8399963378906, |
|
"logps/rejected": -388.5691833496094, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5367451906204224, |
|
"rewards/margins": 0.6883090138435364, |
|
"rewards/rejected": -1.225054144859314, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": -2.25808048248291, |
|
"logits/rejected": -2.191967010498047, |
|
"logps/chosen": -348.13031005859375, |
|
"logps/rejected": -380.72906494140625, |
|
"loss": 0.5827, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0580767393112183, |
|
"rewards/margins": 0.4786924421787262, |
|
"rewards/rejected": -1.536769151687622, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": -2.3127024173736572, |
|
"logits/rejected": -2.320852279663086, |
|
"logps/chosen": -367.42950439453125, |
|
"logps/rejected": -426.7998046875, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7815980315208435, |
|
"rewards/margins": 0.7305761575698853, |
|
"rewards/rejected": -1.512174129486084, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": -2.4002134799957275, |
|
"logits/rejected": -2.3388237953186035, |
|
"logps/chosen": -328.8986511230469, |
|
"logps/rejected": -381.8426208496094, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5217610597610474, |
|
"rewards/margins": 0.6428496241569519, |
|
"rewards/rejected": -1.1646106243133545, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": -2.2329065799713135, |
|
"logits/rejected": -2.1769356727600098, |
|
"logps/chosen": -364.03607177734375, |
|
"logps/rejected": -379.78289794921875, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8814976811408997, |
|
"rewards/margins": 0.5185674428939819, |
|
"rewards/rejected": -1.4000650644302368, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": -2.2697091102600098, |
|
"logits/rejected": -2.2113330364227295, |
|
"logps/chosen": -344.99200439453125, |
|
"logps/rejected": -368.27581787109375, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7441986799240112, |
|
"rewards/margins": 0.5876610279083252, |
|
"rewards/rejected": -1.331859827041626, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -2.227048873901367, |
|
"logits/rejected": -2.2157351970672607, |
|
"logps/chosen": -324.1903991699219, |
|
"logps/rejected": -370.2602844238281, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.5380795001983643, |
|
"rewards/margins": 0.7564869523048401, |
|
"rewards/rejected": -1.2945663928985596, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": -2.1815030574798584, |
|
"logits/rejected": -2.1446516513824463, |
|
"logps/chosen": -329.86566162109375, |
|
"logps/rejected": -409.99267578125, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6506673097610474, |
|
"rewards/margins": 0.7174521684646606, |
|
"rewards/rejected": -1.3681195974349976, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": -2.1926796436309814, |
|
"logits/rejected": -2.133800745010376, |
|
"logps/chosen": -328.4383850097656, |
|
"logps/rejected": -380.1564025878906, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8131483793258667, |
|
"rewards/margins": 0.6653987169265747, |
|
"rewards/rejected": -1.4785473346710205, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": -2.198378324508667, |
|
"logits/rejected": -2.148686647415161, |
|
"logps/chosen": -343.18426513671875, |
|
"logps/rejected": -405.7811584472656, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8210641741752625, |
|
"rewards/margins": 0.6968287229537964, |
|
"rewards/rejected": -1.5178929567337036, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": -2.1465113162994385, |
|
"logits/rejected": -2.093456745147705, |
|
"logps/chosen": -347.0015869140625, |
|
"logps/rejected": -396.0328063964844, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.7407617568969727, |
|
"rewards/margins": 0.8684328198432922, |
|
"rewards/rejected": -1.6091945171356201, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": -2.1429693698883057, |
|
"logits/rejected": -2.0827112197875977, |
|
"logps/chosen": -358.7803649902344, |
|
"logps/rejected": -440.46990966796875, |
|
"loss": 0.527, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9018821716308594, |
|
"rewards/margins": 0.9309617877006531, |
|
"rewards/rejected": -1.8328437805175781, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": -2.215878486633301, |
|
"logits/rejected": -2.1548526287078857, |
|
"logps/chosen": -337.1029357910156, |
|
"logps/rejected": -380.2957763671875, |
|
"loss": 0.5418, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6890209913253784, |
|
"rewards/margins": 0.7738422155380249, |
|
"rewards/rejected": -1.4628633260726929, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": -2.1938037872314453, |
|
"logits/rejected": -2.177739381790161, |
|
"logps/chosen": -353.51544189453125, |
|
"logps/rejected": -412.64306640625, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.726887583732605, |
|
"rewards/margins": 0.7154626250267029, |
|
"rewards/rejected": -1.442350149154663, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": -2.1435744762420654, |
|
"logits/rejected": -2.082127332687378, |
|
"logps/chosen": -368.35906982421875, |
|
"logps/rejected": -436.9691467285156, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8049615025520325, |
|
"rewards/margins": 0.6589316725730896, |
|
"rewards/rejected": -1.4638930559158325, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": -2.188692092895508, |
|
"logits/rejected": -2.1064612865448, |
|
"logps/chosen": -354.7235412597656, |
|
"logps/rejected": -376.1325378417969, |
|
"loss": 0.5187, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.8574191331863403, |
|
"rewards/margins": 0.742445707321167, |
|
"rewards/rejected": -1.5998647212982178, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": -2.113239288330078, |
|
"logits/rejected": -2.0523862838745117, |
|
"logps/chosen": -317.50787353515625, |
|
"logps/rejected": -413.80426025390625, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.7432131171226501, |
|
"rewards/margins": 1.0518220663070679, |
|
"rewards/rejected": -1.7950351238250732, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": -2.2147035598754883, |
|
"logits/rejected": -2.1897997856140137, |
|
"logps/chosen": -375.3721618652344, |
|
"logps/rejected": -414.7781677246094, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9661127328872681, |
|
"rewards/margins": 0.6892365217208862, |
|
"rewards/rejected": -1.6553493738174438, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": -2.1029276847839355, |
|
"logits/rejected": -2.0935397148132324, |
|
"logps/chosen": -336.9126892089844, |
|
"logps/rejected": -391.1408386230469, |
|
"loss": 0.5351, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8055974841117859, |
|
"rewards/margins": 0.6507540941238403, |
|
"rewards/rejected": -1.4563515186309814, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": -2.0941309928894043, |
|
"logits/rejected": -2.0219178199768066, |
|
"logps/chosen": -322.93145751953125, |
|
"logps/rejected": -389.5673828125, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7798911333084106, |
|
"rewards/margins": 0.814423680305481, |
|
"rewards/rejected": -1.5943149328231812, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": -2.1901535987854004, |
|
"logits/rejected": -2.1034839153289795, |
|
"logps/chosen": -386.5938720703125, |
|
"logps/rejected": -423.3191833496094, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.7419500946998596, |
|
"rewards/margins": 0.9194036722183228, |
|
"rewards/rejected": -1.6613538265228271, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": -2.1681642532348633, |
|
"logits/rejected": -2.0902836322784424, |
|
"logps/chosen": -379.05096435546875, |
|
"logps/rejected": -418.29620361328125, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7454948425292969, |
|
"rewards/margins": 0.9174238443374634, |
|
"rewards/rejected": -1.6629188060760498, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": -2.127246618270874, |
|
"logits/rejected": -2.0689470767974854, |
|
"logps/chosen": -356.09906005859375, |
|
"logps/rejected": -440.26788330078125, |
|
"loss": 0.5075, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8266879320144653, |
|
"rewards/margins": 0.8445302844047546, |
|
"rewards/rejected": -1.6712181568145752, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": -2.1151671409606934, |
|
"logits/rejected": -2.071991443634033, |
|
"logps/chosen": -353.2530822753906, |
|
"logps/rejected": -428.5403747558594, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8702594637870789, |
|
"rewards/margins": 0.7266938090324402, |
|
"rewards/rejected": -1.5969533920288086, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": -2.0943470001220703, |
|
"logits/rejected": -2.0785202980041504, |
|
"logps/chosen": -405.1844177246094, |
|
"logps/rejected": -463.93841552734375, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9134857058525085, |
|
"rewards/margins": 0.8523229360580444, |
|
"rewards/rejected": -1.7658087015151978, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": -2.123349905014038, |
|
"logits/rejected": -2.0609166622161865, |
|
"logps/chosen": -333.0213623046875, |
|
"logps/rejected": -408.7607727050781, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7784355282783508, |
|
"rewards/margins": 0.8595449328422546, |
|
"rewards/rejected": -1.6379806995391846, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": -2.0923712253570557, |
|
"logits/rejected": -2.0771572589874268, |
|
"logps/chosen": -346.8676452636719, |
|
"logps/rejected": -450.01312255859375, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.7909084558486938, |
|
"rewards/margins": 0.9126070141792297, |
|
"rewards/rejected": -1.7035152912139893, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": -2.203439474105835, |
|
"logits/rejected": -2.1634104251861572, |
|
"logps/chosen": -366.4968566894531, |
|
"logps/rejected": -410.26177978515625, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7541608810424805, |
|
"rewards/margins": 0.8509491682052612, |
|
"rewards/rejected": -1.6051101684570312, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": -2.166147232055664, |
|
"logits/rejected": -2.1166653633117676, |
|
"logps/chosen": -336.30279541015625, |
|
"logps/rejected": -413.140380859375, |
|
"loss": 0.5157, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8325687646865845, |
|
"rewards/margins": 0.7907065153121948, |
|
"rewards/rejected": -1.6232753992080688, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": -2.073584794998169, |
|
"logits/rejected": -2.025235652923584, |
|
"logps/chosen": -316.84906005859375, |
|
"logps/rejected": -382.339599609375, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7750087976455688, |
|
"rewards/margins": 0.7785266041755676, |
|
"rewards/rejected": -1.5535353422164917, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": -2.1052441596984863, |
|
"logits/rejected": -2.039158582687378, |
|
"logps/chosen": -318.8851623535156, |
|
"logps/rejected": -384.3031005859375, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8155075907707214, |
|
"rewards/margins": 0.7695605158805847, |
|
"rewards/rejected": -1.5850679874420166, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eta": 0.004999999422580004, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": -2.1885509490966797, |
|
"logits/rejected": -2.0868308544158936, |
|
"logps/chosen": -369.0015869140625, |
|
"logps/rejected": -435.66632080078125, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7688988447189331, |
|
"rewards/margins": 1.0167477130889893, |
|
"rewards/rejected": -1.7856464385986328, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5600044772215967, |
|
"train_runtime": 8380.7405, |
|
"train_samples_per_second": 7.295, |
|
"train_steps_per_second": 0.057 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|