|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006817503941369466, |
|
"grad_norm": 20.121693308244087, |
|
"learning_rate": 6.122448979591837e-08, |
|
"logits/chosen": -0.013989130035042763, |
|
"logits/rejected": 0.058542873710393906, |
|
"logps/chosen": -190.215087890625, |
|
"logps/rejected": -203.27479553222656, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.4390625059604645, |
|
"rewards/chosen": 0.0016301964642480016, |
|
"rewards/margins": 0.0021729914005845785, |
|
"rewards/rejected": -0.0005427949363365769, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.013635007882738932, |
|
"grad_norm": 20.76481447180183, |
|
"learning_rate": 1.2925170068027211e-07, |
|
"logits/chosen": -0.006685615051537752, |
|
"logits/rejected": 0.06347016990184784, |
|
"logps/chosen": -191.0093994140625, |
|
"logps/rejected": -203.0770263671875, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5015624761581421, |
|
"rewards/chosen": -0.0009165612864308059, |
|
"rewards/margins": 8.891527249943465e-05, |
|
"rewards/rejected": -0.0010054768063127995, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0204525118241084, |
|
"grad_norm": 21.80914665550797, |
|
"learning_rate": 1.9727891156462583e-07, |
|
"logits/chosen": 0.02424698881804943, |
|
"logits/rejected": 0.0787603035569191, |
|
"logps/chosen": -188.7990264892578, |
|
"logps/rejected": -198.6798095703125, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5093750357627869, |
|
"rewards/chosen": 0.0011249443050473928, |
|
"rewards/margins": 0.000932438881136477, |
|
"rewards/rejected": 0.00019250542391091585, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.027270015765477863, |
|
"grad_norm": 18.7019723893629, |
|
"learning_rate": 2.653061224489796e-07, |
|
"logits/chosen": -0.006816861219704151, |
|
"logits/rejected": 0.05330298840999603, |
|
"logps/chosen": -183.76039123535156, |
|
"logps/rejected": -199.3633575439453, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": -0.0010295719839632511, |
|
"rewards/margins": 0.0016696588136255741, |
|
"rewards/rejected": -0.0026992305647581816, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03408751970684733, |
|
"grad_norm": 20.831078202136776, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": 0.03178512677550316, |
|
"logits/rejected": 0.10444696992635727, |
|
"logps/chosen": -182.8831329345703, |
|
"logps/rejected": -193.09466552734375, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": -0.0042633856646716595, |
|
"rewards/margins": 0.0025998500641435385, |
|
"rewards/rejected": -0.006863235495984554, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0409050236482168, |
|
"grad_norm": 23.46573004490643, |
|
"learning_rate": 4.0136054421768705e-07, |
|
"logits/chosen": -0.0085222776979208, |
|
"logits/rejected": 0.04688471555709839, |
|
"logps/chosen": -187.2247314453125, |
|
"logps/rejected": -197.925537109375, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5218749642372131, |
|
"rewards/chosen": -0.009058980271220207, |
|
"rewards/margins": 0.003005079925060272, |
|
"rewards/rejected": -0.01206406019628048, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04772252758958626, |
|
"grad_norm": 21.16825784724637, |
|
"learning_rate": 4.693877551020408e-07, |
|
"logits/chosen": 0.03674064576625824, |
|
"logits/rejected": 0.10054312646389008, |
|
"logps/chosen": -177.9295654296875, |
|
"logps/rejected": -190.43357849121094, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5453125238418579, |
|
"rewards/chosen": -0.014818010851740837, |
|
"rewards/margins": 0.0029301545582711697, |
|
"rewards/rejected": -0.017748164013028145, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05454003153095573, |
|
"grad_norm": 19.91582657292646, |
|
"learning_rate": 5.374149659863945e-07, |
|
"logits/chosen": 0.03953830525279045, |
|
"logits/rejected": 0.0943986028432846, |
|
"logps/chosen": -174.0701446533203, |
|
"logps/rejected": -185.5764617919922, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.5453125238418579, |
|
"rewards/chosen": -0.02375047467648983, |
|
"rewards/margins": 0.007541469298303127, |
|
"rewards/rejected": -0.03129194676876068, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0613575354723252, |
|
"grad_norm": 19.184157083023965, |
|
"learning_rate": 6.054421768707482e-07, |
|
"logits/chosen": 0.041828252375125885, |
|
"logits/rejected": 0.11053334176540375, |
|
"logps/chosen": -180.23428344726562, |
|
"logps/rejected": -192.7969207763672, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.5656249523162842, |
|
"rewards/chosen": -0.04171518608927727, |
|
"rewards/margins": 0.00825162697583437, |
|
"rewards/rejected": -0.04996681213378906, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06817503941369465, |
|
"grad_norm": 21.70613257034061, |
|
"learning_rate": 6.734693877551019e-07, |
|
"logits/chosen": 0.03590967878699303, |
|
"logits/rejected": 0.12106480449438095, |
|
"logps/chosen": -187.4181365966797, |
|
"logps/rejected": -196.91404724121094, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.620312511920929, |
|
"rewards/chosen": -0.06401355564594269, |
|
"rewards/margins": 0.015702249482274055, |
|
"rewards/rejected": -0.07971581071615219, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07499254335506413, |
|
"grad_norm": 21.340307822546738, |
|
"learning_rate": 7.414965986394558e-07, |
|
"logits/chosen": 0.054249007254838943, |
|
"logits/rejected": 0.11480608582496643, |
|
"logps/chosen": -198.7044219970703, |
|
"logps/rejected": -212.5745086669922, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.598437488079071, |
|
"rewards/chosen": -0.10510613769292831, |
|
"rewards/margins": 0.02307462878525257, |
|
"rewards/rejected": -0.12818075716495514, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0818100472964336, |
|
"grad_norm": 19.224497336436897, |
|
"learning_rate": 8.095238095238095e-07, |
|
"logits/chosen": 0.09257032722234726, |
|
"logits/rejected": 0.12628528475761414, |
|
"logps/chosen": -186.2566680908203, |
|
"logps/rejected": -198.2835235595703, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.6015625596046448, |
|
"rewards/chosen": -0.13667678833007812, |
|
"rewards/margins": 0.02535596489906311, |
|
"rewards/rejected": -0.16203275322914124, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08862755123780305, |
|
"grad_norm": 21.13611235058464, |
|
"learning_rate": 8.775510204081632e-07, |
|
"logits/chosen": 0.09457506239414215, |
|
"logits/rejected": 0.15074704587459564, |
|
"logps/chosen": -194.18267822265625, |
|
"logps/rejected": -206.7709503173828, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.6031250357627869, |
|
"rewards/chosen": -0.17589515447616577, |
|
"rewards/margins": 0.04765651002526283, |
|
"rewards/rejected": -0.2235516607761383, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09544505517917252, |
|
"grad_norm": 19.76839160560583, |
|
"learning_rate": 9.45578231292517e-07, |
|
"logits/chosen": 0.11603380739688873, |
|
"logits/rejected": 0.1540485918521881, |
|
"logps/chosen": -194.67906188964844, |
|
"logps/rejected": -201.3298797607422, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.6421875357627869, |
|
"rewards/chosen": -0.21486632525920868, |
|
"rewards/margins": 0.04251245781779289, |
|
"rewards/rejected": -0.25737878680229187, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.102262559120542, |
|
"grad_norm": 20.781161821934894, |
|
"learning_rate": 9.984848484848486e-07, |
|
"logits/chosen": 0.18178227543830872, |
|
"logits/rejected": 0.20421989262104034, |
|
"logps/chosen": -194.18841552734375, |
|
"logps/rejected": -205.5372314453125, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.6109374761581421, |
|
"rewards/chosen": -0.24953892827033997, |
|
"rewards/margins": 0.05746041238307953, |
|
"rewards/rejected": -0.3069993257522583, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.10908006306191145, |
|
"grad_norm": 20.949425745434294, |
|
"learning_rate": 9.909090909090909e-07, |
|
"logits/chosen": 0.16577480733394623, |
|
"logits/rejected": 0.22489143908023834, |
|
"logps/chosen": -189.01882934570312, |
|
"logps/rejected": -204.76612854003906, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.6578125357627869, |
|
"rewards/chosen": -0.2690110504627228, |
|
"rewards/margins": 0.0968737006187439, |
|
"rewards/rejected": -0.36588478088378906, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11589756700328092, |
|
"grad_norm": 21.376727693673736, |
|
"learning_rate": 9.833333333333332e-07, |
|
"logits/chosen": 0.16099530458450317, |
|
"logits/rejected": 0.20968888700008392, |
|
"logps/chosen": -198.27276611328125, |
|
"logps/rejected": -207.08128356933594, |
|
"loss": 0.6546, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": -0.31624093651771545, |
|
"rewards/margins": 0.09791112691164017, |
|
"rewards/rejected": -0.4141520857810974, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1227150709446504, |
|
"grad_norm": 20.47632034356792, |
|
"learning_rate": 9.757575757575757e-07, |
|
"logits/chosen": 0.16175265610218048, |
|
"logits/rejected": 0.24207058548927307, |
|
"logps/chosen": -192.8699188232422, |
|
"logps/rejected": -204.4312744140625, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.6687500476837158, |
|
"rewards/chosen": -0.3407444357872009, |
|
"rewards/margins": 0.12092556804418564, |
|
"rewards/rejected": -0.46167001128196716, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12953257488601985, |
|
"grad_norm": 20.746940996761676, |
|
"learning_rate": 9.681818181818182e-07, |
|
"logits/chosen": 0.15175826847553253, |
|
"logits/rejected": 0.21674920618534088, |
|
"logps/chosen": -193.29212951660156, |
|
"logps/rejected": -209.36143493652344, |
|
"loss": 0.6389, |
|
"rewards/accuracies": 0.6812500357627869, |
|
"rewards/chosen": -0.3786366581916809, |
|
"rewards/margins": 0.1404908001422882, |
|
"rewards/rejected": -0.5191274285316467, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1363500788273893, |
|
"grad_norm": 20.484642032996728, |
|
"learning_rate": 9.606060606060605e-07, |
|
"logits/chosen": 0.1607164442539215, |
|
"logits/rejected": 0.22002199292182922, |
|
"logps/chosen": -197.4151153564453, |
|
"logps/rejected": -209.8327178955078, |
|
"loss": 0.6291, |
|
"rewards/accuracies": 0.6609375476837158, |
|
"rewards/chosen": -0.41719570755958557, |
|
"rewards/margins": 0.17708109319210052, |
|
"rewards/rejected": -0.5942767858505249, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1431675827687588, |
|
"grad_norm": 26.738984065984987, |
|
"learning_rate": 9.53030303030303e-07, |
|
"logits/chosen": 0.15654993057250977, |
|
"logits/rejected": 0.2388145625591278, |
|
"logps/chosen": -195.02975463867188, |
|
"logps/rejected": -207.19190979003906, |
|
"loss": 0.6342, |
|
"rewards/accuracies": 0.6749999523162842, |
|
"rewards/chosen": -0.4655718505382538, |
|
"rewards/margins": 0.16476726531982422, |
|
"rewards/rejected": -0.6303391456604004, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14998508671012825, |
|
"grad_norm": 20.33866123420931, |
|
"learning_rate": 9.454545454545454e-07, |
|
"logits/chosen": 0.12783432006835938, |
|
"logits/rejected": 0.1976049840450287, |
|
"logps/chosen": -201.7896728515625, |
|
"logps/rejected": -215.41249084472656, |
|
"loss": 0.6291, |
|
"rewards/accuracies": 0.6609375476837158, |
|
"rewards/chosen": -0.5083937644958496, |
|
"rewards/margins": 0.18992076814174652, |
|
"rewards/rejected": -0.6983146071434021, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1568025906514977, |
|
"grad_norm": 32.54405565292402, |
|
"learning_rate": 9.378787878787879e-07, |
|
"logits/chosen": 0.1527099907398224, |
|
"logits/rejected": 0.22111022472381592, |
|
"logps/chosen": -193.4207763671875, |
|
"logps/rejected": -207.85169982910156, |
|
"loss": 0.6212, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.531213641166687, |
|
"rewards/margins": 0.2235802412033081, |
|
"rewards/rejected": -0.7547938823699951, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1636200945928672, |
|
"grad_norm": 19.095273306756834, |
|
"learning_rate": 9.303030303030303e-07, |
|
"logits/chosen": 0.15274283289909363, |
|
"logits/rejected": 0.21214556694030762, |
|
"logps/chosen": -196.24371337890625, |
|
"logps/rejected": -213.04237365722656, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.5204161405563354, |
|
"rewards/margins": 0.2579624056816101, |
|
"rewards/rejected": -0.7783786058425903, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.17043759853423665, |
|
"grad_norm": 22.735594447037276, |
|
"learning_rate": 9.227272727272727e-07, |
|
"logits/chosen": 0.1225215271115303, |
|
"logits/rejected": 0.18497014045715332, |
|
"logps/chosen": -192.548095703125, |
|
"logps/rejected": -207.9135284423828, |
|
"loss": 0.6233, |
|
"rewards/accuracies": 0.667187511920929, |
|
"rewards/chosen": -0.5648759603500366, |
|
"rewards/margins": 0.2269633412361145, |
|
"rewards/rejected": -0.7918393611907959, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1772551024756061, |
|
"grad_norm": 22.954060831129915, |
|
"learning_rate": 9.151515151515152e-07, |
|
"logits/chosen": 0.1713658571243286, |
|
"logits/rejected": 0.25986677408218384, |
|
"logps/chosen": -198.60391235351562, |
|
"logps/rejected": -214.2535400390625, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.6890625357627869, |
|
"rewards/chosen": -0.6113271713256836, |
|
"rewards/margins": 0.27784913778305054, |
|
"rewards/rejected": -0.8891763091087341, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1840726064169756, |
|
"grad_norm": 22.190753794677473, |
|
"learning_rate": 9.075757575757576e-07, |
|
"logits/chosen": 0.14579366147518158, |
|
"logits/rejected": 0.20252245664596558, |
|
"logps/chosen": -199.14405822753906, |
|
"logps/rejected": -213.55294799804688, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.6906250715255737, |
|
"rewards/chosen": -0.5976826548576355, |
|
"rewards/margins": 0.3174746632575989, |
|
"rewards/rejected": -0.9151572585105896, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.19089011035834505, |
|
"grad_norm": 22.054250481854893, |
|
"learning_rate": 9e-07, |
|
"logits/chosen": 0.11682489514350891, |
|
"logits/rejected": 0.18400567770004272, |
|
"logps/chosen": -195.43438720703125, |
|
"logps/rejected": -214.8118896484375, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.600721001625061, |
|
"rewards/margins": 0.29136893153190613, |
|
"rewards/rejected": -0.89208984375, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1977076142997145, |
|
"grad_norm": 24.117104152592596, |
|
"learning_rate": 8.924242424242425e-07, |
|
"logits/chosen": 0.08254396170377731, |
|
"logits/rejected": 0.15450119972229004, |
|
"logps/chosen": -202.29647827148438, |
|
"logps/rejected": -221.5592498779297, |
|
"loss": 0.5958, |
|
"rewards/accuracies": 0.6859375238418579, |
|
"rewards/chosen": -0.5900746583938599, |
|
"rewards/margins": 0.34352385997772217, |
|
"rewards/rejected": -0.9335983991622925, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.204525118241084, |
|
"grad_norm": 21.604244584329482, |
|
"learning_rate": 8.848484848484849e-07, |
|
"logits/chosen": 0.08819441497325897, |
|
"logits/rejected": 0.17239636182785034, |
|
"logps/chosen": -188.81192016601562, |
|
"logps/rejected": -208.72073364257812, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.6129291653633118, |
|
"rewards/margins": 0.4271809160709381, |
|
"rewards/rejected": -1.0401101112365723, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.21134262218245345, |
|
"grad_norm": 22.020365760713695, |
|
"learning_rate": 8.772727272727273e-07, |
|
"logits/chosen": 0.06760307401418686, |
|
"logits/rejected": 0.14344710111618042, |
|
"logps/chosen": -195.82293701171875, |
|
"logps/rejected": -212.59982299804688, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.7140624523162842, |
|
"rewards/chosen": -0.6414520740509033, |
|
"rewards/margins": 0.4150450825691223, |
|
"rewards/rejected": -1.0564970970153809, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2181601261238229, |
|
"grad_norm": 19.161894841909444, |
|
"learning_rate": 8.696969696969697e-07, |
|
"logits/chosen": 0.11280106008052826, |
|
"logits/rejected": 0.18791824579238892, |
|
"logps/chosen": -209.43258666992188, |
|
"logps/rejected": -236.70346069335938, |
|
"loss": 0.5635, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.74039626121521, |
|
"rewards/margins": 0.5291692018508911, |
|
"rewards/rejected": -1.269565463066101, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2249776300651924, |
|
"grad_norm": 22.3343774818719, |
|
"learning_rate": 8.62121212121212e-07, |
|
"logits/chosen": 0.09679359942674637, |
|
"logits/rejected": 0.19822388887405396, |
|
"logps/chosen": -208.64476013183594, |
|
"logps/rejected": -229.81011962890625, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.7343750596046448, |
|
"rewards/chosen": -0.7948130965232849, |
|
"rewards/margins": 0.5681655406951904, |
|
"rewards/rejected": -1.3629785776138306, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.23179513400656185, |
|
"grad_norm": 20.045843226629753, |
|
"learning_rate": 8.545454545454544e-07, |
|
"logits/chosen": 0.03968825936317444, |
|
"logits/rejected": 0.12135367095470428, |
|
"logps/chosen": -207.46609497070312, |
|
"logps/rejected": -224.89439392089844, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.7046875357627869, |
|
"rewards/chosen": -0.7988042831420898, |
|
"rewards/margins": 0.4717750549316406, |
|
"rewards/rejected": -1.2705793380737305, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2386126379479313, |
|
"grad_norm": 30.125557475346305, |
|
"learning_rate": 8.469696969696968e-07, |
|
"logits/chosen": 0.10933436453342438, |
|
"logits/rejected": 0.1516590416431427, |
|
"logps/chosen": -201.27694702148438, |
|
"logps/rejected": -218.70889282226562, |
|
"loss": 0.5859, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.8309043645858765, |
|
"rewards/margins": 0.49856728315353394, |
|
"rewards/rejected": -1.3294715881347656, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2454301418893008, |
|
"grad_norm": 22.893234913534002, |
|
"learning_rate": 8.393939393939393e-07, |
|
"logits/chosen": 0.062410831451416016, |
|
"logits/rejected": 0.13367854058742523, |
|
"logps/chosen": -194.7364959716797, |
|
"logps/rejected": -215.7635040283203, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.698437511920929, |
|
"rewards/chosen": -0.7721937894821167, |
|
"rewards/margins": 0.49258309602737427, |
|
"rewards/rejected": -1.2647769451141357, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2522476458306702, |
|
"grad_norm": 22.23618919017924, |
|
"learning_rate": 8.318181818181817e-07, |
|
"logits/chosen": 0.04097752273082733, |
|
"logits/rejected": 0.11259806156158447, |
|
"logps/chosen": -193.11825561523438, |
|
"logps/rejected": -220.61949157714844, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.7234375476837158, |
|
"rewards/chosen": -0.7389846444129944, |
|
"rewards/margins": 0.5330405831336975, |
|
"rewards/rejected": -1.272025227546692, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2590651497720397, |
|
"grad_norm": 21.834653461400006, |
|
"learning_rate": 8.242424242424241e-07, |
|
"logits/chosen": 0.05887192115187645, |
|
"logits/rejected": 0.1328059434890747, |
|
"logps/chosen": -202.1223602294922, |
|
"logps/rejected": -221.07273864746094, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.7296874523162842, |
|
"rewards/chosen": -0.900775671005249, |
|
"rewards/margins": 0.587921142578125, |
|
"rewards/rejected": -1.488696813583374, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2658826537134092, |
|
"grad_norm": 23.992274309591316, |
|
"learning_rate": 8.166666666666666e-07, |
|
"logits/chosen": 0.03438958153128624, |
|
"logits/rejected": 0.12133367359638214, |
|
"logps/chosen": -202.35487365722656, |
|
"logps/rejected": -227.4732208251953, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.9070041179656982, |
|
"rewards/margins": 0.6916414499282837, |
|
"rewards/rejected": -1.598645567893982, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2727001576547786, |
|
"grad_norm": 22.0860813112572, |
|
"learning_rate": 8.09090909090909e-07, |
|
"logits/chosen": 0.08829227089881897, |
|
"logits/rejected": 0.1640704870223999, |
|
"logps/chosen": -205.60989379882812, |
|
"logps/rejected": -224.852294921875, |
|
"loss": 0.5494, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.9454193115234375, |
|
"rewards/margins": 0.6362107396125793, |
|
"rewards/rejected": -1.5816301107406616, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2795176615961481, |
|
"grad_norm": 23.160379012581956, |
|
"learning_rate": 8.015151515151514e-07, |
|
"logits/chosen": 0.06336803734302521, |
|
"logits/rejected": 0.13648778200149536, |
|
"logps/chosen": -205.41561889648438, |
|
"logps/rejected": -228.48057556152344, |
|
"loss": 0.5462, |
|
"rewards/accuracies": 0.7343750596046448, |
|
"rewards/chosen": -0.9269916415214539, |
|
"rewards/margins": 0.6442463994026184, |
|
"rewards/rejected": -1.5712381601333618, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2863351655375176, |
|
"grad_norm": 22.408557079555997, |
|
"learning_rate": 7.939393939393939e-07, |
|
"logits/chosen": 0.08505380898714066, |
|
"logits/rejected": 0.18310996890068054, |
|
"logps/chosen": -213.0338897705078, |
|
"logps/rejected": -235.76596069335938, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -1.0196318626403809, |
|
"rewards/margins": 0.5743885040283203, |
|
"rewards/rejected": -1.5940203666687012, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.293152669478887, |
|
"grad_norm": 25.16391126214167, |
|
"learning_rate": 7.863636363636363e-07, |
|
"logits/chosen": 0.10937841981649399, |
|
"logits/rejected": 0.16763341426849365, |
|
"logps/chosen": -207.480224609375, |
|
"logps/rejected": -234.79544067382812, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.9945791363716125, |
|
"rewards/margins": 0.7221311330795288, |
|
"rewards/rejected": -1.7167102098464966, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2999701734202565, |
|
"grad_norm": 29.733679778009286, |
|
"learning_rate": 7.787878787878787e-07, |
|
"logits/chosen": 0.09990985691547394, |
|
"logits/rejected": 0.19938966631889343, |
|
"logps/chosen": -207.3507843017578, |
|
"logps/rejected": -230.52630615234375, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9231570959091187, |
|
"rewards/margins": 0.6046810746192932, |
|
"rewards/rejected": -1.527838110923767, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.306787677361626, |
|
"grad_norm": 19.72755564195352, |
|
"learning_rate": 7.712121212121212e-07, |
|
"logits/chosen": 0.14025147259235382, |
|
"logits/rejected": 0.1931421309709549, |
|
"logps/chosen": -211.64739990234375, |
|
"logps/rejected": -234.59742736816406, |
|
"loss": 0.539, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.9828760623931885, |
|
"rewards/margins": 0.6804162263870239, |
|
"rewards/rejected": -1.6632922887802124, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3136051813029954, |
|
"grad_norm": 23.978166468246467, |
|
"learning_rate": 7.636363636363636e-07, |
|
"logits/chosen": 0.0902441218495369, |
|
"logits/rejected": 0.18330176174640656, |
|
"logps/chosen": -212.70648193359375, |
|
"logps/rejected": -237.41574096679688, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.7468750476837158, |
|
"rewards/chosen": -1.0690429210662842, |
|
"rewards/margins": 0.6713231801986694, |
|
"rewards/rejected": -1.740365982055664, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3204226852443649, |
|
"grad_norm": 26.207432691612087, |
|
"learning_rate": 7.56060606060606e-07, |
|
"logits/chosen": 0.11341211199760437, |
|
"logits/rejected": 0.1867765188217163, |
|
"logps/chosen": -197.91021728515625, |
|
"logps/rejected": -221.50146484375, |
|
"loss": 0.5343, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.9720097780227661, |
|
"rewards/margins": 0.6199135780334473, |
|
"rewards/rejected": -1.5919233560562134, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3272401891857344, |
|
"grad_norm": 21.846418443949897, |
|
"learning_rate": 7.484848484848485e-07, |
|
"logits/chosen": 0.11702318489551544, |
|
"logits/rejected": 0.203329399228096, |
|
"logps/chosen": -201.898193359375, |
|
"logps/rejected": -221.0704803466797, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.7437500357627869, |
|
"rewards/chosen": -0.9688056111335754, |
|
"rewards/margins": 0.7794600129127502, |
|
"rewards/rejected": -1.7482655048370361, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3340576931271038, |
|
"grad_norm": 23.690713241273283, |
|
"learning_rate": 7.409090909090909e-07, |
|
"logits/chosen": 0.10431469976902008, |
|
"logits/rejected": 0.20410987734794617, |
|
"logps/chosen": -212.93824768066406, |
|
"logps/rejected": -240.54246520996094, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.9758983850479126, |
|
"rewards/margins": 0.8021982312202454, |
|
"rewards/rejected": -1.7780965566635132, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3408751970684733, |
|
"grad_norm": 23.75624324955974, |
|
"learning_rate": 7.333333333333332e-07, |
|
"logits/chosen": 0.0590752549469471, |
|
"logits/rejected": 0.15686756372451782, |
|
"logps/chosen": -204.53347778320312, |
|
"logps/rejected": -232.50863647460938, |
|
"loss": 0.4879, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.9903222322463989, |
|
"rewards/margins": 0.8048741817474365, |
|
"rewards/rejected": -1.7951964139938354, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3476927010098428, |
|
"grad_norm": 24.670166077373448, |
|
"learning_rate": 7.257575757575756e-07, |
|
"logits/chosen": 0.04646120220422745, |
|
"logits/rejected": 0.1389884203672409, |
|
"logps/chosen": -216.92431640625, |
|
"logps/rejected": -242.18240356445312, |
|
"loss": 0.5303, |
|
"rewards/accuracies": 0.7312500476837158, |
|
"rewards/chosen": -1.1649525165557861, |
|
"rewards/margins": 0.7863146066665649, |
|
"rewards/rejected": -1.9512672424316406, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3545102049512122, |
|
"grad_norm": 23.363016624074675, |
|
"learning_rate": 7.181818181818181e-07, |
|
"logits/chosen": -0.012276587076485157, |
|
"logits/rejected": 0.07009466737508774, |
|
"logps/chosen": -205.48934936523438, |
|
"logps/rejected": -232.42538452148438, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.7671874761581421, |
|
"rewards/chosen": -1.0472114086151123, |
|
"rewards/margins": 0.788646399974823, |
|
"rewards/rejected": -1.83585786819458, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3613277088925817, |
|
"grad_norm": 25.288611630037565, |
|
"learning_rate": 7.106060606060605e-07, |
|
"logits/chosen": -0.012474373914301395, |
|
"logits/rejected": 0.05413222685456276, |
|
"logps/chosen": -208.1520538330078, |
|
"logps/rejected": -241.48385620117188, |
|
"loss": 0.5252, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -1.1651169061660767, |
|
"rewards/margins": 1.0477391481399536, |
|
"rewards/rejected": -2.2128560543060303, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3681452128339512, |
|
"grad_norm": 20.178014539662176, |
|
"learning_rate": 7.030303030303029e-07, |
|
"logits/chosen": -0.04357679560780525, |
|
"logits/rejected": 0.0625062957406044, |
|
"logps/chosen": -210.49520874023438, |
|
"logps/rejected": -241.66493225097656, |
|
"loss": 0.4807, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.1151983737945557, |
|
"rewards/margins": 0.9497561454772949, |
|
"rewards/rejected": -2.0649547576904297, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3749627167753206, |
|
"grad_norm": 21.944400967201336, |
|
"learning_rate": 6.954545454545454e-07, |
|
"logits/chosen": 0.004060904495418072, |
|
"logits/rejected": 0.08822981268167496, |
|
"logps/chosen": -207.4363250732422, |
|
"logps/rejected": -234.25550842285156, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.2111350297927856, |
|
"rewards/margins": 0.8183422684669495, |
|
"rewards/rejected": -2.02947735786438, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3817802207166901, |
|
"grad_norm": 26.850310787153724, |
|
"learning_rate": 6.878787878787878e-07, |
|
"logits/chosen": -0.017100585624575615, |
|
"logits/rejected": 0.07575605064630508, |
|
"logps/chosen": -213.30137634277344, |
|
"logps/rejected": -243.35186767578125, |
|
"loss": 0.4941, |
|
"rewards/accuracies": 0.7593750357627869, |
|
"rewards/chosen": -1.3406263589859009, |
|
"rewards/margins": 0.9297415018081665, |
|
"rewards/rejected": -2.2703678607940674, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3885977246580596, |
|
"grad_norm": 25.982278946370197, |
|
"learning_rate": 6.803030303030302e-07, |
|
"logits/chosen": 0.019733965396881104, |
|
"logits/rejected": 0.07434576749801636, |
|
"logps/chosen": -220.54368591308594, |
|
"logps/rejected": -245.9012451171875, |
|
"loss": 0.5234, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -1.3345239162445068, |
|
"rewards/margins": 0.882716953754425, |
|
"rewards/rejected": -2.217240810394287, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.395415228599429, |
|
"grad_norm": 30.324292712841515, |
|
"learning_rate": 6.727272727272727e-07, |
|
"logits/chosen": -0.07648750394582748, |
|
"logits/rejected": 0.013701358810067177, |
|
"logps/chosen": -215.10986328125, |
|
"logps/rejected": -241.52174377441406, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.785937488079071, |
|
"rewards/chosen": -1.2312657833099365, |
|
"rewards/margins": 0.8834339380264282, |
|
"rewards/rejected": -2.1146998405456543, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4022327325407985, |
|
"grad_norm": 30.267248385209463, |
|
"learning_rate": 6.651515151515151e-07, |
|
"logits/chosen": -0.08248546719551086, |
|
"logits/rejected": 0.012668056413531303, |
|
"logps/chosen": -217.48597717285156, |
|
"logps/rejected": -248.1539306640625, |
|
"loss": 0.4718, |
|
"rewards/accuracies": 0.770312488079071, |
|
"rewards/chosen": -1.3211320638656616, |
|
"rewards/margins": 1.034571886062622, |
|
"rewards/rejected": -2.3557040691375732, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.409050236482168, |
|
"grad_norm": 33.7927835163272, |
|
"learning_rate": 6.575757575757575e-07, |
|
"logits/chosen": -0.10192164778709412, |
|
"logits/rejected": -0.015221836045384407, |
|
"logps/chosen": -217.5685577392578, |
|
"logps/rejected": -255.0595703125, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4013419151306152, |
|
"rewards/margins": 1.1709883213043213, |
|
"rewards/rejected": -2.5723299980163574, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4158677404235374, |
|
"grad_norm": 26.97423161614756, |
|
"learning_rate": 6.5e-07, |
|
"logits/chosen": -0.09173352271318436, |
|
"logits/rejected": -0.004754798021167517, |
|
"logps/chosen": -214.8068084716797, |
|
"logps/rejected": -247.6568145751953, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -1.280412197113037, |
|
"rewards/margins": 1.0068598985671997, |
|
"rewards/rejected": -2.2872722148895264, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4226852443649069, |
|
"grad_norm": 23.353042033965973, |
|
"learning_rate": 6.424242424242424e-07, |
|
"logits/chosen": -0.08759984374046326, |
|
"logits/rejected": -0.006561200134456158, |
|
"logps/chosen": -209.99142456054688, |
|
"logps/rejected": -240.59873962402344, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.7640625238418579, |
|
"rewards/chosen": -1.327502965927124, |
|
"rewards/margins": 0.8928775191307068, |
|
"rewards/rejected": -2.2203807830810547, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4295027483062764, |
|
"grad_norm": 19.622319808534836, |
|
"learning_rate": 6.348484848484848e-07, |
|
"logits/chosen": -0.072292260825634, |
|
"logits/rejected": 0.022778620943427086, |
|
"logps/chosen": -208.22052001953125, |
|
"logps/rejected": -243.78250122070312, |
|
"loss": 0.4516, |
|
"rewards/accuracies": 0.7953125238418579, |
|
"rewards/chosen": -1.2082226276397705, |
|
"rewards/margins": 1.1600842475891113, |
|
"rewards/rejected": -2.368306875228882, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4363202522476458, |
|
"grad_norm": 27.54001459206905, |
|
"learning_rate": 6.272727272727273e-07, |
|
"logits/chosen": -0.07350125908851624, |
|
"logits/rejected": 0.0216854028403759, |
|
"logps/chosen": -214.49398803710938, |
|
"logps/rejected": -245.44357299804688, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.7671875357627869, |
|
"rewards/chosen": -1.3779951333999634, |
|
"rewards/margins": 1.1042989492416382, |
|
"rewards/rejected": -2.4822940826416016, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4431377561890153, |
|
"grad_norm": 22.005523605032646, |
|
"learning_rate": 6.196969696969697e-07, |
|
"logits/chosen": -0.08112622797489166, |
|
"logits/rejected": 0.016506649553775787, |
|
"logps/chosen": -207.86119079589844, |
|
"logps/rejected": -243.9912109375, |
|
"loss": 0.4669, |
|
"rewards/accuracies": 0.776562511920929, |
|
"rewards/chosen": -1.312855839729309, |
|
"rewards/margins": 1.1270496845245361, |
|
"rewards/rejected": -2.4399054050445557, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4499552601303848, |
|
"grad_norm": 22.786232365472497, |
|
"learning_rate": 6.12121212121212e-07, |
|
"logits/chosen": -0.014887440949678421, |
|
"logits/rejected": 0.07256890088319778, |
|
"logps/chosen": -214.85989379882812, |
|
"logps/rejected": -242.47958374023438, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -1.3719854354858398, |
|
"rewards/margins": 0.9033377766609192, |
|
"rewards/rejected": -2.2753231525421143, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4567727640717542, |
|
"grad_norm": 22.124194779218676, |
|
"learning_rate": 6.045454545454545e-07, |
|
"logits/chosen": -0.06873725354671478, |
|
"logits/rejected": 0.01612996682524681, |
|
"logps/chosen": -214.2239990234375, |
|
"logps/rejected": -245.57972717285156, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.7515625357627869, |
|
"rewards/chosen": -1.374406099319458, |
|
"rewards/margins": 1.0592212677001953, |
|
"rewards/rejected": -2.4336276054382324, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4635902680131237, |
|
"grad_norm": 25.208264673147376, |
|
"learning_rate": 5.969696969696969e-07, |
|
"logits/chosen": -0.08258620649576187, |
|
"logits/rejected": 0.019038595259189606, |
|
"logps/chosen": -212.36058044433594, |
|
"logps/rejected": -247.52285766601562, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.7796874642372131, |
|
"rewards/chosen": -1.355445384979248, |
|
"rewards/margins": 1.0607233047485352, |
|
"rewards/rejected": -2.416168689727783, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4704077719544932, |
|
"grad_norm": 23.492565569306137, |
|
"learning_rate": 5.893939393939393e-07, |
|
"logits/chosen": -0.048173777759075165, |
|
"logits/rejected": 0.05654379725456238, |
|
"logps/chosen": -203.7908935546875, |
|
"logps/rejected": -235.70806884765625, |
|
"loss": 0.463, |
|
"rewards/accuracies": 0.7640625238418579, |
|
"rewards/chosen": -1.3086433410644531, |
|
"rewards/margins": 1.073492407798767, |
|
"rewards/rejected": -2.3821358680725098, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.4772252758958626, |
|
"grad_norm": 23.525844296596986, |
|
"learning_rate": 5.818181818181818e-07, |
|
"logits/chosen": -0.06435231864452362, |
|
"logits/rejected": 0.036002036184072495, |
|
"logps/chosen": -215.8461456298828, |
|
"logps/rejected": -253.51507568359375, |
|
"loss": 0.4424, |
|
"rewards/accuracies": 0.7937500476837158, |
|
"rewards/chosen": -1.304274320602417, |
|
"rewards/margins": 1.1241943836212158, |
|
"rewards/rejected": -2.428468704223633, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4840427798372321, |
|
"grad_norm": 24.839200055423007, |
|
"learning_rate": 5.742424242424242e-07, |
|
"logits/chosen": -0.1140328049659729, |
|
"logits/rejected": -0.0182164516299963, |
|
"logps/chosen": -219.0977020263672, |
|
"logps/rejected": -245.15292358398438, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.7796875238418579, |
|
"rewards/chosen": -1.4004441499710083, |
|
"rewards/margins": 1.1077167987823486, |
|
"rewards/rejected": -2.5081608295440674, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.4908602837786016, |
|
"grad_norm": 21.081539741219093, |
|
"learning_rate": 5.666666666666666e-07, |
|
"logits/chosen": -0.12187488377094269, |
|
"logits/rejected": -0.013158449903130531, |
|
"logps/chosen": -209.8751220703125, |
|
"logps/rejected": -246.4436492919922, |
|
"loss": 0.4639, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -1.422644853591919, |
|
"rewards/margins": 1.1023151874542236, |
|
"rewards/rejected": -2.5249602794647217, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.497677787719971, |
|
"grad_norm": 24.782821613085822, |
|
"learning_rate": 5.590909090909091e-07, |
|
"logits/chosen": -0.06877341866493225, |
|
"logits/rejected": 0.035405777394771576, |
|
"logps/chosen": -216.97332763671875, |
|
"logps/rejected": -256.8809814453125, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.8078125715255737, |
|
"rewards/chosen": -1.4306436777114868, |
|
"rewards/margins": 1.1720441579818726, |
|
"rewards/rejected": -2.6026878356933594, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5044952916613404, |
|
"grad_norm": 24.588552599891635, |
|
"learning_rate": 5.515151515151515e-07, |
|
"logits/chosen": -0.07467488199472427, |
|
"logits/rejected": 0.017803018912672997, |
|
"logps/chosen": -230.01266479492188, |
|
"logps/rejected": -262.2432861328125, |
|
"loss": 0.4669, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.632286548614502, |
|
"rewards/margins": 1.2193667888641357, |
|
"rewards/rejected": -2.8516533374786377, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5113127956027099, |
|
"grad_norm": 20.475854944533108, |
|
"learning_rate": 5.439393939393939e-07, |
|
"logits/chosen": -0.07244399189949036, |
|
"logits/rejected": 0.0048616742715239525, |
|
"logps/chosen": -218.39337158203125, |
|
"logps/rejected": -250.11416625976562, |
|
"loss": 0.4467, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5096694231033325, |
|
"rewards/margins": 1.196234107017517, |
|
"rewards/rejected": -2.7059032917022705, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5181302995440794, |
|
"grad_norm": 24.853405341139933, |
|
"learning_rate": 5.363636363636363e-07, |
|
"logits/chosen": -0.0735924020409584, |
|
"logits/rejected": 0.022622695192694664, |
|
"logps/chosen": -216.3765411376953, |
|
"logps/rejected": -250.67161560058594, |
|
"loss": 0.4594, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.5826576948165894, |
|
"rewards/margins": 1.228342890739441, |
|
"rewards/rejected": -2.8110008239746094, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5249478034854489, |
|
"grad_norm": 20.713474784942502, |
|
"learning_rate": 5.287878787878788e-07, |
|
"logits/chosen": -0.07298550754785538, |
|
"logits/rejected": 0.015953145921230316, |
|
"logps/chosen": -213.88311767578125, |
|
"logps/rejected": -254.41146850585938, |
|
"loss": 0.4376, |
|
"rewards/accuracies": 0.8312500715255737, |
|
"rewards/chosen": -1.4484457969665527, |
|
"rewards/margins": 1.3493634462356567, |
|
"rewards/rejected": -2.797809362411499, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5317653074268184, |
|
"grad_norm": 25.104952077400377, |
|
"learning_rate": 5.212121212121212e-07, |
|
"logits/chosen": -0.040183987468481064, |
|
"logits/rejected": 0.03515133634209633, |
|
"logps/chosen": -217.26239013671875, |
|
"logps/rejected": -248.25259399414062, |
|
"loss": 0.4695, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -1.5951099395751953, |
|
"rewards/margins": 1.2722889184951782, |
|
"rewards/rejected": -2.867398738861084, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5385828113681879, |
|
"grad_norm": 28.29480749737075, |
|
"learning_rate": 5.136363636363636e-07, |
|
"logits/chosen": -0.04418431594967842, |
|
"logits/rejected": 0.05417613312602043, |
|
"logps/chosen": -222.26820373535156, |
|
"logps/rejected": -257.3962707519531, |
|
"loss": 0.4579, |
|
"rewards/accuracies": 0.7906250357627869, |
|
"rewards/chosen": -1.570731520652771, |
|
"rewards/margins": 1.1469626426696777, |
|
"rewards/rejected": -2.717694044113159, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5454003153095572, |
|
"grad_norm": 23.21073310542596, |
|
"learning_rate": 5.060606060606061e-07, |
|
"logits/chosen": -0.07153814285993576, |
|
"logits/rejected": 0.021596048027276993, |
|
"logps/chosen": -217.89630126953125, |
|
"logps/rejected": -254.9208221435547, |
|
"loss": 0.4123, |
|
"rewards/accuracies": 0.8250000476837158, |
|
"rewards/chosen": -1.5101759433746338, |
|
"rewards/margins": 1.3654392957687378, |
|
"rewards/rejected": -2.8756155967712402, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5522178192509267, |
|
"grad_norm": 21.00129246925032, |
|
"learning_rate": 4.984848484848485e-07, |
|
"logits/chosen": -0.06583255529403687, |
|
"logits/rejected": 0.04746149852871895, |
|
"logps/chosen": -214.39453125, |
|
"logps/rejected": -253.67242431640625, |
|
"loss": 0.4335, |
|
"rewards/accuracies": 0.785937488079071, |
|
"rewards/chosen": -1.6045914888381958, |
|
"rewards/margins": 1.34579598903656, |
|
"rewards/rejected": -2.950387477874756, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5590353231922962, |
|
"grad_norm": 25.238378218924026, |
|
"learning_rate": 4.909090909090909e-07, |
|
"logits/chosen": -0.06921117007732391, |
|
"logits/rejected": 0.03174077346920967, |
|
"logps/chosen": -213.57254028320312, |
|
"logps/rejected": -248.80799865722656, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6258023977279663, |
|
"rewards/margins": 1.308131217956543, |
|
"rewards/rejected": -2.933933734893799, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5658528271336657, |
|
"grad_norm": 22.691646670626245, |
|
"learning_rate": 4.833333333333333e-07, |
|
"logits/chosen": -0.08812057971954346, |
|
"logits/rejected": 0.028292154893279076, |
|
"logps/chosen": -217.73495483398438, |
|
"logps/rejected": -258.3476867675781, |
|
"loss": 0.4395, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.6993637084960938, |
|
"rewards/margins": 1.3441307544708252, |
|
"rewards/rejected": -3.043494462966919, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5726703310750352, |
|
"grad_norm": 26.78354678353827, |
|
"learning_rate": 4.7575757575757574e-07, |
|
"logits/chosen": -0.08428293466567993, |
|
"logits/rejected": 0.0038617942482233047, |
|
"logps/chosen": -228.9470977783203, |
|
"logps/rejected": -262.1487121582031, |
|
"loss": 0.4585, |
|
"rewards/accuracies": 0.7874999642372131, |
|
"rewards/chosen": -1.7086464166641235, |
|
"rewards/margins": 1.2672871351242065, |
|
"rewards/rejected": -2.97593355178833, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5794878350164047, |
|
"grad_norm": 21.82947420474725, |
|
"learning_rate": 4.681818181818182e-07, |
|
"logits/chosen": -0.07127973437309265, |
|
"logits/rejected": 0.03848648816347122, |
|
"logps/chosen": -214.94436645507812, |
|
"logps/rejected": -252.94378662109375, |
|
"loss": 0.4487, |
|
"rewards/accuracies": 0.785937488079071, |
|
"rewards/chosen": -1.5856503248214722, |
|
"rewards/margins": 1.2746567726135254, |
|
"rewards/rejected": -2.860306978225708, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.586305338957774, |
|
"grad_norm": 25.33636086854273, |
|
"learning_rate": 4.606060606060606e-07, |
|
"logits/chosen": -0.0755903422832489, |
|
"logits/rejected": 0.007846422493457794, |
|
"logps/chosen": -229.74365234375, |
|
"logps/rejected": -266.0998229980469, |
|
"loss": 0.4466, |
|
"rewards/accuracies": 0.8109375238418579, |
|
"rewards/chosen": -1.6116007566452026, |
|
"rewards/margins": 1.3451875448226929, |
|
"rewards/rejected": -2.9567883014678955, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5931228428991435, |
|
"grad_norm": 23.430791371783034, |
|
"learning_rate": 4.53030303030303e-07, |
|
"logits/chosen": -0.08032269030809402, |
|
"logits/rejected": 0.013129429891705513, |
|
"logps/chosen": -211.66123962402344, |
|
"logps/rejected": -249.7532501220703, |
|
"loss": 0.4168, |
|
"rewards/accuracies": 0.8296875357627869, |
|
"rewards/chosen": -1.4253365993499756, |
|
"rewards/margins": 1.4885873794555664, |
|
"rewards/rejected": -2.913924217224121, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.599940346840513, |
|
"grad_norm": 25.6792868240925, |
|
"learning_rate": 4.4545454545454544e-07, |
|
"logits/chosen": -0.10410317778587341, |
|
"logits/rejected": -0.02125217206776142, |
|
"logps/chosen": -211.6175994873047, |
|
"logps/rejected": -250.02481079101562, |
|
"loss": 0.4419, |
|
"rewards/accuracies": 0.776562511920929, |
|
"rewards/chosen": -1.4521470069885254, |
|
"rewards/margins": 1.4656074047088623, |
|
"rewards/rejected": -2.9177544116973877, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6067578507818825, |
|
"grad_norm": 24.336985726252383, |
|
"learning_rate": 4.3787878787878784e-07, |
|
"logits/chosen": -0.1017291247844696, |
|
"logits/rejected": -0.006951052229851484, |
|
"logps/chosen": -207.91339111328125, |
|
"logps/rejected": -244.61329650878906, |
|
"loss": 0.4338, |
|
"rewards/accuracies": 0.7953125238418579, |
|
"rewards/chosen": -1.3715894222259521, |
|
"rewards/margins": 1.3720262050628662, |
|
"rewards/rejected": -2.7436156272888184, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.613575354723252, |
|
"grad_norm": 28.717619953557637, |
|
"learning_rate": 4.303030303030303e-07, |
|
"logits/chosen": -0.12957513332366943, |
|
"logits/rejected": -0.04137944057583809, |
|
"logps/chosen": -212.1148223876953, |
|
"logps/rejected": -250.58657836914062, |
|
"loss": 0.4428, |
|
"rewards/accuracies": 0.8062500357627869, |
|
"rewards/chosen": -1.5294814109802246, |
|
"rewards/margins": 1.3217490911483765, |
|
"rewards/rejected": -2.8512306213378906, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6203928586646215, |
|
"grad_norm": 24.64484800585095, |
|
"learning_rate": 4.227272727272727e-07, |
|
"logits/chosen": -0.1173202320933342, |
|
"logits/rejected": -0.026215719059109688, |
|
"logps/chosen": -219.25180053710938, |
|
"logps/rejected": -253.3756561279297, |
|
"loss": 0.4444, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -1.5156140327453613, |
|
"rewards/margins": 1.2724617719650269, |
|
"rewards/rejected": -2.7880756855010986, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6272103626059908, |
|
"grad_norm": 21.577769353125333, |
|
"learning_rate": 4.1515151515151513e-07, |
|
"logits/chosen": -0.09302366524934769, |
|
"logits/rejected": -0.017984673380851746, |
|
"logps/chosen": -225.01498413085938, |
|
"logps/rejected": -260.4620666503906, |
|
"loss": 0.4204, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5388312339782715, |
|
"rewards/margins": 1.3805811405181885, |
|
"rewards/rejected": -2.91941237449646, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6340278665473603, |
|
"grad_norm": 21.851016424850197, |
|
"learning_rate": 4.075757575757576e-07, |
|
"logits/chosen": -0.12906233966350555, |
|
"logits/rejected": -0.042714815586805344, |
|
"logps/chosen": -224.21875, |
|
"logps/rejected": -261.9176940917969, |
|
"loss": 0.4137, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -1.6001354455947876, |
|
"rewards/margins": 1.3610618114471436, |
|
"rewards/rejected": -2.9611973762512207, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6408453704887298, |
|
"grad_norm": 28.106139021916903, |
|
"learning_rate": 4e-07, |
|
"logits/chosen": -0.17769670486450195, |
|
"logits/rejected": -0.07489217072725296, |
|
"logps/chosen": -215.14065551757812, |
|
"logps/rejected": -260.8359680175781, |
|
"loss": 0.4011, |
|
"rewards/accuracies": 0.8265625238418579, |
|
"rewards/chosen": -1.5202898979187012, |
|
"rewards/margins": 1.60263991355896, |
|
"rewards/rejected": -3.122929811477661, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6476628744300993, |
|
"grad_norm": 26.85542161068887, |
|
"learning_rate": 3.924242424242424e-07, |
|
"logits/chosen": -0.14841988682746887, |
|
"logits/rejected": -0.06951985508203506, |
|
"logps/chosen": -207.8277130126953, |
|
"logps/rejected": -250.3575439453125, |
|
"loss": 0.4295, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -1.5621274709701538, |
|
"rewards/margins": 1.4103821516036987, |
|
"rewards/rejected": -2.9725096225738525, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6544803783714688, |
|
"grad_norm": 30.007894138540276, |
|
"learning_rate": 3.8484848484848483e-07, |
|
"logits/chosen": -0.17307066917419434, |
|
"logits/rejected": -0.07763750106096268, |
|
"logps/chosen": -220.78807067871094, |
|
"logps/rejected": -258.3966369628906, |
|
"loss": 0.4118, |
|
"rewards/accuracies": 0.7984375357627869, |
|
"rewards/chosen": -1.657617449760437, |
|
"rewards/margins": 1.4771149158477783, |
|
"rewards/rejected": -3.134732723236084, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6612978823128383, |
|
"grad_norm": 29.07536633448252, |
|
"learning_rate": 3.7727272727272723e-07, |
|
"logits/chosen": -0.15869039297103882, |
|
"logits/rejected": -0.07338032126426697, |
|
"logps/chosen": -211.30084228515625, |
|
"logps/rejected": -255.395751953125, |
|
"loss": 0.4357, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -1.6164686679840088, |
|
"rewards/margins": 1.3737252950668335, |
|
"rewards/rejected": -2.9901938438415527, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.6681153862542076, |
|
"grad_norm": 28.531124567566387, |
|
"learning_rate": 3.696969696969697e-07, |
|
"logits/chosen": -0.13030777871608734, |
|
"logits/rejected": -0.046453818678855896, |
|
"logps/chosen": -216.94631958007812, |
|
"logps/rejected": -267.0611572265625, |
|
"loss": 0.4098, |
|
"rewards/accuracies": 0.8203125596046448, |
|
"rewards/chosen": -1.650040626525879, |
|
"rewards/margins": 1.7019206285476685, |
|
"rewards/rejected": -3.351961135864258, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.6749328901955771, |
|
"grad_norm": 24.268025730195088, |
|
"learning_rate": 3.6212121212121213e-07, |
|
"logits/chosen": -0.15982282161712646, |
|
"logits/rejected": -0.0780140832066536, |
|
"logps/chosen": -215.56704711914062, |
|
"logps/rejected": -260.79736328125, |
|
"loss": 0.397, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.6652125120162964, |
|
"rewards/margins": 1.7730036973953247, |
|
"rewards/rejected": -3.438216209411621, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6817503941369466, |
|
"grad_norm": 24.14951347534636, |
|
"learning_rate": 3.545454545454545e-07, |
|
"logits/chosen": -0.16655105352401733, |
|
"logits/rejected": -0.06034347787499428, |
|
"logps/chosen": -214.05752563476562, |
|
"logps/rejected": -254.98703002929688, |
|
"loss": 0.4264, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": -1.8080203533172607, |
|
"rewards/margins": 1.4334427118301392, |
|
"rewards/rejected": -3.2414629459381104, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6885678980783161, |
|
"grad_norm": 28.566305760445395, |
|
"learning_rate": 3.46969696969697e-07, |
|
"logits/chosen": -0.19522453844547272, |
|
"logits/rejected": -0.08679309487342834, |
|
"logps/chosen": -227.52655029296875, |
|
"logps/rejected": -274.0975646972656, |
|
"loss": 0.3803, |
|
"rewards/accuracies": 0.8328125476837158, |
|
"rewards/chosen": -1.7928123474121094, |
|
"rewards/margins": 1.5590283870697021, |
|
"rewards/rejected": -3.3518409729003906, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6953854020196856, |
|
"grad_norm": 47.85219425522013, |
|
"learning_rate": 3.393939393939394e-07, |
|
"logits/chosen": -0.1567739099264145, |
|
"logits/rejected": -0.06111231818795204, |
|
"logps/chosen": -228.47779846191406, |
|
"logps/rejected": -274.33428955078125, |
|
"loss": 0.4016, |
|
"rewards/accuracies": 0.8218750357627869, |
|
"rewards/chosen": -1.8795576095581055, |
|
"rewards/margins": 1.6040751934051514, |
|
"rewards/rejected": -3.4836325645446777, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7022029059610551, |
|
"grad_norm": 27.480180578165882, |
|
"learning_rate": 3.318181818181818e-07, |
|
"logits/chosen": -0.14685329794883728, |
|
"logits/rejected": -0.04708694666624069, |
|
"logps/chosen": -229.77883911132812, |
|
"logps/rejected": -269.2246398925781, |
|
"loss": 0.434, |
|
"rewards/accuracies": 0.8156250715255737, |
|
"rewards/chosen": -1.9993985891342163, |
|
"rewards/margins": 1.541892647743225, |
|
"rewards/rejected": -3.5412912368774414, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7090204099024244, |
|
"grad_norm": 28.437076758644576, |
|
"learning_rate": 3.242424242424242e-07, |
|
"logits/chosen": -0.11219906061887741, |
|
"logits/rejected": 0.005472442135214806, |
|
"logps/chosen": -223.41433715820312, |
|
"logps/rejected": -263.777099609375, |
|
"loss": 0.4147, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -1.9533647298812866, |
|
"rewards/margins": 1.5760959386825562, |
|
"rewards/rejected": -3.5294606685638428, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7158379138437939, |
|
"grad_norm": 24.780730794756213, |
|
"learning_rate": 3.166666666666666e-07, |
|
"logits/chosen": -0.10113102942705154, |
|
"logits/rejected": -0.0247341338545084, |
|
"logps/chosen": -224.37001037597656, |
|
"logps/rejected": -268.121337890625, |
|
"loss": 0.4301, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9565200805664062, |
|
"rewards/margins": 1.5767617225646973, |
|
"rewards/rejected": -3.5332815647125244, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7226554177851634, |
|
"grad_norm": 24.01977768454246, |
|
"learning_rate": 3.0909090909090907e-07, |
|
"logits/chosen": -0.16017019748687744, |
|
"logits/rejected": -0.038362376391887665, |
|
"logps/chosen": -221.4423828125, |
|
"logps/rejected": -269.96832275390625, |
|
"loss": 0.3642, |
|
"rewards/accuracies": 0.8578125238418579, |
|
"rewards/chosen": -1.7076702117919922, |
|
"rewards/margins": 1.7623119354248047, |
|
"rewards/rejected": -3.469982147216797, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7294729217265329, |
|
"grad_norm": 23.607998459504426, |
|
"learning_rate": 3.015151515151515e-07, |
|
"logits/chosen": -0.10493813455104828, |
|
"logits/rejected": -0.015611783601343632, |
|
"logps/chosen": -225.48379516601562, |
|
"logps/rejected": -265.3271789550781, |
|
"loss": 0.3929, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -1.8727744817733765, |
|
"rewards/margins": 1.5633747577667236, |
|
"rewards/rejected": -3.4361491203308105, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7362904256679024, |
|
"grad_norm": 27.095218567106365, |
|
"learning_rate": 2.939393939393939e-07, |
|
"logits/chosen": -0.17973893880844116, |
|
"logits/rejected": -0.0618341825902462, |
|
"logps/chosen": -216.41917419433594, |
|
"logps/rejected": -259.7709655761719, |
|
"loss": 0.4123, |
|
"rewards/accuracies": 0.8218750357627869, |
|
"rewards/chosen": -1.995064377784729, |
|
"rewards/margins": 1.4902169704437256, |
|
"rewards/rejected": -3.485281467437744, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7431079296092719, |
|
"grad_norm": 25.027731308583423, |
|
"learning_rate": 2.8636363636363637e-07, |
|
"logits/chosen": -0.15540730953216553, |
|
"logits/rejected": -0.03777886554598808, |
|
"logps/chosen": -228.9147186279297, |
|
"logps/rejected": -270.1064758300781, |
|
"loss": 0.3986, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.030261993408203, |
|
"rewards/margins": 1.6086986064910889, |
|
"rewards/rejected": -3.638960599899292, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7499254335506412, |
|
"grad_norm": 32.01602660729836, |
|
"learning_rate": 2.787878787878788e-07, |
|
"logits/chosen": -0.11517558991909027, |
|
"logits/rejected": -0.03462303429841995, |
|
"logps/chosen": -224.9439697265625, |
|
"logps/rejected": -259.7923889160156, |
|
"loss": 0.4219, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9747884273529053, |
|
"rewards/margins": 1.3838858604431152, |
|
"rewards/rejected": -3.3586747646331787, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7567429374920107, |
|
"grad_norm": 31.517350763988723, |
|
"learning_rate": 2.712121212121212e-07, |
|
"logits/chosen": -0.12141910940408707, |
|
"logits/rejected": -0.01053343154489994, |
|
"logps/chosen": -220.55685424804688, |
|
"logps/rejected": -259.1381530761719, |
|
"loss": 0.4178, |
|
"rewards/accuracies": 0.8109375238418579, |
|
"rewards/chosen": -1.887320637702942, |
|
"rewards/margins": 1.4266173839569092, |
|
"rewards/rejected": -3.3139376640319824, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.7635604414333802, |
|
"grad_norm": 26.127671898389973, |
|
"learning_rate": 2.636363636363636e-07, |
|
"logits/chosen": -0.08841400593519211, |
|
"logits/rejected": -0.0024342993274331093, |
|
"logps/chosen": -226.21588134765625, |
|
"logps/rejected": -265.4579772949219, |
|
"loss": 0.4114, |
|
"rewards/accuracies": 0.8328125476837158, |
|
"rewards/chosen": -1.9491462707519531, |
|
"rewards/margins": 1.4079630374908447, |
|
"rewards/rejected": -3.357109308242798, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.7703779453747497, |
|
"grad_norm": 27.236714171841353, |
|
"learning_rate": 2.56060606060606e-07, |
|
"logits/chosen": -0.09334474056959152, |
|
"logits/rejected": 0.006139551289379597, |
|
"logps/chosen": -221.79632568359375, |
|
"logps/rejected": -259.6671447753906, |
|
"loss": 0.4146, |
|
"rewards/accuracies": 0.8125000596046448, |
|
"rewards/chosen": -1.8307971954345703, |
|
"rewards/margins": 1.6026943922042847, |
|
"rewards/rejected": -3.4334912300109863, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.7771954493161192, |
|
"grad_norm": 23.66366648859467, |
|
"learning_rate": 2.4848484848484846e-07, |
|
"logits/chosen": -0.11060778051614761, |
|
"logits/rejected": -0.020527532324194908, |
|
"logps/chosen": -217.34706115722656, |
|
"logps/rejected": -258.9071350097656, |
|
"loss": 0.4164, |
|
"rewards/accuracies": 0.8093750476837158, |
|
"rewards/chosen": -1.7729765176773071, |
|
"rewards/margins": 1.513333797454834, |
|
"rewards/rejected": -3.2863101959228516, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.7840129532574887, |
|
"grad_norm": 25.2882907135726, |
|
"learning_rate": 2.409090909090909e-07, |
|
"logits/chosen": -0.10374785959720612, |
|
"logits/rejected": -0.018276991322636604, |
|
"logps/chosen": -236.36245727539062, |
|
"logps/rejected": -274.1772766113281, |
|
"loss": 0.4001, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.8536320924758911, |
|
"rewards/margins": 1.4811946153640747, |
|
"rewards/rejected": -3.3348264694213867, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.790830457198858, |
|
"grad_norm": 26.790523464833836, |
|
"learning_rate": 2.3333333333333333e-07, |
|
"logits/chosen": -0.13400709629058838, |
|
"logits/rejected": -0.03729373216629028, |
|
"logps/chosen": -220.23544311523438, |
|
"logps/rejected": -264.3974304199219, |
|
"loss": 0.3736, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.7529268264770508, |
|
"rewards/margins": 1.6899100542068481, |
|
"rewards/rejected": -3.4428367614746094, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.7976479611402275, |
|
"grad_norm": 19.702274370294905, |
|
"learning_rate": 2.2575757575757576e-07, |
|
"logits/chosen": -0.17225009202957153, |
|
"logits/rejected": -0.06531926244497299, |
|
"logps/chosen": -217.31951904296875, |
|
"logps/rejected": -262.68475341796875, |
|
"loss": 0.3893, |
|
"rewards/accuracies": 0.8296875357627869, |
|
"rewards/chosen": -1.773924469947815, |
|
"rewards/margins": 1.5378942489624023, |
|
"rewards/rejected": -3.311818838119507, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.804465465081597, |
|
"grad_norm": 23.425791089677848, |
|
"learning_rate": 2.1818181818181815e-07, |
|
"logits/chosen": -0.1890868991613388, |
|
"logits/rejected": -0.09723814576864243, |
|
"logps/chosen": -237.30435180664062, |
|
"logps/rejected": -280.53155517578125, |
|
"loss": 0.4049, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -1.9801266193389893, |
|
"rewards/margins": 1.6612507104873657, |
|
"rewards/rejected": -3.6413774490356445, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8112829690229665, |
|
"grad_norm": 24.870512384975044, |
|
"learning_rate": 2.106060606060606e-07, |
|
"logits/chosen": -0.15967592597007751, |
|
"logits/rejected": -0.04680642858147621, |
|
"logps/chosen": -226.22872924804688, |
|
"logps/rejected": -259.76031494140625, |
|
"loss": 0.4036, |
|
"rewards/accuracies": 0.8218750357627869, |
|
"rewards/chosen": -1.8575615882873535, |
|
"rewards/margins": 1.4922484159469604, |
|
"rewards/rejected": -3.3498101234436035, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.818100472964336, |
|
"grad_norm": 24.491520644166894, |
|
"learning_rate": 2.0303030303030303e-07, |
|
"logits/chosen": -0.1476404070854187, |
|
"logits/rejected": -0.056609444320201874, |
|
"logps/chosen": -218.4747314453125, |
|
"logps/rejected": -266.4804382324219, |
|
"loss": 0.3761, |
|
"rewards/accuracies": 0.839062511920929, |
|
"rewards/chosen": -1.8029754161834717, |
|
"rewards/margins": 1.7781448364257812, |
|
"rewards/rejected": -3.581120014190674, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8249179769057055, |
|
"grad_norm": 25.43453775835723, |
|
"learning_rate": 1.9545454545454545e-07, |
|
"logits/chosen": -0.19677500426769257, |
|
"logits/rejected": -0.1171552985906601, |
|
"logps/chosen": -229.79348754882812, |
|
"logps/rejected": -272.9264831542969, |
|
"loss": 0.3763, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -1.9184120893478394, |
|
"rewards/margins": 1.567906379699707, |
|
"rewards/rejected": -3.486318349838257, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8317354808470748, |
|
"grad_norm": 28.16577093909573, |
|
"learning_rate": 1.8787878787878785e-07, |
|
"logits/chosen": -0.2017778754234314, |
|
"logits/rejected": -0.09277643263339996, |
|
"logps/chosen": -231.24696350097656, |
|
"logps/rejected": -278.23516845703125, |
|
"loss": 0.3691, |
|
"rewards/accuracies": 0.854687511920929, |
|
"rewards/chosen": -1.9549689292907715, |
|
"rewards/margins": 1.7604336738586426, |
|
"rewards/rejected": -3.715402603149414, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8385529847884443, |
|
"grad_norm": 21.84200034001996, |
|
"learning_rate": 1.803030303030303e-07, |
|
"logits/chosen": -0.21481652557849884, |
|
"logits/rejected": -0.09980207681655884, |
|
"logps/chosen": -233.2796630859375, |
|
"logps/rejected": -282.91644287109375, |
|
"loss": 0.3711, |
|
"rewards/accuracies": 0.8609374761581421, |
|
"rewards/chosen": -2.0337018966674805, |
|
"rewards/margins": 1.896054983139038, |
|
"rewards/rejected": -3.9297573566436768, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8453704887298138, |
|
"grad_norm": 22.694435946938032, |
|
"learning_rate": 1.7272727272727272e-07, |
|
"logits/chosen": -0.15690943598747253, |
|
"logits/rejected": -0.06275378912687302, |
|
"logps/chosen": -227.27455139160156, |
|
"logps/rejected": -274.53790283203125, |
|
"loss": 0.3913, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -1.9616923332214355, |
|
"rewards/margins": 1.6192635297775269, |
|
"rewards/rejected": -3.580955982208252, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8521879926711833, |
|
"grad_norm": 26.094403500936398, |
|
"learning_rate": 1.6515151515151515e-07, |
|
"logits/chosen": -0.19916404783725739, |
|
"logits/rejected": -0.08219482004642487, |
|
"logps/chosen": -227.81277465820312, |
|
"logps/rejected": -274.002685546875, |
|
"loss": 0.3713, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.1085541248321533, |
|
"rewards/margins": 1.6733149290084839, |
|
"rewards/rejected": -3.7818689346313477, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8590054966125528, |
|
"grad_norm": 28.285289787916426, |
|
"learning_rate": 1.5757575757575757e-07, |
|
"logits/chosen": -0.20249146223068237, |
|
"logits/rejected": -0.11430975049734116, |
|
"logps/chosen": -224.1591796875, |
|
"logps/rejected": -267.00555419921875, |
|
"loss": 0.355, |
|
"rewards/accuracies": 0.8531250357627869, |
|
"rewards/chosen": -2.015260934829712, |
|
"rewards/margins": 1.7478997707366943, |
|
"rewards/rejected": -3.7631607055664062, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.8658230005539223, |
|
"grad_norm": 28.62125408987568, |
|
"learning_rate": 1.5e-07, |
|
"logits/chosen": -0.23050257563591003, |
|
"logits/rejected": -0.13684435188770294, |
|
"logps/chosen": -226.26783752441406, |
|
"logps/rejected": -269.7716979980469, |
|
"loss": 0.3747, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -1.9981603622436523, |
|
"rewards/margins": 1.6271231174468994, |
|
"rewards/rejected": -3.6252834796905518, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.8726405044952916, |
|
"grad_norm": 28.239549647986106, |
|
"learning_rate": 1.4242424242424242e-07, |
|
"logits/chosen": -0.21466362476348877, |
|
"logits/rejected": -0.11737212538719177, |
|
"logps/chosen": -225.7066650390625, |
|
"logps/rejected": -269.4527893066406, |
|
"loss": 0.3437, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.0757107734680176, |
|
"rewards/margins": 1.816016674041748, |
|
"rewards/rejected": -3.8917269706726074, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.8794580084366611, |
|
"grad_norm": 31.488998628971228, |
|
"learning_rate": 1.3484848484848484e-07, |
|
"logits/chosen": -0.2509796619415283, |
|
"logits/rejected": -0.15716706216335297, |
|
"logps/chosen": -227.94644165039062, |
|
"logps/rejected": -273.2456970214844, |
|
"loss": 0.3713, |
|
"rewards/accuracies": 0.8312500715255737, |
|
"rewards/chosen": -2.063997268676758, |
|
"rewards/margins": 1.7757971286773682, |
|
"rewards/rejected": -3.839794635772705, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.8862755123780306, |
|
"grad_norm": 30.3120062725158, |
|
"learning_rate": 1.2727272727272726e-07, |
|
"logits/chosen": -0.22213181853294373, |
|
"logits/rejected": -0.10703583061695099, |
|
"logps/chosen": -230.4038848876953, |
|
"logps/rejected": -277.89312744140625, |
|
"loss": 0.3841, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -2.1271657943725586, |
|
"rewards/margins": 1.8834253549575806, |
|
"rewards/rejected": -4.01059103012085, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.8930930163194001, |
|
"grad_norm": 20.44931799862506, |
|
"learning_rate": 1.196969696969697e-07, |
|
"logits/chosen": -0.19441619515419006, |
|
"logits/rejected": -0.09727019816637039, |
|
"logps/chosen": -221.7393035888672, |
|
"logps/rejected": -264.74078369140625, |
|
"loss": 0.3754, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.1274771690368652, |
|
"rewards/margins": 1.6642124652862549, |
|
"rewards/rejected": -3.791689395904541, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.8999105202607696, |
|
"grad_norm": 29.43599693108452, |
|
"learning_rate": 1.1212121212121211e-07, |
|
"logits/chosen": -0.20746608078479767, |
|
"logits/rejected": -0.10938036441802979, |
|
"logps/chosen": -230.49411010742188, |
|
"logps/rejected": -274.149169921875, |
|
"loss": 0.4089, |
|
"rewards/accuracies": 0.8140624761581421, |
|
"rewards/chosen": -2.111417293548584, |
|
"rewards/margins": 1.6498727798461914, |
|
"rewards/rejected": -3.7612900733947754, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9067280242021389, |
|
"grad_norm": 32.13529497456355, |
|
"learning_rate": 1.0454545454545454e-07, |
|
"logits/chosen": -0.19128209352493286, |
|
"logits/rejected": -0.09950501471757889, |
|
"logps/chosen": -232.70655822753906, |
|
"logps/rejected": -277.17218017578125, |
|
"loss": 0.4075, |
|
"rewards/accuracies": 0.8171875476837158, |
|
"rewards/chosen": -2.2094011306762695, |
|
"rewards/margins": 1.6824061870574951, |
|
"rewards/rejected": -3.8918075561523438, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9135455281435084, |
|
"grad_norm": 24.495028721716935, |
|
"learning_rate": 9.696969696969696e-08, |
|
"logits/chosen": -0.2125847339630127, |
|
"logits/rejected": -0.13061244785785675, |
|
"logps/chosen": -238.3480682373047, |
|
"logps/rejected": -278.2753601074219, |
|
"loss": 0.3927, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.9954111576080322, |
|
"rewards/margins": 1.6787245273590088, |
|
"rewards/rejected": -3.674135208129883, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9203630320848779, |
|
"grad_norm": 18.494054905792385, |
|
"learning_rate": 8.93939393939394e-08, |
|
"logits/chosen": -0.22991694509983063, |
|
"logits/rejected": -0.1375647336244583, |
|
"logps/chosen": -232.1606903076172, |
|
"logps/rejected": -277.8919677734375, |
|
"loss": 0.3625, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -1.9369205236434937, |
|
"rewards/margins": 1.840739369392395, |
|
"rewards/rejected": -3.7776598930358887, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9271805360262474, |
|
"grad_norm": 23.0952598889843, |
|
"learning_rate": 8.181818181818182e-08, |
|
"logits/chosen": -0.2471987009048462, |
|
"logits/rejected": -0.15926134586334229, |
|
"logps/chosen": -226.1959991455078, |
|
"logps/rejected": -268.29144287109375, |
|
"loss": 0.3859, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.9533106088638306, |
|
"rewards/margins": 1.6697773933410645, |
|
"rewards/rejected": -3.6230881214141846, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9339980399676169, |
|
"grad_norm": 31.57121472361453, |
|
"learning_rate": 7.424242424242424e-08, |
|
"logits/chosen": -0.21243150532245636, |
|
"logits/rejected": -0.10772553086280823, |
|
"logps/chosen": -229.5428924560547, |
|
"logps/rejected": -267.2238464355469, |
|
"loss": 0.41, |
|
"rewards/accuracies": 0.8093750476837158, |
|
"rewards/chosen": -2.041501998901367, |
|
"rewards/margins": 1.6097761392593384, |
|
"rewards/rejected": -3.651278257369995, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9408155439089864, |
|
"grad_norm": 24.660859941372323, |
|
"learning_rate": 6.666666666666667e-08, |
|
"logits/chosen": -0.2305571436882019, |
|
"logits/rejected": -0.13984078168869019, |
|
"logps/chosen": -235.77882385253906, |
|
"logps/rejected": -275.8688659667969, |
|
"loss": 0.3632, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -1.9864195585250854, |
|
"rewards/margins": 1.6625468730926514, |
|
"rewards/rejected": -3.6489667892456055, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.9476330478503557, |
|
"grad_norm": 21.4536097610763, |
|
"learning_rate": 5.9090909090909085e-08, |
|
"logits/chosen": -0.2120400369167328, |
|
"logits/rejected": -0.12410594522953033, |
|
"logps/chosen": -222.5196075439453, |
|
"logps/rejected": -271.9889831542969, |
|
"loss": 0.3386, |
|
"rewards/accuracies": 0.8750000596046448, |
|
"rewards/chosen": -1.9523049592971802, |
|
"rewards/margins": 1.976583480834961, |
|
"rewards/rejected": -3.9288883209228516, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9544505517917252, |
|
"grad_norm": 24.15173339303778, |
|
"learning_rate": 5.151515151515151e-08, |
|
"logits/chosen": -0.2382027804851532, |
|
"logits/rejected": -0.15090808272361755, |
|
"logps/chosen": -222.01071166992188, |
|
"logps/rejected": -271.6986389160156, |
|
"loss": 0.3614, |
|
"rewards/accuracies": 0.8453124761581421, |
|
"rewards/chosen": -2.006227731704712, |
|
"rewards/margins": 1.7343711853027344, |
|
"rewards/rejected": -3.740598678588867, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9612680557330947, |
|
"grad_norm": 24.550521505283424, |
|
"learning_rate": 4.393939393939393e-08, |
|
"logits/chosen": -0.17552296817302704, |
|
"logits/rejected": -0.08083190023899078, |
|
"logps/chosen": -228.05532836914062, |
|
"logps/rejected": -272.7567443847656, |
|
"loss": 0.4114, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": -2.1707372665405273, |
|
"rewards/margins": 1.5515494346618652, |
|
"rewards/rejected": -3.7222867012023926, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.9680855596744642, |
|
"grad_norm": 22.32856821239117, |
|
"learning_rate": 3.636363636363636e-08, |
|
"logits/chosen": -0.22575151920318604, |
|
"logits/rejected": -0.12315725535154343, |
|
"logps/chosen": -227.44094848632812, |
|
"logps/rejected": -275.5225830078125, |
|
"loss": 0.3557, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -1.9577521085739136, |
|
"rewards/margins": 1.8534328937530518, |
|
"rewards/rejected": -3.811184883117676, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.9749030636158337, |
|
"grad_norm": 29.28483492412063, |
|
"learning_rate": 2.8787878787878787e-08, |
|
"logits/chosen": -0.20699195563793182, |
|
"logits/rejected": -0.11024124175310135, |
|
"logps/chosen": -226.45755004882812, |
|
"logps/rejected": -270.3711242675781, |
|
"loss": 0.3872, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -2.00704288482666, |
|
"rewards/margins": 1.5971609354019165, |
|
"rewards/rejected": -3.604203939437866, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.9817205675572032, |
|
"grad_norm": 25.452990040575536, |
|
"learning_rate": 2.1212121212121214e-08, |
|
"logits/chosen": -0.2176055610179901, |
|
"logits/rejected": -0.09939160197973251, |
|
"logps/chosen": -219.8345947265625, |
|
"logps/rejected": -268.5072937011719, |
|
"loss": 0.3557, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -2.036515235900879, |
|
"rewards/margins": 1.7241967916488647, |
|
"rewards/rejected": -3.760712146759033, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.9885380714985725, |
|
"grad_norm": 21.433383519856523, |
|
"learning_rate": 1.3636363636363635e-08, |
|
"logits/chosen": -0.19624559581279755, |
|
"logits/rejected": -0.10556697845458984, |
|
"logps/chosen": -229.1326141357422, |
|
"logps/rejected": -277.2064514160156, |
|
"loss": 0.3724, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0669782161712646, |
|
"rewards/margins": 1.8541796207427979, |
|
"rewards/rejected": -3.9211580753326416, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.995355575439942, |
|
"grad_norm": 24.137434407958857, |
|
"learning_rate": 6.06060606060606e-09, |
|
"logits/chosen": -0.19412463903427124, |
|
"logits/rejected": -0.08993732929229736, |
|
"logps/chosen": -227.69815063476562, |
|
"logps/rejected": -274.43865966796875, |
|
"loss": 0.3698, |
|
"rewards/accuracies": 0.848437488079071, |
|
"rewards/chosen": -1.9875869750976562, |
|
"rewards/margins": 1.7459800243377686, |
|
"rewards/rejected": -3.7335667610168457, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1467, |
|
"total_flos": 161507922542592.0, |
|
"train_loss": 0.48762158089620206, |
|
"train_runtime": 14310.7821, |
|
"train_samples_per_second": 6.56, |
|
"train_steps_per_second": 0.103 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 161507922542592.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|