|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100.0, |
|
"global_step": 239, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 6.047051636632, |
|
"learning_rate": 1.6666666666666667e-08, |
|
"logits/generated": -3.130502223968506, |
|
"logits/oppo_generated": -3.1088104248046875, |
|
"logits/oppo_real": -3.130502223968506, |
|
"logits/real": -3.1088104248046875, |
|
"logps/generated": -99.40917205810547, |
|
"logps/oppo_gen": -99.40917205810547, |
|
"logps/oppo_real": -459.3097229003906, |
|
"logps/real": -459.3097229003906, |
|
"loss": 0.6068, |
|
"loss/gen": 0.5344465970993042, |
|
"loss/real": 0.07232951372861862, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.047051636632, |
|
"learning_rate": 1.6666666666666667e-08, |
|
"logits/generated": -3.0933988094329834, |
|
"logits/oppo_generated": -2.919645309448242, |
|
"logits/oppo_real": -3.0933988094329834, |
|
"logits/real": -2.919645309448242, |
|
"logps/generated": -103.65153503417969, |
|
"logps/oppo_gen": -103.65153503417969, |
|
"logps/oppo_real": -392.1358642578125, |
|
"logps/real": -392.1358642578125, |
|
"loss": 0.6068, |
|
"loss/gen": 0.5344465970993042, |
|
"loss/real": 0.07232951372861862, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.809209404894276, |
|
"learning_rate": 3.3333333333333334e-08, |
|
"logits/generated": -2.6572537422180176, |
|
"logits/oppo_generated": -2.8074941635131836, |
|
"logits/oppo_real": -2.6572537422180176, |
|
"logits/real": -2.8074941635131836, |
|
"logps/generated": -72.88986206054688, |
|
"logps/oppo_gen": -72.88986206054688, |
|
"logps/oppo_real": -291.916748046875, |
|
"logps/real": -291.916748046875, |
|
"loss": 0.6068, |
|
"loss/gen": 0.5344465970993042, |
|
"loss/real": 0.07232951372861862, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 5.655275208162173, |
|
"learning_rate": 5e-08, |
|
"logits/generated": -2.8966193199157715, |
|
"logits/oppo_generated": -2.768460273742676, |
|
"logits/oppo_real": -2.8966193199157715, |
|
"logits/real": -2.768460273742676, |
|
"logps/generated": -64.05287170410156, |
|
"logps/oppo_gen": -64.05287170410156, |
|
"logps/oppo_real": -376.8367919921875, |
|
"logps/real": -376.8367919921875, |
|
"loss": 0.6068, |
|
"loss/gen": 0.5344465970993042, |
|
"loss/real": 0.07232951372861862, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 5.655275208162173, |
|
"learning_rate": 5e-08, |
|
"logits/generated": -2.8891592025756836, |
|
"logits/oppo_generated": -2.708950996398926, |
|
"logits/oppo_real": -2.889317512512207, |
|
"logits/real": -2.708822250366211, |
|
"logps/generated": -48.3460693359375, |
|
"logps/oppo_gen": -48.29164123535156, |
|
"logps/oppo_real": -173.0751953125, |
|
"logps/real": -173.10202026367188, |
|
"loss": 0.6067, |
|
"loss/gen": 0.5342901945114136, |
|
"loss/real": 0.07235788553953171, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -0.054425716400146484, |
|
"rewards/margins": 0.02759718894958496, |
|
"rewards/real": -0.026828527450561523, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.191032218232002, |
|
"learning_rate": 6.666666666666667e-08, |
|
"logits/generated": -2.9579579830169678, |
|
"logits/oppo_generated": -2.749436378479004, |
|
"logits/oppo_real": -2.957958698272705, |
|
"logits/real": -2.7493579387664795, |
|
"logps/generated": -48.876949310302734, |
|
"logps/oppo_gen": -48.84138488769531, |
|
"logps/oppo_real": -139.2998046875, |
|
"logps/real": -139.3273468017578, |
|
"loss": 0.6067, |
|
"loss/gen": 0.5343444347381592, |
|
"loss/real": 0.072358638048172, |
|
"rewards/accuracies": 0.625, |
|
"rewards/generated": -0.035565853118896484, |
|
"rewards/margins": 0.008023262023925781, |
|
"rewards/real": -0.027542591094970703, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.460550668356129, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/generated": -3.1190991401672363, |
|
"logits/oppo_generated": -2.9545342922210693, |
|
"logits/oppo_real": -3.1195316314697266, |
|
"logits/real": -2.953674793243408, |
|
"logps/generated": -163.337158203125, |
|
"logps/oppo_gen": -163.2059783935547, |
|
"logps/oppo_real": -432.88226318359375, |
|
"logps/real": -432.93475341796875, |
|
"loss": 0.6065, |
|
"loss/gen": 0.5340694189071655, |
|
"loss/real": 0.07238505035638809, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.13118791580200195, |
|
"rewards/margins": 0.07870101928710938, |
|
"rewards/real": -0.05248689651489258, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.654872832010975, |
|
"learning_rate": 1e-07, |
|
"logits/generated": -2.908449172973633, |
|
"logits/oppo_generated": -2.9416637420654297, |
|
"logits/oppo_real": -2.910332441329956, |
|
"logits/real": -2.9389724731445312, |
|
"logps/generated": -69.748291015625, |
|
"logps/oppo_gen": -69.29386901855469, |
|
"logps/oppo_real": -311.59619140625, |
|
"logps/real": -311.66888427734375, |
|
"loss": 0.6057, |
|
"loss/gen": 0.533139705657959, |
|
"loss/real": 0.07240670919418335, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.4544186592102051, |
|
"rewards/margins": 0.3816962242126465, |
|
"rewards/real": -0.0727224349975586, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.654872832010975, |
|
"learning_rate": 1e-07, |
|
"logits/generated": -2.406726360321045, |
|
"logits/oppo_generated": -2.294548273086548, |
|
"logits/oppo_real": -2.409976005554199, |
|
"logits/real": -2.292487621307373, |
|
"logps/generated": -82.7013931274414, |
|
"logps/oppo_gen": -82.20011138916016, |
|
"logps/oppo_real": -381.1852111816406, |
|
"logps/real": -381.2330627441406, |
|
"loss": 0.6052, |
|
"loss/gen": 0.5330047607421875, |
|
"loss/real": 0.07238054275512695, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.5012831687927246, |
|
"rewards/margins": 0.4534478187561035, |
|
"rewards/real": -0.047835350036621094, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.392383587128462, |
|
"learning_rate": 1.1666666666666667e-07, |
|
"logits/generated": -2.959984302520752, |
|
"logits/oppo_generated": -2.9239017963409424, |
|
"logits/oppo_real": -2.963313579559326, |
|
"logits/real": -2.9208478927612305, |
|
"logps/generated": -93.70030212402344, |
|
"logps/oppo_gen": -93.09856414794922, |
|
"logps/oppo_real": -233.10401916503906, |
|
"logps/real": -233.15390014648438, |
|
"loss": 0.6053, |
|
"loss/gen": 0.5327156186103821, |
|
"loss/real": 0.07238255441188812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.6017398834228516, |
|
"rewards/margins": 0.5518603324890137, |
|
"rewards/real": -0.04987955093383789, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.744929192407967, |
|
"learning_rate": 1.3333333333333334e-07, |
|
"logits/generated": -2.8514630794525146, |
|
"logits/oppo_generated": -2.837850570678711, |
|
"logits/oppo_real": -2.857771396636963, |
|
"logits/real": -2.8309640884399414, |
|
"logps/generated": -61.07666015625, |
|
"logps/oppo_gen": -59.46293640136719, |
|
"logps/oppo_real": -142.69805908203125, |
|
"logps/real": -143.30299377441406, |
|
"loss": 0.6025, |
|
"loss/gen": 0.5297998189926147, |
|
"loss/real": 0.07297563552856445, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -1.613722562789917, |
|
"rewards/margins": 1.0087928771972656, |
|
"rewards/real": -0.6049296855926514, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.755958455310272, |
|
"learning_rate": 1.5e-07, |
|
"logits/generated": -2.8688440322875977, |
|
"logits/oppo_generated": -2.7672762870788574, |
|
"logits/oppo_real": -2.8780808448791504, |
|
"logits/real": -2.756375312805176, |
|
"logps/generated": -72.56011962890625, |
|
"logps/oppo_gen": -70.58644104003906, |
|
"logps/oppo_real": -343.4704284667969, |
|
"logps/real": -343.6341247558594, |
|
"loss": 0.6012, |
|
"loss/gen": 0.5287613272666931, |
|
"loss/real": 0.07250790297985077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.97367525100708, |
|
"rewards/margins": 1.8099758625030518, |
|
"rewards/real": -0.16369938850402832, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.518660361230539, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"logits/generated": -2.79744291305542, |
|
"logits/oppo_generated": -2.8374581336975098, |
|
"logits/oppo_real": -2.822021961212158, |
|
"logits/real": -2.811734437942505, |
|
"logps/generated": -110.61869049072266, |
|
"logps/oppo_gen": -106.73956298828125, |
|
"logps/oppo_real": -280.41741943359375, |
|
"logps/real": -282.54925537109375, |
|
"loss": 0.5948, |
|
"loss/gen": 0.5232512354850769, |
|
"loss/real": 0.0746162161231041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.87911319732666, |
|
"rewards/margins": 1.7472848892211914, |
|
"rewards/real": -2.1318283081054688, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.852587214182235, |
|
"learning_rate": 1.833333333333333e-07, |
|
"logits/generated": -2.7365834712982178, |
|
"logits/oppo_generated": -2.8255615234375, |
|
"logits/oppo_real": -2.771684169769287, |
|
"logits/real": -2.7913918495178223, |
|
"logps/generated": -93.08985137939453, |
|
"logps/oppo_gen": -85.86231994628906, |
|
"logps/oppo_real": -289.01318359375, |
|
"logps/real": -289.8492431640625, |
|
"loss": 0.5907, |
|
"loss/gen": 0.5135142803192139, |
|
"loss/real": 0.07325862348079681, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.227532386779785, |
|
"rewards/margins": 6.3914642333984375, |
|
"rewards/real": -0.8360681533813477, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.442473143343532, |
|
"learning_rate": 2e-07, |
|
"logits/generated": -3.1198368072509766, |
|
"logits/oppo_generated": -2.7394165992736816, |
|
"logits/oppo_real": -3.1553921699523926, |
|
"logits/real": -2.7055230140686035, |
|
"logps/generated": -81.803955078125, |
|
"logps/oppo_gen": -74.47514343261719, |
|
"logps/oppo_real": -366.370361328125, |
|
"logps/real": -367.5390625, |
|
"loss": 0.588, |
|
"loss/gen": 0.5132280588150024, |
|
"loss/real": 0.07360552996397018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.328815460205078, |
|
"rewards/margins": 6.160125255584717, |
|
"rewards/real": -1.1686904430389404, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.707858026397619, |
|
"learning_rate": 2.1666666666666667e-07, |
|
"logits/generated": -2.060692310333252, |
|
"logits/oppo_generated": -2.1468427181243896, |
|
"logits/oppo_real": -2.142064094543457, |
|
"logits/real": -2.0768179893493652, |
|
"logps/generated": -89.34324645996094, |
|
"logps/oppo_gen": -78.08332824707031, |
|
"logps/oppo_real": -437.152587890625, |
|
"logps/real": -440.1899719238281, |
|
"loss": 0.5753, |
|
"loss/gen": 0.5017337799072266, |
|
"loss/real": 0.07567332684993744, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.259918212890625, |
|
"rewards/margins": 8.222564697265625, |
|
"rewards/real": -3.037353515625, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.381090483920762, |
|
"learning_rate": 2.3333333333333333e-07, |
|
"logits/generated": -2.863776683807373, |
|
"logits/oppo_generated": -2.902646064758301, |
|
"logits/oppo_real": -2.953411817550659, |
|
"logits/real": -2.8005595207214355, |
|
"logps/generated": -87.5417251586914, |
|
"logps/oppo_gen": -72.53976440429688, |
|
"logps/oppo_real": -310.7004089355469, |
|
"logps/real": -314.5782165527344, |
|
"loss": 0.5689, |
|
"loss/gen": 0.49076879024505615, |
|
"loss/real": 0.07662531733512878, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.001964569091797, |
|
"rewards/margins": 11.124154090881348, |
|
"rewards/real": -3.877810478210449, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.897206320701016, |
|
"learning_rate": 2.5e-07, |
|
"logits/generated": -2.8799896240234375, |
|
"logits/oppo_generated": -2.947140693664551, |
|
"logits/oppo_real": -2.9634807109832764, |
|
"logits/real": -2.844564914703369, |
|
"logps/generated": -94.40156555175781, |
|
"logps/oppo_gen": -74.80116271972656, |
|
"logps/oppo_real": -309.46124267578125, |
|
"logps/real": -314.874267578125, |
|
"loss": 0.5597, |
|
"loss/gen": 0.4772190451622009, |
|
"loss/real": 0.07830348610877991, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.600406646728516, |
|
"rewards/margins": 14.187431335449219, |
|
"rewards/real": -5.412975311279297, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.215747510988629, |
|
"learning_rate": 2.6666666666666667e-07, |
|
"logits/generated": -2.3516016006469727, |
|
"logits/oppo_generated": -2.6668543815612793, |
|
"logits/oppo_real": -2.47564697265625, |
|
"logits/real": -2.550909996032715, |
|
"logps/generated": -89.29439544677734, |
|
"logps/oppo_gen": -67.190673828125, |
|
"logps/oppo_real": -285.60797119140625, |
|
"logps/real": -294.1334228515625, |
|
"loss": 0.5501, |
|
"loss/gen": 0.4698275327682495, |
|
"loss/real": 0.08201275765895844, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -22.103715896606445, |
|
"rewards/margins": 13.57829761505127, |
|
"rewards/real": -8.525418281555176, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.962322829071436, |
|
"learning_rate": 2.833333333333333e-07, |
|
"logits/generated": -3.0025205612182617, |
|
"logits/oppo_generated": -2.7376956939697266, |
|
"logits/oppo_real": -3.1153059005737305, |
|
"logits/real": -2.610355854034424, |
|
"logps/generated": -122.87580108642578, |
|
"logps/oppo_gen": -93.65745544433594, |
|
"logps/oppo_real": -173.968994140625, |
|
"logps/real": -185.29052734375, |
|
"loss": 0.5422, |
|
"loss/gen": 0.44879257678985596, |
|
"loss/real": 0.08537431061267853, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.21834945678711, |
|
"rewards/margins": 17.896812438964844, |
|
"rewards/real": -11.321537017822266, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.834202438278973, |
|
"learning_rate": 3e-07, |
|
"logits/generated": -2.757288932800293, |
|
"logits/oppo_generated": -2.6699156761169434, |
|
"logits/oppo_real": -2.8930060863494873, |
|
"logits/real": -2.5185751914978027, |
|
"logps/generated": -74.53424072265625, |
|
"logps/oppo_gen": -50.189754486083984, |
|
"logps/oppo_real": -197.0562286376953, |
|
"logps/real": -205.22372436523438, |
|
"loss": 0.5312, |
|
"loss/gen": 0.463203489780426, |
|
"loss/real": 0.08189205825328827, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -24.34449005126953, |
|
"rewards/margins": 16.17698860168457, |
|
"rewards/real": -8.167499542236328, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.7506381800230715, |
|
"learning_rate": 3.166666666666666e-07, |
|
"logits/generated": -2.8069210052490234, |
|
"logits/oppo_generated": -2.8113152980804443, |
|
"logits/oppo_real": -2.997610330581665, |
|
"logits/real": -2.6217806339263916, |
|
"logps/generated": -95.0489501953125, |
|
"logps/oppo_gen": -59.91856384277344, |
|
"logps/oppo_real": -175.6089324951172, |
|
"logps/real": -191.240478515625, |
|
"loss": 0.5218, |
|
"loss/gen": 0.4313267767429352, |
|
"loss/real": 0.0910997986793518, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.13037872314453, |
|
"rewards/margins": 19.49883460998535, |
|
"rewards/real": -15.63154411315918, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 6.494424335907993, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/generated": -2.639596462249756, |
|
"logits/oppo_generated": -2.712057113647461, |
|
"logits/oppo_real": -2.83805513381958, |
|
"logits/real": -2.524055004119873, |
|
"logps/generated": -128.61825561523438, |
|
"logps/oppo_gen": -84.5518798828125, |
|
"logps/oppo_real": -331.96221923828125, |
|
"logps/real": -351.935546875, |
|
"loss": 0.5041, |
|
"loss/gen": 0.40506821870803833, |
|
"loss/real": 0.0967344343662262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -44.066375732421875, |
|
"rewards/margins": 24.093048095703125, |
|
"rewards/real": -19.973331451416016, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.960307436700748, |
|
"learning_rate": 3.5e-07, |
|
"logits/generated": -2.1258792877197266, |
|
"logits/oppo_generated": -2.4313888549804688, |
|
"logits/oppo_real": -2.3368191719055176, |
|
"logits/real": -2.221534013748169, |
|
"logps/generated": -126.85552978515625, |
|
"logps/oppo_gen": -70.7446060180664, |
|
"logps/oppo_real": -186.56976318359375, |
|
"logps/real": -207.33192443847656, |
|
"loss": 0.4957, |
|
"loss/gen": 0.37048545479774475, |
|
"loss/real": 0.09748665988445282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -56.110923767089844, |
|
"rewards/margins": 35.34877014160156, |
|
"rewards/real": -20.762155532836914, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.647138812980542, |
|
"learning_rate": 3.666666666666666e-07, |
|
"logits/generated": -2.6529014110565186, |
|
"logits/oppo_generated": -2.8222999572753906, |
|
"logits/oppo_real": -2.956730842590332, |
|
"logits/real": -2.5613512992858887, |
|
"logps/generated": -105.7339859008789, |
|
"logps/oppo_gen": -55.461936950683594, |
|
"logps/oppo_real": -125.98847198486328, |
|
"logps/real": -154.47018432617188, |
|
"loss": 0.4824, |
|
"loss/gen": 0.38719600439071655, |
|
"loss/real": 0.10918831825256348, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -50.27205276489258, |
|
"rewards/margins": 21.790328979492188, |
|
"rewards/real": -28.48172378540039, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.606258451015265, |
|
"learning_rate": 3.8333333333333335e-07, |
|
"logits/generated": -2.351107597351074, |
|
"logits/oppo_generated": -2.9076757431030273, |
|
"logits/oppo_real": -2.661245822906494, |
|
"logits/real": -2.590463161468506, |
|
"logps/generated": -130.83560180664062, |
|
"logps/oppo_gen": -71.46342468261719, |
|
"logps/oppo_real": -293.69677734375, |
|
"logps/real": -317.739501953125, |
|
"loss": 0.4697, |
|
"loss/gen": 0.3609875738620758, |
|
"loss/real": 0.10510525107383728, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -59.372169494628906, |
|
"rewards/margins": 35.3294677734375, |
|
"rewards/real": -24.04269790649414, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.4775990418081335, |
|
"learning_rate": 4e-07, |
|
"logits/generated": -2.5304102897644043, |
|
"logits/oppo_generated": -3.018123149871826, |
|
"logits/oppo_real": -2.837935447692871, |
|
"logits/real": -2.691473960876465, |
|
"logps/generated": -116.93437194824219, |
|
"logps/oppo_gen": -51.06623458862305, |
|
"logps/oppo_real": -151.72972106933594, |
|
"logps/real": -204.6798858642578, |
|
"loss": 0.4607, |
|
"loss/gen": 0.34339845180511475, |
|
"loss/real": 0.1490423083305359, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -65.86813354492188, |
|
"rewards/margins": 12.917970657348633, |
|
"rewards/real": -52.950164794921875, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.7765635698073465, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/generated": -2.269188642501831, |
|
"logits/oppo_generated": -2.7700376510620117, |
|
"logits/oppo_real": -2.6328747272491455, |
|
"logits/real": -2.4533772468566895, |
|
"logps/generated": -171.6358642578125, |
|
"logps/oppo_gen": -72.09120178222656, |
|
"logps/oppo_real": -411.427978515625, |
|
"logps/real": -435.83746337890625, |
|
"loss": 0.4388, |
|
"loss/gen": 0.26547765731811523, |
|
"loss/real": 0.11204466968774796, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -99.5446548461914, |
|
"rewards/margins": 75.1351318359375, |
|
"rewards/real": -24.409523010253906, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.293058117326734, |
|
"learning_rate": 4.3333333333333335e-07, |
|
"logits/generated": -2.5251784324645996, |
|
"logits/oppo_generated": -2.91198468208313, |
|
"logits/oppo_real": -2.9211230278015137, |
|
"logits/real": -2.5067286491394043, |
|
"logps/generated": -171.92626953125, |
|
"logps/oppo_gen": -82.21741485595703, |
|
"logps/oppo_real": -301.3589172363281, |
|
"logps/real": -348.7081298828125, |
|
"loss": 0.4299, |
|
"loss/gen": 0.27918827533721924, |
|
"loss/real": 0.1409532129764557, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -89.70884704589844, |
|
"rewards/margins": 42.359642028808594, |
|
"rewards/real": -47.349205017089844, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.006704032206546, |
|
"learning_rate": 4.5e-07, |
|
"logits/generated": -2.4755945205688477, |
|
"logits/oppo_generated": -2.4022648334503174, |
|
"logits/oppo_real": -2.97650146484375, |
|
"logits/real": -1.9955105781555176, |
|
"logps/generated": -181.78067016601562, |
|
"logps/oppo_gen": -99.30915832519531, |
|
"logps/oppo_real": -226.3162841796875, |
|
"logps/real": -281.9669189453125, |
|
"loss": 0.4157, |
|
"loss/gen": 0.29939502477645874, |
|
"loss/real": 0.15717226266860962, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -82.47151947021484, |
|
"rewards/margins": 26.820903778076172, |
|
"rewards/real": -55.65061950683594, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.11314770762399, |
|
"learning_rate": 4.6666666666666666e-07, |
|
"logits/generated": -2.28588604927063, |
|
"logits/oppo_generated": -2.854034900665283, |
|
"logits/oppo_real": -2.9424033164978027, |
|
"logits/real": -2.35001802444458, |
|
"logps/generated": -146.79574584960938, |
|
"logps/oppo_gen": -54.3837890625, |
|
"logps/oppo_real": -252.91123962402344, |
|
"logps/real": -309.61859130859375, |
|
"loss": 0.4047, |
|
"loss/gen": 0.2730938494205475, |
|
"loss/real": 0.15622730553150177, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -92.4119644165039, |
|
"rewards/margins": 35.704612731933594, |
|
"rewards/real": -56.70735168457031, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.122749206528014, |
|
"learning_rate": 4.833333333333333e-07, |
|
"logits/generated": -2.2742700576782227, |
|
"logits/oppo_generated": -2.9263906478881836, |
|
"logits/oppo_real": -2.9535346031188965, |
|
"logits/real": -2.310698986053467, |
|
"logps/generated": -197.92111206054688, |
|
"logps/oppo_gen": -78.93435668945312, |
|
"logps/oppo_real": -298.2490234375, |
|
"logps/real": -364.26275634765625, |
|
"loss": 0.4044, |
|
"loss/gen": 0.20681458711624146, |
|
"loss/real": 0.17815178632736206, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -118.98676300048828, |
|
"rewards/margins": 52.972984313964844, |
|
"rewards/real": -66.01377868652344, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 6.360770407505836, |
|
"learning_rate": 5e-07, |
|
"logits/generated": -2.4970831871032715, |
|
"logits/oppo_generated": -2.9521539211273193, |
|
"logits/oppo_real": -3.0699048042297363, |
|
"logits/real": -2.4102611541748047, |
|
"logps/generated": -235.4222412109375, |
|
"logps/oppo_gen": -136.80690002441406, |
|
"logps/oppo_real": -344.64990234375, |
|
"logps/real": -410.9317321777344, |
|
"loss": 0.3865, |
|
"loss/gen": 0.26079097390174866, |
|
"loss/real": 0.18503594398498535, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -98.61534118652344, |
|
"rewards/margins": 32.33351135253906, |
|
"rewards/real": -66.28182983398438, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 6.08762211921915, |
|
"learning_rate": 4.996438746438746e-07, |
|
"logits/generated": -2.333153247833252, |
|
"logits/oppo_generated": -2.8447458744049072, |
|
"logits/oppo_real": -2.998192548751831, |
|
"logits/real": -2.280444622039795, |
|
"logps/generated": -202.4652862548828, |
|
"logps/oppo_gen": -79.24800109863281, |
|
"logps/oppo_real": -401.9757385253906, |
|
"logps/real": -471.5330810546875, |
|
"loss": 0.3748, |
|
"loss/gen": 0.19843924045562744, |
|
"loss/real": 0.18462583422660828, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -123.21727752685547, |
|
"rewards/margins": 53.659934997558594, |
|
"rewards/real": -69.55734252929688, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 5.005730015947096, |
|
"learning_rate": 4.992877492877492e-07, |
|
"logits/generated": -2.316225528717041, |
|
"logits/oppo_generated": -2.942030906677246, |
|
"logits/oppo_real": -2.9536867141723633, |
|
"logits/real": -2.363262891769409, |
|
"logps/generated": -201.16250610351562, |
|
"logps/oppo_gen": -62.21235656738281, |
|
"logps/oppo_real": -296.8402404785156, |
|
"logps/real": -363.26275634765625, |
|
"loss": 0.3562, |
|
"loss/gen": 0.18675431609153748, |
|
"loss/real": 0.19386838376522064, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -138.9501495361328, |
|
"rewards/margins": 72.52763366699219, |
|
"rewards/real": -66.42252349853516, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 5.686578361538164, |
|
"learning_rate": 4.98931623931624e-07, |
|
"logits/generated": -2.023681879043579, |
|
"logits/oppo_generated": -2.792217493057251, |
|
"logits/oppo_real": -2.680948257446289, |
|
"logits/real": -2.2037131786346436, |
|
"logps/generated": -198.9320831298828, |
|
"logps/oppo_gen": -49.044715881347656, |
|
"logps/oppo_real": -183.3726348876953, |
|
"logps/real": -240.0377197265625, |
|
"loss": 0.3554, |
|
"loss/gen": 0.17746244370937347, |
|
"loss/real": 0.16751524806022644, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -149.88735961914062, |
|
"rewards/margins": 93.22227478027344, |
|
"rewards/real": -56.66510009765625, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.38929853392009, |
|
"learning_rate": 4.985754985754986e-07, |
|
"logits/generated": -2.163674831390381, |
|
"logits/oppo_generated": -2.5968940258026123, |
|
"logits/oppo_real": -2.84472393989563, |
|
"logits/real": -2.076559066772461, |
|
"logps/generated": -253.6720428466797, |
|
"logps/oppo_gen": -96.46727752685547, |
|
"logps/oppo_real": -441.2087097167969, |
|
"logps/real": -481.818359375, |
|
"loss": 0.3364, |
|
"loss/gen": 0.18341580033302307, |
|
"loss/real": 0.14260295033454895, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -157.2047576904297, |
|
"rewards/margins": 116.5951156616211, |
|
"rewards/real": -40.609642028808594, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.8634310887113905, |
|
"learning_rate": 4.982193732193732e-07, |
|
"logits/generated": -2.4291858673095703, |
|
"logits/oppo_generated": -3.097993850708008, |
|
"logits/oppo_real": -3.161780834197998, |
|
"logits/real": -2.493154525756836, |
|
"logps/generated": -247.3079833984375, |
|
"logps/oppo_gen": -86.33152770996094, |
|
"logps/oppo_real": -374.5130615234375, |
|
"logps/real": -419.8951110839844, |
|
"loss": 0.3163, |
|
"loss/gen": 0.1362161934375763, |
|
"loss/real": 0.1468784660100937, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -160.9764404296875, |
|
"rewards/margins": 115.5943832397461, |
|
"rewards/real": -45.38206481933594, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.752952430689934, |
|
"learning_rate": 4.978632478632478e-07, |
|
"logits/generated": -2.0712549686431885, |
|
"logits/oppo_generated": -2.648486614227295, |
|
"logits/oppo_real": -2.7488012313842773, |
|
"logits/real": -2.041752815246582, |
|
"logps/generated": -231.01263427734375, |
|
"logps/oppo_gen": -78.30477142333984, |
|
"logps/oppo_real": -363.86407470703125, |
|
"logps/real": -424.56488037109375, |
|
"loss": 0.3128, |
|
"loss/gen": 0.14557518064975739, |
|
"loss/real": 0.17687970399856567, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -152.70785522460938, |
|
"rewards/margins": 92.00706481933594, |
|
"rewards/real": -60.7007942199707, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.097720252047239, |
|
"learning_rate": 4.975071225071225e-07, |
|
"logits/generated": -2.0535855293273926, |
|
"logits/oppo_generated": -2.864193916320801, |
|
"logits/oppo_real": -2.7761850357055664, |
|
"logits/real": -2.224205493927002, |
|
"logps/generated": -198.95758056640625, |
|
"logps/oppo_gen": -60.6450309753418, |
|
"logps/oppo_real": -320.1565856933594, |
|
"logps/real": -351.37152099609375, |
|
"loss": 0.3158, |
|
"loss/gen": 0.19815625250339508, |
|
"loss/real": 0.12008698284626007, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -138.31253051757812, |
|
"rewards/margins": 107.09764099121094, |
|
"rewards/real": -31.21491050720215, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.308362219668762, |
|
"learning_rate": 4.971509971509972e-07, |
|
"logits/generated": -2.2241737842559814, |
|
"logits/oppo_generated": -2.812058210372925, |
|
"logits/oppo_real": -2.982236862182617, |
|
"logits/real": -2.1994166374206543, |
|
"logps/generated": -222.87245178222656, |
|
"logps/oppo_gen": -90.06674194335938, |
|
"logps/oppo_real": -176.9713592529297, |
|
"logps/real": -230.6976318359375, |
|
"loss": 0.3223, |
|
"loss/gen": 0.18512912094593048, |
|
"loss/real": 0.15975108742713928, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -132.80569458007812, |
|
"rewards/margins": 79.07943725585938, |
|
"rewards/real": -53.72626495361328, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 5.451193043726812, |
|
"learning_rate": 4.967948717948718e-07, |
|
"logits/generated": -2.195953369140625, |
|
"logits/oppo_generated": -2.9253015518188477, |
|
"logits/oppo_real": -2.9079301357269287, |
|
"logits/real": -2.269479274749756, |
|
"logps/generated": -196.22265625, |
|
"logps/oppo_gen": -54.79414367675781, |
|
"logps/oppo_real": -186.92176818847656, |
|
"logps/real": -261.07904052734375, |
|
"loss": 0.3154, |
|
"loss/gen": 0.16257138550281525, |
|
"loss/real": 0.1990506947040558, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -141.4285125732422, |
|
"rewards/margins": 67.271240234375, |
|
"rewards/real": -74.15726470947266, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 5.298608186897925, |
|
"learning_rate": 4.964387464387464e-07, |
|
"logits/generated": -2.0243582725524902, |
|
"logits/oppo_generated": -2.9949498176574707, |
|
"logits/oppo_real": -2.9107003211975098, |
|
"logits/real": -2.368220090866089, |
|
"logps/generated": -288.9565734863281, |
|
"logps/oppo_gen": -79.9820785522461, |
|
"logps/oppo_real": -404.1100158691406, |
|
"logps/real": -426.2897033691406, |
|
"loss": 0.305, |
|
"loss/gen": 0.11848976463079453, |
|
"loss/real": 0.10273829847574234, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -208.9744873046875, |
|
"rewards/margins": 186.7947998046875, |
|
"rewards/real": -22.179689407348633, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 4.0295905904369596, |
|
"learning_rate": 4.96082621082621e-07, |
|
"logits/generated": -1.7335329055786133, |
|
"logits/oppo_generated": -2.4440221786499023, |
|
"logits/oppo_real": -2.3998050689697266, |
|
"logits/real": -1.7864277362823486, |
|
"logps/generated": -316.6847839355469, |
|
"logps/oppo_gen": -93.22187805175781, |
|
"logps/oppo_real": -290.8685302734375, |
|
"logps/real": -340.7790832519531, |
|
"loss": 0.2935, |
|
"loss/gen": 0.11050556600093842, |
|
"loss/real": 0.15923935174942017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -223.462890625, |
|
"rewards/margins": 173.55233764648438, |
|
"rewards/real": -49.91055679321289, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.969789984502597, |
|
"learning_rate": 4.957264957264958e-07, |
|
"logits/generated": -2.0692965984344482, |
|
"logits/oppo_generated": -2.9232547283172607, |
|
"logits/oppo_real": -2.7114880084991455, |
|
"logits/real": -2.287893056869507, |
|
"logps/generated": -261.8079833984375, |
|
"logps/oppo_gen": -64.50846862792969, |
|
"logps/oppo_real": -239.8323974609375, |
|
"logps/real": -322.8907775878906, |
|
"loss": 0.295, |
|
"loss/gen": 0.10703419148921967, |
|
"loss/real": 0.22259050607681274, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -197.29953002929688, |
|
"rewards/margins": 114.24113464355469, |
|
"rewards/real": -83.05838012695312, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 4.12131937141567, |
|
"learning_rate": 4.953703703703703e-07, |
|
"logits/generated": -2.165015697479248, |
|
"logits/oppo_generated": -2.741456985473633, |
|
"logits/oppo_real": -2.9938759803771973, |
|
"logits/real": -2.1477468013763428, |
|
"logps/generated": -207.61871337890625, |
|
"logps/oppo_gen": -58.174400329589844, |
|
"logps/oppo_real": -258.21685791015625, |
|
"logps/real": -323.63824462890625, |
|
"loss": 0.2947, |
|
"loss/gen": 0.1594236195087433, |
|
"loss/real": 0.18429553508758545, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -149.44430541992188, |
|
"rewards/margins": 84.02290344238281, |
|
"rewards/real": -65.42140197753906, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.985397506856586, |
|
"learning_rate": 4.95014245014245e-07, |
|
"logits/generated": -2.107313632965088, |
|
"logits/oppo_generated": -2.814079761505127, |
|
"logits/oppo_real": -2.964923620223999, |
|
"logits/real": -2.1809310913085938, |
|
"logps/generated": -287.1518859863281, |
|
"logps/oppo_gen": -78.5189208984375, |
|
"logps/oppo_real": -288.56396484375, |
|
"logps/real": -355.6239013671875, |
|
"loss": 0.2845, |
|
"loss/gen": 0.11285540461540222, |
|
"loss/real": 0.19928349554538727, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -208.63296508789062, |
|
"rewards/margins": 141.57305908203125, |
|
"rewards/real": -67.05989074707031, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.1054153221131005, |
|
"learning_rate": 4.946581196581196e-07, |
|
"logits/generated": -2.0935535430908203, |
|
"logits/oppo_generated": -2.7121076583862305, |
|
"logits/oppo_real": -2.932806968688965, |
|
"logits/real": -2.0798892974853516, |
|
"logps/generated": -259.9253234863281, |
|
"logps/oppo_gen": -72.10917663574219, |
|
"logps/oppo_real": -299.3392333984375, |
|
"logps/real": -372.9299011230469, |
|
"loss": 0.2789, |
|
"loss/gen": 0.11227723956108093, |
|
"loss/real": 0.1982138603925705, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -187.816162109375, |
|
"rewards/margins": 114.22547912597656, |
|
"rewards/real": -73.5906753540039, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 4.079712540466766, |
|
"learning_rate": 4.943019943019943e-07, |
|
"logits/generated": -2.233832836151123, |
|
"logits/oppo_generated": -2.814209461212158, |
|
"logits/oppo_real": -3.157527208328247, |
|
"logits/real": -2.258612871170044, |
|
"logps/generated": -303.4700012207031, |
|
"logps/oppo_gen": -80.24543762207031, |
|
"logps/oppo_real": -294.9969482421875, |
|
"logps/real": -351.3782958984375, |
|
"loss": 0.2575, |
|
"loss/gen": 0.11445442587137222, |
|
"loss/real": 0.17576591670513153, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -223.2245635986328, |
|
"rewards/margins": 166.8432159423828, |
|
"rewards/real": -56.38134765625, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 4.0831349902087, |
|
"learning_rate": 4.93945868945869e-07, |
|
"logits/generated": -2.146358013153076, |
|
"logits/oppo_generated": -2.9343652725219727, |
|
"logits/oppo_real": -2.7617945671081543, |
|
"logits/real": -2.3580541610717773, |
|
"logps/generated": -280.8390197753906, |
|
"logps/oppo_gen": -82.74765014648438, |
|
"logps/oppo_real": -315.32562255859375, |
|
"logps/real": -353.22186279296875, |
|
"loss": 0.2672, |
|
"loss/gen": 0.10266949236392975, |
|
"loss/real": 0.13033956289291382, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -198.09136962890625, |
|
"rewards/margins": 160.19512939453125, |
|
"rewards/real": -37.8962516784668, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 4.075803366263433, |
|
"learning_rate": 4.935897435897436e-07, |
|
"logits/generated": -2.074964761734009, |
|
"logits/oppo_generated": -2.805569648742676, |
|
"logits/oppo_real": -2.7846250534057617, |
|
"logits/real": -2.133402109146118, |
|
"logps/generated": -183.99075317382812, |
|
"logps/oppo_gen": -45.456573486328125, |
|
"logps/oppo_real": -161.39598083496094, |
|
"logps/real": -206.3616943359375, |
|
"loss": 0.2584, |
|
"loss/gen": 0.17228275537490845, |
|
"loss/real": 0.15338978171348572, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -138.5341796875, |
|
"rewards/margins": 93.56846618652344, |
|
"rewards/real": -44.96571350097656, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 4.075803366263433, |
|
"learning_rate": 4.935897435897436e-07, |
|
"logits/generated": -1.9417665004730225, |
|
"logits/oppo_generated": -2.7444612979888916, |
|
"logits/oppo_real": -2.7595162391662598, |
|
"logits/real": -2.026608943939209, |
|
"logps/generated": -194.52951049804688, |
|
"logps/oppo_gen": -50.193504333496094, |
|
"logps/oppo_real": -148.25294494628906, |
|
"logps/real": -180.50367736816406, |
|
"loss": 0.2608, |
|
"loss/gen": 0.17173901200294495, |
|
"loss/real": 0.12604832649230957, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -144.33599853515625, |
|
"rewards/margins": 112.08526611328125, |
|
"rewards/real": -32.250728607177734, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 4.495767298164259, |
|
"learning_rate": 4.932336182336182e-07, |
|
"logits/generated": -1.9348936080932617, |
|
"logits/oppo_generated": -2.660369396209717, |
|
"logits/oppo_real": -2.6082496643066406, |
|
"logits/real": -1.9820643663406372, |
|
"logps/generated": -204.697265625, |
|
"logps/oppo_gen": -55.80210876464844, |
|
"logps/oppo_real": -201.49038696289062, |
|
"logps/real": -237.58058166503906, |
|
"loss": 0.2359, |
|
"loss/gen": 0.18117573857307434, |
|
"loss/real": 0.13662098348140717, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -148.89515686035156, |
|
"rewards/margins": 112.80496215820312, |
|
"rewards/real": -36.09020233154297, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 4.130458572458854, |
|
"learning_rate": 4.928774928774928e-07, |
|
"logits/generated": -2.1984543800354004, |
|
"logits/oppo_generated": -2.746832847595215, |
|
"logits/oppo_real": -2.973560333251953, |
|
"logits/real": -2.1483070850372314, |
|
"logps/generated": -208.64358520507812, |
|
"logps/oppo_gen": -77.28608703613281, |
|
"logps/oppo_real": -547.3628540039062, |
|
"logps/real": -562.9273071289062, |
|
"loss": 0.2434, |
|
"loss/gen": 0.19732165336608887, |
|
"loss/real": 0.11211533099412918, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -131.3574981689453, |
|
"rewards/margins": 115.79305267333984, |
|
"rewards/real": -15.564445495605469, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 4.8370300932163595, |
|
"learning_rate": 4.925213675213676e-07, |
|
"logits/generated": -1.7475500106811523, |
|
"logits/oppo_generated": -2.664555072784424, |
|
"logits/oppo_real": -2.6400251388549805, |
|
"logits/real": -2.041037082672119, |
|
"logps/generated": -291.1873779296875, |
|
"logps/oppo_gen": -78.57785034179688, |
|
"logps/oppo_real": -398.628662109375, |
|
"logps/real": -400.1112365722656, |
|
"loss": 0.2286, |
|
"loss/gen": 0.08117571473121643, |
|
"loss/real": 0.0776968002319336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -212.60952758789062, |
|
"rewards/margins": 211.126953125, |
|
"rewards/real": -1.4825716018676758, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 7.264780959458862, |
|
"learning_rate": 4.921652421652421e-07, |
|
"logits/generated": -2.2139689922332764, |
|
"logits/oppo_generated": -2.638930320739746, |
|
"logits/oppo_real": -3.1015210151672363, |
|
"logits/real": -1.9857072830200195, |
|
"logps/generated": -266.18267822265625, |
|
"logps/oppo_gen": -84.6130599975586, |
|
"logps/oppo_real": -310.54534912109375, |
|
"logps/real": -368.14923095703125, |
|
"loss": 0.2394, |
|
"loss/gen": 0.14072535932064056, |
|
"loss/real": 0.1684618443250656, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -181.5696258544922, |
|
"rewards/margins": 123.9657211303711, |
|
"rewards/real": -57.603904724121094, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.6434099979100925, |
|
"learning_rate": 4.918091168091168e-07, |
|
"logits/generated": -2.0649800300598145, |
|
"logits/oppo_generated": -2.9305167198181152, |
|
"logits/oppo_real": -2.7986156940460205, |
|
"logits/real": -2.206486701965332, |
|
"logps/generated": -337.5455322265625, |
|
"logps/oppo_gen": -55.247596740722656, |
|
"logps/oppo_real": -159.6094970703125, |
|
"logps/real": -232.8638153076172, |
|
"loss": 0.2301, |
|
"loss/gen": 0.08769591897726059, |
|
"loss/real": 0.19665245711803436, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -282.2979431152344, |
|
"rewards/margins": 209.04364013671875, |
|
"rewards/real": -73.25431060791016, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 5.299618171037331, |
|
"learning_rate": 4.914529914529914e-07, |
|
"logits/generated": -2.0552940368652344, |
|
"logits/oppo_generated": -2.733177900314331, |
|
"logits/oppo_real": -3.0261659622192383, |
|
"logits/real": -2.060375690460205, |
|
"logps/generated": -279.427490234375, |
|
"logps/oppo_gen": -77.4105453491211, |
|
"logps/oppo_real": -291.50042724609375, |
|
"logps/real": -326.88134765625, |
|
"loss": 0.2383, |
|
"loss/gen": 0.08899568021297455, |
|
"loss/real": 0.12921909987926483, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -202.01693725585938, |
|
"rewards/margins": 166.63604736328125, |
|
"rewards/real": -35.380889892578125, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 4.947761977605242, |
|
"learning_rate": 4.910968660968661e-07, |
|
"logits/generated": -1.6708121299743652, |
|
"logits/oppo_generated": -2.70068359375, |
|
"logits/oppo_real": -2.622352361679077, |
|
"logits/real": -1.8560223579406738, |
|
"logps/generated": -342.7183837890625, |
|
"logps/oppo_gen": -66.53448486328125, |
|
"logps/oppo_real": -142.07913208007812, |
|
"logps/real": -231.65255737304688, |
|
"loss": 0.2469, |
|
"loss/gen": 0.04403278976678848, |
|
"loss/real": 0.26509106159210205, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -276.18389892578125, |
|
"rewards/margins": 186.61048889160156, |
|
"rewards/real": -89.57340240478516, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 4.788945406653761, |
|
"learning_rate": 4.907407407407407e-07, |
|
"logits/generated": -2.1787357330322266, |
|
"logits/oppo_generated": -3.0608558654785156, |
|
"logits/oppo_real": -3.0881457328796387, |
|
"logits/real": -2.3616437911987305, |
|
"logps/generated": -281.7923889160156, |
|
"logps/oppo_gen": -78.30126953125, |
|
"logps/oppo_real": -296.7585144042969, |
|
"logps/real": -322.1238708496094, |
|
"loss": 0.224, |
|
"loss/gen": 0.09028749167919159, |
|
"loss/real": 0.12251698970794678, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -203.49111938476562, |
|
"rewards/margins": 178.12576293945312, |
|
"rewards/real": -25.365345001220703, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 6.474579032262988, |
|
"learning_rate": 4.903846153846153e-07, |
|
"logits/generated": -2.096007823944092, |
|
"logits/oppo_generated": -2.904336929321289, |
|
"logits/oppo_real": -3.0007967948913574, |
|
"logits/real": -2.2163987159729004, |
|
"logps/generated": -334.1524658203125, |
|
"logps/oppo_gen": -78.76142883300781, |
|
"logps/oppo_real": -321.17315673828125, |
|
"logps/real": -358.46551513671875, |
|
"loss": 0.2299, |
|
"loss/gen": 0.06527581810951233, |
|
"loss/real": 0.158095583319664, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -255.39102172851562, |
|
"rewards/margins": 218.09866333007812, |
|
"rewards/real": -37.29237365722656, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.512685485578188, |
|
"learning_rate": 4.9002849002849e-07, |
|
"logits/generated": -2.161278247833252, |
|
"logits/oppo_generated": -3.0246148109436035, |
|
"logits/oppo_real": -3.155604839324951, |
|
"logits/real": -2.2183971405029297, |
|
"logps/generated": -336.80706787109375, |
|
"logps/oppo_gen": -99.78816986083984, |
|
"logps/oppo_real": -357.6624755859375, |
|
"logps/real": -384.63702392578125, |
|
"loss": 0.2101, |
|
"loss/gen": 0.05192907154560089, |
|
"loss/real": 0.12459313869476318, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -237.01889038085938, |
|
"rewards/margins": 210.04437255859375, |
|
"rewards/real": -26.974525451660156, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.815763085605405, |
|
"learning_rate": 4.896723646723647e-07, |
|
"logits/generated": -1.8751075267791748, |
|
"logits/oppo_generated": -2.718918800354004, |
|
"logits/oppo_real": -2.8950438499450684, |
|
"logits/real": -1.882493495941162, |
|
"logps/generated": -259.6773376464844, |
|
"logps/oppo_gen": -73.73533630371094, |
|
"logps/oppo_real": -276.2977294921875, |
|
"logps/real": -297.37060546875, |
|
"loss": 0.1989, |
|
"loss/gen": 0.10083942115306854, |
|
"loss/real": 0.1283130794763565, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -185.94200134277344, |
|
"rewards/margins": 164.86915588378906, |
|
"rewards/real": -21.07284927368164, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 4.815763085605405, |
|
"learning_rate": 4.896723646723647e-07, |
|
"logits/generated": -2.0247457027435303, |
|
"logits/oppo_generated": -2.7741386890411377, |
|
"logits/oppo_real": -2.8905487060546875, |
|
"logits/real": -2.063991069793701, |
|
"logps/generated": -250.93898010253906, |
|
"logps/oppo_gen": -70.42605590820312, |
|
"logps/oppo_real": -291.8798522949219, |
|
"logps/real": -357.66455078125, |
|
"loss": 0.2196, |
|
"loss/gen": 0.11537407338619232, |
|
"loss/real": 0.19172152876853943, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -180.512939453125, |
|
"rewards/margins": 114.72822570800781, |
|
"rewards/real": -65.78471374511719, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 9.402815587844096, |
|
"learning_rate": 4.893162393162393e-07, |
|
"logits/generated": -1.89133882522583, |
|
"logits/oppo_generated": -2.731257438659668, |
|
"logits/oppo_real": -2.804780960083008, |
|
"logits/real": -2.033903121948242, |
|
"logps/generated": -360.88739013671875, |
|
"logps/oppo_gen": -143.67832946777344, |
|
"logps/oppo_real": -309.55450439453125, |
|
"logps/real": -327.82147216796875, |
|
"loss": 0.1855, |
|
"loss/gen": 0.08057817816734314, |
|
"loss/real": 0.10118211805820465, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -217.20907592773438, |
|
"rewards/margins": 198.94215393066406, |
|
"rewards/real": -18.266925811767578, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 6.598795474561087, |
|
"learning_rate": 4.889601139601139e-07, |
|
"logits/generated": -1.9886155128479004, |
|
"logits/oppo_generated": -2.710496664047241, |
|
"logits/oppo_real": -2.980191707611084, |
|
"logits/real": -1.9148871898651123, |
|
"logps/generated": -333.3258361816406, |
|
"logps/oppo_gen": -71.51214599609375, |
|
"logps/oppo_real": -284.34765625, |
|
"logps/real": -336.5108642578125, |
|
"loss": 0.1874, |
|
"loss/gen": 0.032972171902656555, |
|
"loss/real": 0.17686012387275696, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -261.8136901855469, |
|
"rewards/margins": 209.65048217773438, |
|
"rewards/real": -52.1632080078125, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 6.524966781356265, |
|
"learning_rate": 4.886039886039886e-07, |
|
"logits/generated": -2.0850396156311035, |
|
"logits/oppo_generated": -3.0934062004089355, |
|
"logits/oppo_real": -3.077010154724121, |
|
"logits/real": -2.3363146781921387, |
|
"logps/generated": -359.4500732421875, |
|
"logps/oppo_gen": -109.1805419921875, |
|
"logps/oppo_real": -348.23834228515625, |
|
"logps/real": -342.962158203125, |
|
"loss": 0.2055, |
|
"loss/gen": 0.044875070452690125, |
|
"loss/real": 0.06856581568717957, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -250.26954650878906, |
|
"rewards/margins": 255.5457763671875, |
|
"rewards/real": 5.276228904724121, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 6.524966781356265, |
|
"learning_rate": 4.886039886039886e-07, |
|
"logits/generated": -2.09678316116333, |
|
"logits/oppo_generated": -2.838265895843506, |
|
"logits/oppo_real": -3.01387357711792, |
|
"logits/real": -2.099071979522705, |
|
"logps/generated": -283.34869384765625, |
|
"logps/oppo_gen": -75.5096206665039, |
|
"logps/oppo_real": -242.11915588378906, |
|
"logps/real": -276.683837890625, |
|
"loss": 0.2146, |
|
"loss/gen": 0.07300423085689545, |
|
"loss/real": 0.159988671541214, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -207.83909606933594, |
|
"rewards/margins": 173.2744140625, |
|
"rewards/real": -34.56468200683594, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 7.727434099521727, |
|
"learning_rate": 4.882478632478633e-07, |
|
"logits/generated": -2.0702996253967285, |
|
"logits/oppo_generated": -2.786154270172119, |
|
"logits/oppo_real": -2.980445146560669, |
|
"logits/real": -2.0372719764709473, |
|
"logps/generated": -323.0815124511719, |
|
"logps/oppo_gen": -78.40753173828125, |
|
"logps/oppo_real": -188.29739379882812, |
|
"logps/real": -220.34078979492188, |
|
"loss": 0.2102, |
|
"loss/gen": 0.047749102115631104, |
|
"loss/real": 0.13793116807937622, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -244.67396545410156, |
|
"rewards/margins": 212.6305694580078, |
|
"rewards/real": -32.04340744018555, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 13.112634846854888, |
|
"learning_rate": 4.878917378917379e-07, |
|
"logits/generated": -1.8675211668014526, |
|
"logits/oppo_generated": -2.8353500366210938, |
|
"logits/oppo_real": -2.788581371307373, |
|
"logits/real": -2.013288736343384, |
|
"logps/generated": -304.5635681152344, |
|
"logps/oppo_gen": -74.27359008789062, |
|
"logps/oppo_real": -262.4258728027344, |
|
"logps/real": -304.51788330078125, |
|
"loss": 0.2119, |
|
"loss/gen": 0.06847551465034485, |
|
"loss/real": 0.16117896139621735, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -230.28997802734375, |
|
"rewards/margins": 188.19796752929688, |
|
"rewards/real": -42.09199142456055, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 6.283808097138244, |
|
"learning_rate": 4.875356125356125e-07, |
|
"logits/generated": -1.7545793056488037, |
|
"logits/oppo_generated": -2.8188014030456543, |
|
"logits/oppo_real": -2.757133960723877, |
|
"logits/real": -1.8540079593658447, |
|
"logps/generated": -328.4749755859375, |
|
"logps/oppo_gen": -55.317054748535156, |
|
"logps/oppo_real": -178.10824584960938, |
|
"logps/real": -227.6909942626953, |
|
"loss": 0.2085, |
|
"loss/gen": 0.03556426614522934, |
|
"loss/real": 0.15076972544193268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -273.157958984375, |
|
"rewards/margins": 223.5751953125, |
|
"rewards/real": -49.58274459838867, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 8.846480847697116, |
|
"learning_rate": 4.871794871794871e-07, |
|
"logits/generated": -1.8735418319702148, |
|
"logits/oppo_generated": -2.865746259689331, |
|
"logits/oppo_real": -2.85042142868042, |
|
"logits/real": -2.002570152282715, |
|
"logps/generated": -348.75439453125, |
|
"logps/oppo_gen": -101.81581115722656, |
|
"logps/oppo_real": -463.47314453125, |
|
"logps/real": -474.5299377441406, |
|
"loss": 0.2081, |
|
"loss/gen": 0.050575897097587585, |
|
"loss/real": 0.10425007343292236, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -246.9385986328125, |
|
"rewards/margins": 235.8818817138672, |
|
"rewards/real": -11.056716918945312, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 6.390898383898802, |
|
"learning_rate": 4.868233618233618e-07, |
|
"logits/generated": -1.8523106575012207, |
|
"logits/oppo_generated": -2.9923882484436035, |
|
"logits/oppo_real": -2.813816547393799, |
|
"logits/real": -2.116447925567627, |
|
"logps/generated": -425.23760986328125, |
|
"logps/oppo_gen": -78.51251220703125, |
|
"logps/oppo_real": -286.4658508300781, |
|
"logps/real": -282.1247863769531, |
|
"loss": 0.1855, |
|
"loss/gen": 0.021565552800893784, |
|
"loss/real": 0.0718618631362915, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -346.72509765625, |
|
"rewards/margins": 351.066162109375, |
|
"rewards/real": 4.341072082519531, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 10.194700352791303, |
|
"learning_rate": 4.864672364672365e-07, |
|
"logits/generated": -2.067366600036621, |
|
"logits/oppo_generated": -2.7725887298583984, |
|
"logits/oppo_real": -3.063380002975464, |
|
"logits/real": -1.9453740119934082, |
|
"logps/generated": -310.57940673828125, |
|
"logps/oppo_gen": -79.40229034423828, |
|
"logps/oppo_real": -383.419677734375, |
|
"logps/real": -404.203125, |
|
"loss": 0.1814, |
|
"loss/gen": 0.056390903890132904, |
|
"loss/real": 0.12866151332855225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -231.1771240234375, |
|
"rewards/margins": 210.39370727539062, |
|
"rewards/real": -20.783414840698242, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 6.585202430620193, |
|
"learning_rate": 4.861111111111111e-07, |
|
"logits/generated": -2.1229610443115234, |
|
"logits/oppo_generated": -2.8321666717529297, |
|
"logits/oppo_real": -3.1668171882629395, |
|
"logits/real": -1.997361660003662, |
|
"logps/generated": -408.10772705078125, |
|
"logps/oppo_gen": -99.83964538574219, |
|
"logps/oppo_real": -322.6613464355469, |
|
"logps/real": -318.35369873046875, |
|
"loss": 0.1752, |
|
"loss/gen": 0.09104468673467636, |
|
"loss/real": 0.09765169024467468, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -308.26806640625, |
|
"rewards/margins": 312.57574462890625, |
|
"rewards/real": 4.307661056518555, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 6.299977789945659, |
|
"learning_rate": 4.857549857549857e-07, |
|
"logits/generated": -2.283506393432617, |
|
"logits/oppo_generated": -3.000812530517578, |
|
"logits/oppo_real": -3.1619484424591064, |
|
"logits/real": -2.3234751224517822, |
|
"logps/generated": -314.71881103515625, |
|
"logps/oppo_gen": -83.82888793945312, |
|
"logps/oppo_real": -441.3746337890625, |
|
"logps/real": -437.03350830078125, |
|
"loss": 0.1922, |
|
"loss/gen": 0.04957776144146919, |
|
"loss/real": 0.07287702709436417, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -230.88992309570312, |
|
"rewards/margins": 235.2310791015625, |
|
"rewards/real": 4.341154098510742, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 16.796384440510923, |
|
"learning_rate": 4.853988603988603e-07, |
|
"logits/generated": -1.4338067770004272, |
|
"logits/oppo_generated": -2.4111037254333496, |
|
"logits/oppo_real": -2.622360944747925, |
|
"logits/real": -1.429781436920166, |
|
"logps/generated": -283.0327453613281, |
|
"logps/oppo_gen": -94.29784393310547, |
|
"logps/oppo_real": -307.8828125, |
|
"logps/real": -296.2539978027344, |
|
"loss": 0.1974, |
|
"loss/gen": 0.12565049529075623, |
|
"loss/real": 0.06776070594787598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -188.73489379882812, |
|
"rewards/margins": 200.36370849609375, |
|
"rewards/real": 11.628820419311523, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 5.938452072105601, |
|
"learning_rate": 4.850427350427351e-07, |
|
"logits/generated": -1.771193265914917, |
|
"logits/oppo_generated": -2.7816574573516846, |
|
"logits/oppo_real": -2.923349380493164, |
|
"logits/real": -1.7050981521606445, |
|
"logps/generated": -282.5160217285156, |
|
"logps/oppo_gen": -70.22672271728516, |
|
"logps/oppo_real": -286.0644836425781, |
|
"logps/real": -340.19940185546875, |
|
"loss": 0.1753, |
|
"loss/gen": 0.08594659715890884, |
|
"loss/real": 0.18101441860198975, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -212.289306640625, |
|
"rewards/margins": 158.15438842773438, |
|
"rewards/real": -54.134918212890625, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 16.730987234407017, |
|
"learning_rate": 4.846866096866097e-07, |
|
"logits/generated": -1.1037144660949707, |
|
"logits/oppo_generated": -2.624129056930542, |
|
"logits/oppo_real": -2.6314826011657715, |
|
"logits/real": -1.0653507709503174, |
|
"logps/generated": -288.92779541015625, |
|
"logps/oppo_gen": -48.185340881347656, |
|
"logps/oppo_real": -148.66656494140625, |
|
"logps/real": -194.64031982421875, |
|
"loss": 0.1668, |
|
"loss/gen": 0.04733405262231827, |
|
"loss/real": 0.17014886438846588, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -240.7424774169922, |
|
"rewards/margins": 194.76873779296875, |
|
"rewards/real": -45.97373962402344, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 10.836560947164976, |
|
"learning_rate": 4.843304843304843e-07, |
|
"logits/generated": -1.4275020360946655, |
|
"logits/oppo_generated": -2.668670177459717, |
|
"logits/oppo_real": -2.9500231742858887, |
|
"logits/real": -1.0475223064422607, |
|
"logps/generated": -348.1563415527344, |
|
"logps/oppo_gen": -76.79248809814453, |
|
"logps/oppo_real": -287.1414794921875, |
|
"logps/real": -357.95648193359375, |
|
"loss": 0.1672, |
|
"loss/gen": 0.06210296228528023, |
|
"loss/real": 0.22867648303508759, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -271.36383056640625, |
|
"rewards/margins": 200.5488739013672, |
|
"rewards/real": -70.81497192382812, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 12.903897641442306, |
|
"learning_rate": 4.839743589743589e-07, |
|
"logits/generated": -1.5788016319274902, |
|
"logits/oppo_generated": -2.8624868392944336, |
|
"logits/oppo_real": -3.0077338218688965, |
|
"logits/real": -1.4091284275054932, |
|
"logps/generated": -322.8958740234375, |
|
"logps/oppo_gen": -103.01863861083984, |
|
"logps/oppo_real": -484.10565185546875, |
|
"logps/real": -477.997802734375, |
|
"loss": 0.1641, |
|
"loss/gen": 0.10916104912757874, |
|
"loss/real": 0.07555591315031052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -219.87725830078125, |
|
"rewards/margins": 225.985107421875, |
|
"rewards/real": 6.107841491699219, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 10.592868104309284, |
|
"learning_rate": 4.836182336182337e-07, |
|
"logits/generated": -1.2707586288452148, |
|
"logits/oppo_generated": -2.976921796798706, |
|
"logits/oppo_real": -3.0094780921936035, |
|
"logits/real": -1.4526524543762207, |
|
"logps/generated": -312.49371337890625, |
|
"logps/oppo_gen": -66.51390075683594, |
|
"logps/oppo_real": -174.39071655273438, |
|
"logps/real": -178.83309936523438, |
|
"loss": 0.1749, |
|
"loss/gen": 0.10825233161449432, |
|
"loss/real": 0.09127488732337952, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -245.97982788085938, |
|
"rewards/margins": 241.53746032714844, |
|
"rewards/real": -4.442395210266113, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 8.081066429844189, |
|
"learning_rate": 4.832621082621082e-07, |
|
"logits/generated": -0.9244946241378784, |
|
"logits/oppo_generated": -3.01529598236084, |
|
"logits/oppo_real": -2.9185380935668945, |
|
"logits/real": -1.7126656770706177, |
|
"logps/generated": -510.9183349609375, |
|
"logps/oppo_gen": -86.220458984375, |
|
"logps/oppo_real": -329.8023376464844, |
|
"logps/real": -329.7042236328125, |
|
"loss": 0.1706, |
|
"loss/gen": 0.0638774186372757, |
|
"loss/real": 0.10543566942214966, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -424.69793701171875, |
|
"rewards/margins": 424.7960205078125, |
|
"rewards/real": 0.09810352325439453, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 5.680776793325534, |
|
"learning_rate": 4.829059829059829e-07, |
|
"logits/generated": -0.9043546319007874, |
|
"logits/oppo_generated": -2.864108085632324, |
|
"logits/oppo_real": -2.8596436977386475, |
|
"logits/real": -0.7685225605964661, |
|
"logps/generated": -340.16485595703125, |
|
"logps/oppo_gen": -79.35113525390625, |
|
"logps/oppo_real": -357.43438720703125, |
|
"logps/real": -345.6606750488281, |
|
"loss": 0.1549, |
|
"loss/gen": 0.0296328142285347, |
|
"loss/real": 0.06829441338777542, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -260.813720703125, |
|
"rewards/margins": 272.58746337890625, |
|
"rewards/real": 11.773737907409668, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 5.318546937681411, |
|
"learning_rate": 4.825498575498575e-07, |
|
"logits/generated": 0.13492201268672943, |
|
"logits/oppo_generated": -2.635812282562256, |
|
"logits/oppo_real": -2.784547805786133, |
|
"logits/real": -0.45901572704315186, |
|
"logps/generated": -414.36871337890625, |
|
"logps/oppo_gen": -87.48421478271484, |
|
"logps/oppo_real": -250.10626220703125, |
|
"logps/real": -259.7474670410156, |
|
"loss": 0.1478, |
|
"loss/gen": 0.028783217072486877, |
|
"loss/real": 0.09256881475448608, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -326.8844909667969, |
|
"rewards/margins": 317.2432861328125, |
|
"rewards/real": -9.641218185424805, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 16.837768171931778, |
|
"learning_rate": 4.821937321937321e-07, |
|
"logits/generated": -0.17588667571544647, |
|
"logits/oppo_generated": -2.9845218658447266, |
|
"logits/oppo_real": -3.016307830810547, |
|
"logits/real": -0.11410784721374512, |
|
"logps/generated": -326.5010986328125, |
|
"logps/oppo_gen": -55.523197174072266, |
|
"logps/oppo_real": -291.81378173828125, |
|
"logps/real": -377.31939697265625, |
|
"loss": 0.1657, |
|
"loss/gen": 0.047291021794080734, |
|
"loss/real": 0.28053322434425354, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -270.9778747558594, |
|
"rewards/margins": 185.4722900390625, |
|
"rewards/real": -85.5055923461914, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 11.822744942424869, |
|
"learning_rate": 4.818376068376069e-07, |
|
"logits/generated": 0.943000316619873, |
|
"logits/oppo_generated": -2.8317785263061523, |
|
"logits/oppo_real": -2.849785327911377, |
|
"logits/real": 0.47939473390579224, |
|
"logps/generated": -319.35791015625, |
|
"logps/oppo_gen": -65.48351287841797, |
|
"logps/oppo_real": -259.8980712890625, |
|
"logps/real": -274.378662109375, |
|
"loss": 0.134, |
|
"loss/gen": 0.06071440130472183, |
|
"loss/real": 0.11204100400209427, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -253.8743896484375, |
|
"rewards/margins": 239.39376831054688, |
|
"rewards/real": -14.480613708496094, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 9.308826515658824, |
|
"learning_rate": 4.814814814814814e-07, |
|
"logits/generated": 1.9314751625061035, |
|
"logits/oppo_generated": -2.9616637229919434, |
|
"logits/oppo_real": -2.8549320697784424, |
|
"logits/real": 0.0003508329391479492, |
|
"logps/generated": -361.995849609375, |
|
"logps/oppo_gen": -66.1073226928711, |
|
"logps/oppo_real": -297.0393981933594, |
|
"logps/real": -279.0748291015625, |
|
"loss": 0.1425, |
|
"loss/gen": 0.02837001159787178, |
|
"loss/real": 0.056885264813899994, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -295.88848876953125, |
|
"rewards/margins": 313.8530578613281, |
|
"rewards/real": 17.96456527709961, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 19.337031219067406, |
|
"learning_rate": 4.811253561253561e-07, |
|
"logits/generated": -0.18303906917572021, |
|
"logits/oppo_generated": -2.944060802459717, |
|
"logits/oppo_real": -2.977362632751465, |
|
"logits/real": 0.0758047103881836, |
|
"logps/generated": -371.5301513671875, |
|
"logps/oppo_gen": -49.032493591308594, |
|
"logps/oppo_real": -197.13412475585938, |
|
"logps/real": -230.9898681640625, |
|
"loss": 0.1525, |
|
"loss/gen": 0.05682520568370819, |
|
"loss/real": 0.14864592254161835, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -322.4976806640625, |
|
"rewards/margins": 288.6419372558594, |
|
"rewards/real": -33.855743408203125, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 10.486335582449854, |
|
"learning_rate": 4.807692307692307e-07, |
|
"logits/generated": 2.399709939956665, |
|
"logits/oppo_generated": -2.9935152530670166, |
|
"logits/oppo_real": -2.782620906829834, |
|
"logits/real": -0.642041802406311, |
|
"logps/generated": -318.13739013671875, |
|
"logps/oppo_gen": -79.41259002685547, |
|
"logps/oppo_real": -304.58465576171875, |
|
"logps/real": -291.5322265625, |
|
"loss": 0.1445, |
|
"loss/gen": 0.10180285573005676, |
|
"loss/real": 0.06210155040025711, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -238.7247772216797, |
|
"rewards/margins": 251.7772216796875, |
|
"rewards/real": 13.052433013916016, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 10.683275246623444, |
|
"learning_rate": 4.804131054131054e-07, |
|
"logits/generated": -0.5497384667396545, |
|
"logits/oppo_generated": -3.0348973274230957, |
|
"logits/oppo_real": -2.8550362586975098, |
|
"logits/real": -0.3163856863975525, |
|
"logps/generated": -430.0574951171875, |
|
"logps/oppo_gen": -147.11734008789062, |
|
"logps/oppo_real": -324.0049743652344, |
|
"logps/real": -316.42535400390625, |
|
"loss": 0.1433, |
|
"loss/gen": 0.06713081151247025, |
|
"loss/real": 0.06877341121435165, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -282.940185546875, |
|
"rewards/margins": 290.5198059082031, |
|
"rewards/real": 7.579617500305176, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 16.20368154631215, |
|
"learning_rate": 4.8005698005698e-07, |
|
"logits/generated": 2.2823328971862793, |
|
"logits/oppo_generated": -2.8708338737487793, |
|
"logits/oppo_real": -2.8143606185913086, |
|
"logits/real": 1.9810662269592285, |
|
"logps/generated": -438.2496337890625, |
|
"logps/oppo_gen": -81.77798461914062, |
|
"logps/oppo_real": -330.5220031738281, |
|
"logps/real": -332.4623107910156, |
|
"loss": 0.1574, |
|
"loss/gen": 0.01590638794004917, |
|
"loss/real": 0.08378194272518158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -356.4716491699219, |
|
"rewards/margins": 354.53131103515625, |
|
"rewards/real": -1.940330982208252, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 9.616510387541387, |
|
"learning_rate": 4.797008547008547e-07, |
|
"logits/generated": 2.1011903285980225, |
|
"logits/oppo_generated": -2.7298922538757324, |
|
"logits/oppo_real": -2.698655605316162, |
|
"logits/real": 1.2009429931640625, |
|
"logps/generated": -423.5313720703125, |
|
"logps/oppo_gen": -74.60616302490234, |
|
"logps/oppo_real": -251.41427612304688, |
|
"logps/real": -272.0252685546875, |
|
"loss": 0.1566, |
|
"loss/gen": 0.00938740000128746, |
|
"loss/real": 0.15086832642555237, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -348.9252014160156, |
|
"rewards/margins": 328.314208984375, |
|
"rewards/real": -20.611007690429688, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 9.74919633732098, |
|
"learning_rate": 4.793447293447293e-07, |
|
"logits/generated": 1.5779234170913696, |
|
"logits/oppo_generated": -2.9584808349609375, |
|
"logits/oppo_real": -2.8358330726623535, |
|
"logits/real": -0.6080777049064636, |
|
"logps/generated": -360.771240234375, |
|
"logps/oppo_gen": -83.23335266113281, |
|
"logps/oppo_real": -311.66064453125, |
|
"logps/real": -302.7115173339844, |
|
"loss": 0.1474, |
|
"loss/gen": 0.04708855226635933, |
|
"loss/real": 0.06983175873756409, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -277.53790283203125, |
|
"rewards/margins": 286.4870300292969, |
|
"rewards/real": 8.949142456054688, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 12.238807391738392, |
|
"learning_rate": 4.78988603988604e-07, |
|
"logits/generated": 1.7313982248306274, |
|
"logits/oppo_generated": -2.83894681930542, |
|
"logits/oppo_real": -2.731696605682373, |
|
"logits/real": -0.31787559390068054, |
|
"logps/generated": -404.16876220703125, |
|
"logps/oppo_gen": -103.72628021240234, |
|
"logps/oppo_real": -218.9561767578125, |
|
"logps/real": -203.67791748046875, |
|
"loss": 0.1465, |
|
"loss/gen": 0.05964846536517143, |
|
"loss/real": 0.05892299860715866, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -300.4425048828125, |
|
"rewards/margins": 315.72076416015625, |
|
"rewards/real": 15.278261184692383, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 26.120130751371633, |
|
"learning_rate": 4.786324786324786e-07, |
|
"logits/generated": -0.03346788138151169, |
|
"logits/oppo_generated": -2.7633142471313477, |
|
"logits/oppo_real": -2.9560418128967285, |
|
"logits/real": 0.4580051302909851, |
|
"logps/generated": -400.31939697265625, |
|
"logps/oppo_gen": -74.91079711914062, |
|
"logps/oppo_real": -299.2713623046875, |
|
"logps/real": -274.74908447265625, |
|
"loss": 0.1447, |
|
"loss/gen": 0.03246932476758957, |
|
"loss/real": 0.057413578033447266, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -325.40863037109375, |
|
"rewards/margins": 349.930908203125, |
|
"rewards/real": 24.522279739379883, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 6.85888308627302, |
|
"learning_rate": 4.782763532763532e-07, |
|
"logits/generated": -0.8883915543556213, |
|
"logits/oppo_generated": -2.8308515548706055, |
|
"logits/oppo_real": -3.085522174835205, |
|
"logits/real": -0.28476178646087646, |
|
"logps/generated": -450.4888916015625, |
|
"logps/oppo_gen": -134.01483154296875, |
|
"logps/oppo_real": -442.37945556640625, |
|
"logps/real": -414.3377990722656, |
|
"loss": 0.1213, |
|
"loss/gen": 0.013260584324598312, |
|
"loss/real": 0.0481923446059227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -316.47406005859375, |
|
"rewards/margins": 344.51568603515625, |
|
"rewards/real": 28.041629791259766, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 11.034659833967117, |
|
"learning_rate": 4.779202279202279e-07, |
|
"logits/generated": 1.2936651706695557, |
|
"logits/oppo_generated": -2.8044867515563965, |
|
"logits/oppo_real": -2.8060150146484375, |
|
"logits/real": 2.6514382362365723, |
|
"logps/generated": -387.21533203125, |
|
"logps/oppo_gen": -51.423309326171875, |
|
"logps/oppo_real": -222.54879760742188, |
|
"logps/real": -246.83465576171875, |
|
"loss": 0.1273, |
|
"loss/gen": 0.011176066473126411, |
|
"loss/real": 0.15403711795806885, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -335.79205322265625, |
|
"rewards/margins": 311.5062255859375, |
|
"rewards/real": -24.28582763671875, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 20.060685975962333, |
|
"learning_rate": 4.775641025641026e-07, |
|
"logits/generated": 1.3783206939697266, |
|
"logits/oppo_generated": -2.932793140411377, |
|
"logits/oppo_real": -2.9959638118743896, |
|
"logits/real": -0.3575197160243988, |
|
"logps/generated": -413.3162841796875, |
|
"logps/oppo_gen": -68.20332336425781, |
|
"logps/oppo_real": -376.541015625, |
|
"logps/real": -404.2684020996094, |
|
"loss": 0.1383, |
|
"loss/gen": 0.009259795770049095, |
|
"loss/real": 0.16172456741333008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -345.1129455566406, |
|
"rewards/margins": 317.38555908203125, |
|
"rewards/real": -27.727367401123047, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 13.770484231206762, |
|
"learning_rate": 4.772079772079772e-07, |
|
"logits/generated": -0.9163269400596619, |
|
"logits/oppo_generated": -2.780601739883423, |
|
"logits/oppo_real": -2.8726038932800293, |
|
"logits/real": -0.48223400115966797, |
|
"logps/generated": -435.2667236328125, |
|
"logps/oppo_gen": -75.83106994628906, |
|
"logps/oppo_real": -327.609619140625, |
|
"logps/real": -339.29437255859375, |
|
"loss": 0.1407, |
|
"loss/gen": 0.01995166763663292, |
|
"loss/real": 0.11280296742916107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -359.4356384277344, |
|
"rewards/margins": 347.7508544921875, |
|
"rewards/real": -11.684758186340332, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 12.00733532823042, |
|
"learning_rate": 4.768518518518518e-07, |
|
"logits/generated": 0.19806894659996033, |
|
"logits/oppo_generated": -2.91953706741333, |
|
"logits/oppo_real": -2.820370674133301, |
|
"logits/real": -1.750054955482483, |
|
"logps/generated": -414.35638427734375, |
|
"logps/oppo_gen": -75.91517639160156, |
|
"logps/oppo_real": -531.0400390625, |
|
"logps/real": -527.661376953125, |
|
"loss": 0.1302, |
|
"loss/gen": 0.013693347573280334, |
|
"loss/real": 0.07382213324308395, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -338.44122314453125, |
|
"rewards/margins": 341.81988525390625, |
|
"rewards/real": 3.378690719604492, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 16.03866483006768, |
|
"learning_rate": 4.764957264957264e-07, |
|
"logits/generated": 0.33925262093544006, |
|
"logits/oppo_generated": -2.927794933319092, |
|
"logits/oppo_real": -2.8259315490722656, |
|
"logits/real": -1.4205646514892578, |
|
"logps/generated": -393.587890625, |
|
"logps/oppo_gen": -75.32722473144531, |
|
"logps/oppo_real": -334.3116149902344, |
|
"logps/real": -323.7161865234375, |
|
"loss": 0.1289, |
|
"loss/gen": 0.0497332438826561, |
|
"loss/real": 0.06772775202989578, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -318.2606506347656, |
|
"rewards/margins": 328.8560485839844, |
|
"rewards/real": 10.595392227172852, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 8.833888726198415, |
|
"learning_rate": 4.761396011396011e-07, |
|
"logits/generated": -0.9230914115905762, |
|
"logits/oppo_generated": -2.798323154449463, |
|
"logits/oppo_real": -3.0827927589416504, |
|
"logits/real": 0.6153226494789124, |
|
"logps/generated": -410.2586364746094, |
|
"logps/oppo_gen": -85.98326110839844, |
|
"logps/oppo_real": -484.7052001953125, |
|
"logps/real": -483.60198974609375, |
|
"loss": 0.1254, |
|
"loss/gen": 0.013556469231843948, |
|
"loss/real": 0.08908233791589737, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -324.275390625, |
|
"rewards/margins": 325.37860107421875, |
|
"rewards/real": 1.1032123565673828, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 7.328062012489163, |
|
"learning_rate": 4.7578347578347577e-07, |
|
"logits/generated": -0.05489081144332886, |
|
"logits/oppo_generated": -2.820817232131958, |
|
"logits/oppo_real": -2.7580766677856445, |
|
"logits/real": 0.22153127193450928, |
|
"logps/generated": -493.45196533203125, |
|
"logps/oppo_gen": -98.39456176757812, |
|
"logps/oppo_real": -435.86871337890625, |
|
"logps/real": -457.92608642578125, |
|
"loss": 0.1371, |
|
"loss/gen": 0.00898753385990858, |
|
"loss/real": 0.1511078178882599, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -395.05743408203125, |
|
"rewards/margins": 373.00006103515625, |
|
"rewards/real": -22.057361602783203, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 11.117432152635654, |
|
"learning_rate": 4.754273504273504e-07, |
|
"logits/generated": 2.727875232696533, |
|
"logits/oppo_generated": -2.991581439971924, |
|
"logits/oppo_real": -3.002182960510254, |
|
"logits/real": -0.03313925862312317, |
|
"logps/generated": -460.0456848144531, |
|
"logps/oppo_gen": -81.12940216064453, |
|
"logps/oppo_real": -296.61138916015625, |
|
"logps/real": -301.22528076171875, |
|
"loss": 0.1075, |
|
"loss/gen": 0.006128540262579918, |
|
"loss/real": 0.12722176313400269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -378.9162902832031, |
|
"rewards/margins": 374.3023986816406, |
|
"rewards/real": -4.61387825012207, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 9.33021079917967, |
|
"learning_rate": 4.7507122507122507e-07, |
|
"logits/generated": 1.4162685871124268, |
|
"logits/oppo_generated": -2.8433456420898438, |
|
"logits/oppo_real": -3.012195110321045, |
|
"logits/real": 2.0103273391723633, |
|
"logps/generated": -370.1234130859375, |
|
"logps/oppo_gen": -63.396881103515625, |
|
"logps/oppo_real": -288.55780029296875, |
|
"logps/real": -273.33319091796875, |
|
"loss": 0.1087, |
|
"loss/gen": 0.0711958184838295, |
|
"loss/real": 0.06172256916761398, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -306.7265625, |
|
"rewards/margins": 321.951171875, |
|
"rewards/real": 15.224629402160645, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 10.560251933473818, |
|
"learning_rate": 4.747150997150997e-07, |
|
"logits/generated": 0.24370548129081726, |
|
"logits/oppo_generated": -2.75607967376709, |
|
"logits/oppo_real": -3.044626235961914, |
|
"logits/real": 2.0132508277893066, |
|
"logps/generated": -481.5845031738281, |
|
"logps/oppo_gen": -89.79308319091797, |
|
"logps/oppo_real": -237.51071166992188, |
|
"logps/real": -226.40072631835938, |
|
"loss": 0.1334, |
|
"loss/gen": 0.006441723555326462, |
|
"loss/real": 0.06355743110179901, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -391.7913818359375, |
|
"rewards/margins": 402.9013671875, |
|
"rewards/real": 11.10997486114502, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 8.007156985365109, |
|
"learning_rate": 4.743589743589743e-07, |
|
"logits/generated": 0.1388707160949707, |
|
"logits/oppo_generated": -2.9334537982940674, |
|
"logits/oppo_real": -3.0197911262512207, |
|
"logits/real": 0.12455784529447556, |
|
"logps/generated": -413.8739318847656, |
|
"logps/oppo_gen": -86.25882720947266, |
|
"logps/oppo_real": -171.73361206054688, |
|
"logps/real": -153.0205078125, |
|
"loss": 0.1145, |
|
"loss/gen": 0.01757695898413658, |
|
"loss/real": 0.05847536399960518, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -327.6151123046875, |
|
"rewards/margins": 346.32818603515625, |
|
"rewards/real": 18.71310806274414, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 15.781931645241597, |
|
"learning_rate": 4.74002849002849e-07, |
|
"logits/generated": 0.3543964624404907, |
|
"logits/oppo_generated": -2.8885016441345215, |
|
"logits/oppo_real": -2.9670629501342773, |
|
"logits/real": 1.7155840396881104, |
|
"logps/generated": -310.737548828125, |
|
"logps/oppo_gen": -52.36747741699219, |
|
"logps/oppo_real": -234.88699340820312, |
|
"logps/real": -229.83233642578125, |
|
"loss": 0.1185, |
|
"loss/gen": 0.08432283997535706, |
|
"loss/real": 0.09768233448266983, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -258.37005615234375, |
|
"rewards/margins": 263.42474365234375, |
|
"rewards/real": 5.054680824279785, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 18.309062544003567, |
|
"learning_rate": 4.7364672364672366e-07, |
|
"logits/generated": 4.106470108032227, |
|
"logits/oppo_generated": -2.902094841003418, |
|
"logits/oppo_real": -2.738150119781494, |
|
"logits/real": -0.7901719808578491, |
|
"logps/generated": -486.57537841796875, |
|
"logps/oppo_gen": -71.77503967285156, |
|
"logps/oppo_real": -226.59805297851562, |
|
"logps/real": -223.43231201171875, |
|
"loss": 0.1276, |
|
"loss/gen": 0.0029249710496515036, |
|
"loss/real": 0.07814927399158478, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -414.8003234863281, |
|
"rewards/margins": 417.9660949707031, |
|
"rewards/real": 3.165764808654785, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 15.178673710839073, |
|
"learning_rate": 4.7329059829059823e-07, |
|
"logits/generated": 2.0630407333374023, |
|
"logits/oppo_generated": -2.78233003616333, |
|
"logits/oppo_real": -2.810633420944214, |
|
"logits/real": 0.5877382755279541, |
|
"logps/generated": -413.3228759765625, |
|
"logps/oppo_gen": -51.96064758300781, |
|
"logps/oppo_real": -160.8415069580078, |
|
"logps/real": -206.94174194335938, |
|
"loss": 0.1099, |
|
"loss/gen": 0.005064055323600769, |
|
"loss/real": 0.20853213965892792, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -361.36224365234375, |
|
"rewards/margins": 315.2619934082031, |
|
"rewards/real": -46.10023498535156, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 15.499474301191032, |
|
"learning_rate": 4.729344729344729e-07, |
|
"logits/generated": -0.24721288681030273, |
|
"logits/oppo_generated": -2.7906460762023926, |
|
"logits/oppo_real": -2.7454147338867188, |
|
"logits/real": -0.8565228581428528, |
|
"logps/generated": -378.21197509765625, |
|
"logps/oppo_gen": -67.77021789550781, |
|
"logps/oppo_real": -355.9058837890625, |
|
"logps/real": -326.93756103515625, |
|
"loss": 0.129, |
|
"loss/gen": 0.019696667790412903, |
|
"loss/real": 0.04715769737958908, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -310.4417724609375, |
|
"rewards/margins": 339.41009521484375, |
|
"rewards/real": 28.968334197998047, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 18.262918159141492, |
|
"learning_rate": 4.725783475783476e-07, |
|
"logits/generated": 4.8104448318481445, |
|
"logits/oppo_generated": -2.784420967102051, |
|
"logits/oppo_real": -2.58797550201416, |
|
"logits/real": 1.0035556554794312, |
|
"logps/generated": -408.8854064941406, |
|
"logps/oppo_gen": -53.4489631652832, |
|
"logps/oppo_real": -213.77337646484375, |
|
"logps/real": -237.3814239501953, |
|
"loss": 0.1199, |
|
"loss/gen": 0.016478953883051872, |
|
"loss/real": 0.15279527008533478, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -355.43646240234375, |
|
"rewards/margins": 331.8283996582031, |
|
"rewards/real": -23.60806655883789, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 9.994206466890306, |
|
"learning_rate": 4.722222222222222e-07, |
|
"logits/generated": 0.6246334910392761, |
|
"logits/oppo_generated": -2.9693868160247803, |
|
"logits/oppo_real": -2.897064208984375, |
|
"logits/real": -1.4851816892623901, |
|
"logps/generated": -406.7054443359375, |
|
"logps/oppo_gen": -65.07535552978516, |
|
"logps/oppo_real": -380.3414306640625, |
|
"logps/real": -383.3380432128906, |
|
"loss": 0.1289, |
|
"loss/gen": 0.013010518625378609, |
|
"loss/real": 0.09031115472316742, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -341.63006591796875, |
|
"rewards/margins": 338.6334228515625, |
|
"rewards/real": -2.9966354370117188, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 15.87821435300705, |
|
"learning_rate": 4.7186609686609683e-07, |
|
"logits/generated": -0.36341723799705505, |
|
"logits/oppo_generated": -2.8074076175689697, |
|
"logits/oppo_real": -2.9744620323181152, |
|
"logits/real": 0.5565029382705688, |
|
"logps/generated": -380.6473388671875, |
|
"logps/oppo_gen": -81.67523193359375, |
|
"logps/oppo_real": -332.10321044921875, |
|
"logps/real": -320.957275390625, |
|
"loss": 0.1067, |
|
"loss/gen": 0.08063576370477676, |
|
"loss/real": 0.0693480372428894, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -298.97210693359375, |
|
"rewards/margins": 310.1180419921875, |
|
"rewards/real": 11.145920753479004, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 11.68128843020531, |
|
"learning_rate": 4.715099715099715e-07, |
|
"logits/generated": -0.2585492730140686, |
|
"logits/oppo_generated": -2.8780970573425293, |
|
"logits/oppo_real": -2.880333185195923, |
|
"logits/real": -0.5610638856887817, |
|
"logps/generated": -508.65380859375, |
|
"logps/oppo_gen": -83.72149658203125, |
|
"logps/oppo_real": -272.17291259765625, |
|
"logps/real": -247.9830780029297, |
|
"loss": 0.1178, |
|
"loss/gen": 0.009849696420133114, |
|
"loss/real": 0.05431273579597473, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -424.9322814941406, |
|
"rewards/margins": 449.12213134765625, |
|
"rewards/real": 24.189828872680664, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 6.8891323142300545, |
|
"learning_rate": 4.711538461538461e-07, |
|
"logits/generated": 0.28690657019615173, |
|
"logits/oppo_generated": -2.8689210414886475, |
|
"logits/oppo_real": -3.036574602127075, |
|
"logits/real": 0.44430530071258545, |
|
"logps/generated": -501.7239990234375, |
|
"logps/oppo_gen": -61.806739807128906, |
|
"logps/oppo_real": -213.864013671875, |
|
"logps/real": -210.99581909179688, |
|
"loss": 0.1123, |
|
"loss/gen": 0.005460466258227825, |
|
"loss/real": 0.09053057432174683, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -439.917236328125, |
|
"rewards/margins": 442.78546142578125, |
|
"rewards/real": 2.868199348449707, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 9.490839908843315, |
|
"learning_rate": 4.707977207977208e-07, |
|
"logits/generated": 0.23971767723560333, |
|
"logits/oppo_generated": -2.847443103790283, |
|
"logits/oppo_real": -2.9110074043273926, |
|
"logits/real": -1.9221229553222656, |
|
"logps/generated": -454.3905029296875, |
|
"logps/oppo_gen": -68.70259857177734, |
|
"logps/oppo_real": -252.70947265625, |
|
"logps/real": -238.11082458496094, |
|
"loss": 0.1114, |
|
"loss/gen": 0.01896515116095543, |
|
"loss/real": 0.06484989076852798, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -385.68792724609375, |
|
"rewards/margins": 400.28656005859375, |
|
"rewards/real": 14.598625183105469, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 9.89817505016609, |
|
"learning_rate": 4.7044159544159537e-07, |
|
"logits/generated": 0.8384239077568054, |
|
"logits/oppo_generated": -2.850525140762329, |
|
"logits/oppo_real": -2.9623799324035645, |
|
"logits/real": -1.0885248184204102, |
|
"logps/generated": -497.30291748046875, |
|
"logps/oppo_gen": -70.65492248535156, |
|
"logps/oppo_real": -241.07968139648438, |
|
"logps/real": -268.8257141113281, |
|
"loss": 0.1057, |
|
"loss/gen": 0.0037786937318742275, |
|
"loss/real": 0.15509513020515442, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -426.64801025390625, |
|
"rewards/margins": 398.9019775390625, |
|
"rewards/real": -27.746034622192383, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 12.319693850391852, |
|
"learning_rate": 4.7008547008547005e-07, |
|
"logits/generated": 0.24996593594551086, |
|
"logits/oppo_generated": -2.760641574859619, |
|
"logits/oppo_real": -2.835960865020752, |
|
"logits/real": -0.5614693760871887, |
|
"logps/generated": -412.4053955078125, |
|
"logps/oppo_gen": -77.80702209472656, |
|
"logps/oppo_real": -309.97265625, |
|
"logps/real": -334.11785888671875, |
|
"loss": 0.1175, |
|
"loss/gen": 0.03727255389094353, |
|
"loss/real": 0.15751832723617554, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -334.5983581542969, |
|
"rewards/margins": 310.453125, |
|
"rewards/real": -24.145217895507812, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 6.3940563736316625, |
|
"learning_rate": 4.697293447293447e-07, |
|
"logits/generated": 0.5585614442825317, |
|
"logits/oppo_generated": -2.762300491333008, |
|
"logits/oppo_real": -2.91391658782959, |
|
"logits/real": 0.02464289963245392, |
|
"logps/generated": -475.69134521484375, |
|
"logps/oppo_gen": -79.30331420898438, |
|
"logps/oppo_real": -206.95407104492188, |
|
"logps/real": -197.98240661621094, |
|
"loss": 0.0987, |
|
"loss/gen": 0.002962255384773016, |
|
"loss/real": 0.06829556077718735, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -396.38800048828125, |
|
"rewards/margins": 405.35968017578125, |
|
"rewards/real": 8.971668243408203, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 8.25558467612843, |
|
"learning_rate": 4.6937321937321934e-07, |
|
"logits/generated": 0.5922085046768188, |
|
"logits/oppo_generated": -2.8723740577697754, |
|
"logits/oppo_real": -2.730229139328003, |
|
"logits/real": -0.23181796073913574, |
|
"logps/generated": -524.1031494140625, |
|
"logps/oppo_gen": -68.4917984008789, |
|
"logps/oppo_real": -205.74790954589844, |
|
"logps/real": -234.06521606445312, |
|
"loss": 0.1115, |
|
"loss/gen": 0.002901814179494977, |
|
"loss/real": 0.15742677450180054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -455.6112976074219, |
|
"rewards/margins": 427.29400634765625, |
|
"rewards/real": -28.31732177734375, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 10.834276794041324, |
|
"learning_rate": 4.69017094017094e-07, |
|
"logits/generated": 1.4108164310455322, |
|
"logits/oppo_generated": -2.833265781402588, |
|
"logits/oppo_real": -2.8581643104553223, |
|
"logits/real": -0.3456733226776123, |
|
"logps/generated": -481.52471923828125, |
|
"logps/oppo_gen": -72.44357299804688, |
|
"logps/oppo_real": -294.85699462890625, |
|
"logps/real": -270.8955078125, |
|
"loss": 0.1298, |
|
"loss/gen": 0.002270390745252371, |
|
"loss/real": 0.05221754312515259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -409.08111572265625, |
|
"rewards/margins": 433.0426025390625, |
|
"rewards/real": 23.961448669433594, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 7.972693810007679, |
|
"learning_rate": 4.6866096866096864e-07, |
|
"logits/generated": -0.14726564288139343, |
|
"logits/oppo_generated": -2.8131227493286133, |
|
"logits/oppo_real": -2.815453052520752, |
|
"logits/real": 1.334158182144165, |
|
"logps/generated": -526.6148681640625, |
|
"logps/oppo_gen": -118.46414184570312, |
|
"logps/oppo_real": -350.6376953125, |
|
"logps/real": -333.80560302734375, |
|
"loss": 0.1067, |
|
"loss/gen": 0.003093698527663946, |
|
"loss/real": 0.06007348746061325, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -408.1506652832031, |
|
"rewards/margins": 424.9827880859375, |
|
"rewards/real": 16.832094192504883, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 15.05114976920368, |
|
"learning_rate": 4.6830484330484326e-07, |
|
"logits/generated": 2.6919243335723877, |
|
"logits/oppo_generated": -2.868478775024414, |
|
"logits/oppo_real": -2.87443208694458, |
|
"logits/real": 0.641966700553894, |
|
"logps/generated": -454.2250671386719, |
|
"logps/oppo_gen": -72.4801025390625, |
|
"logps/oppo_real": -315.2503356933594, |
|
"logps/real": -297.43890380859375, |
|
"loss": 0.1283, |
|
"loss/gen": 0.007784061599522829, |
|
"loss/real": 0.056649141013622284, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -381.7449951171875, |
|
"rewards/margins": 399.5564270019531, |
|
"rewards/real": 17.81142234802246, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 14.445761954710054, |
|
"learning_rate": 4.6794871794871794e-07, |
|
"logits/generated": 0.24739599227905273, |
|
"logits/oppo_generated": -2.5010550022125244, |
|
"logits/oppo_real": -2.635188102722168, |
|
"logits/real": 1.4196836948394775, |
|
"logps/generated": -497.96832275390625, |
|
"logps/oppo_gen": -80.23007202148438, |
|
"logps/oppo_real": -347.019287109375, |
|
"logps/real": -367.36773681640625, |
|
"loss": 0.1031, |
|
"loss/gen": 0.0026748834643512964, |
|
"loss/real": 0.16232778131961823, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -417.7382507324219, |
|
"rewards/margins": 397.3897705078125, |
|
"rewards/real": -20.34844398498535, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 9.567511149315958, |
|
"learning_rate": 4.675925925925926e-07, |
|
"logits/generated": 1.9026211500167847, |
|
"logits/oppo_generated": -2.6126623153686523, |
|
"logits/oppo_real": -2.6145567893981934, |
|
"logits/real": -0.05264997482299805, |
|
"logps/generated": -483.66961669921875, |
|
"logps/oppo_gen": -73.5291748046875, |
|
"logps/oppo_real": -317.5265808105469, |
|
"logps/real": -291.19769287109375, |
|
"loss": 0.1171, |
|
"loss/gen": 0.0032699224539101124, |
|
"loss/real": 0.05136152356863022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -410.1404113769531, |
|
"rewards/margins": 436.4693298339844, |
|
"rewards/real": 26.328907012939453, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 9.27439303404284, |
|
"learning_rate": 4.672364672364672e-07, |
|
"logits/generated": 0.22407007217407227, |
|
"logits/oppo_generated": -3.0297465324401855, |
|
"logits/oppo_real": -3.101362705230713, |
|
"logits/real": -0.9624991416931152, |
|
"logps/generated": -475.4916687011719, |
|
"logps/oppo_gen": -120.2161865234375, |
|
"logps/oppo_real": -532.0965576171875, |
|
"logps/real": -508.2243347167969, |
|
"loss": 0.1328, |
|
"loss/gen": 0.013585396111011505, |
|
"loss/real": 0.053436100482940674, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -355.27545166015625, |
|
"rewards/margins": 379.14764404296875, |
|
"rewards/real": 23.8721981048584, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 8.529536819315032, |
|
"learning_rate": 4.6688034188034186e-07, |
|
"logits/generated": 0.46417438983917236, |
|
"logits/oppo_generated": -2.4462087154388428, |
|
"logits/oppo_real": -2.882254123687744, |
|
"logits/real": 0.4770011603832245, |
|
"logps/generated": -414.92950439453125, |
|
"logps/oppo_gen": -74.71348571777344, |
|
"logps/oppo_real": -324.086669921875, |
|
"logps/real": -322.7204284667969, |
|
"loss": 0.1141, |
|
"loss/gen": 0.022401118651032448, |
|
"loss/real": 0.08568526804447174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -340.2160339355469, |
|
"rewards/margins": 341.582275390625, |
|
"rewards/real": 1.3662652969360352, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 7.97382459439066, |
|
"learning_rate": 4.6652421652421653e-07, |
|
"logits/generated": 1.1687769889831543, |
|
"logits/oppo_generated": -2.9427778720855713, |
|
"logits/oppo_real": -2.9869794845581055, |
|
"logits/real": 0.1678808182477951, |
|
"logps/generated": -373.3022155761719, |
|
"logps/oppo_gen": -57.98387908935547, |
|
"logps/oppo_real": -299.8202209472656, |
|
"logps/real": -347.08966064453125, |
|
"loss": 0.1225, |
|
"loss/gen": 0.06827792525291443, |
|
"loss/real": 0.1839476227760315, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -315.318359375, |
|
"rewards/margins": 268.04888916015625, |
|
"rewards/real": -47.26943588256836, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 12.16076733125673, |
|
"learning_rate": 4.6616809116809116e-07, |
|
"logits/generated": -0.5007442831993103, |
|
"logits/oppo_generated": -2.462200880050659, |
|
"logits/oppo_real": -2.7382378578186035, |
|
"logits/real": 0.901258647441864, |
|
"logps/generated": -380.9939880371094, |
|
"logps/oppo_gen": -109.31198120117188, |
|
"logps/oppo_real": -333.22021484375, |
|
"logps/real": -319.7472229003906, |
|
"loss": 0.1218, |
|
"loss/gen": 0.14071233570575714, |
|
"loss/real": 0.062038667500019073, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -271.6820068359375, |
|
"rewards/margins": 285.1549987792969, |
|
"rewards/real": 13.472984313964844, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 9.4222944428817, |
|
"learning_rate": 4.658119658119658e-07, |
|
"logits/generated": 0.9653230309486389, |
|
"logits/oppo_generated": -2.9814329147338867, |
|
"logits/oppo_real": -2.8366198539733887, |
|
"logits/real": -1.606312870979309, |
|
"logps/generated": -515.083740234375, |
|
"logps/oppo_gen": -117.97686767578125, |
|
"logps/oppo_real": -333.4208679199219, |
|
"logps/real": -334.5287170410156, |
|
"loss": 0.1333, |
|
"loss/gen": 0.02934938669204712, |
|
"loss/real": 0.12275935709476471, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -397.10693359375, |
|
"rewards/margins": 395.9990539550781, |
|
"rewards/real": -1.1078624725341797, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 10.36904147832379, |
|
"learning_rate": 4.654558404558404e-07, |
|
"logits/generated": 3.2590041160583496, |
|
"logits/oppo_generated": -2.6781723499298096, |
|
"logits/oppo_real": -2.516916513442993, |
|
"logits/real": 0.9979041814804077, |
|
"logps/generated": -471.4188537597656, |
|
"logps/oppo_gen": -60.19814682006836, |
|
"logps/oppo_real": -262.58551025390625, |
|
"logps/real": -253.1987762451172, |
|
"loss": 0.1248, |
|
"loss/gen": 0.01645912043750286, |
|
"loss/real": 0.06675288081169128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -411.220703125, |
|
"rewards/margins": 420.607421875, |
|
"rewards/real": 9.386733055114746, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 7.49224910770421, |
|
"learning_rate": 4.650997150997151e-07, |
|
"logits/generated": -0.17034882307052612, |
|
"logits/oppo_generated": -2.8787498474121094, |
|
"logits/oppo_real": -2.805894374847412, |
|
"logits/real": -0.6173279285430908, |
|
"logps/generated": -499.2947692871094, |
|
"logps/oppo_gen": -124.28936767578125, |
|
"logps/oppo_real": -606.1627807617188, |
|
"logps/real": -573.3131713867188, |
|
"loss": 0.0974, |
|
"loss/gen": 0.017171718180179596, |
|
"loss/real": 0.04739490896463394, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -375.00537109375, |
|
"rewards/margins": 407.85498046875, |
|
"rewards/real": 32.84962463378906, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 10.01345111268014, |
|
"learning_rate": 4.6474358974358975e-07, |
|
"logits/generated": 0.31560423970222473, |
|
"logits/oppo_generated": -2.765538454055786, |
|
"logits/oppo_real": -2.839543342590332, |
|
"logits/real": 0.13689792156219482, |
|
"logps/generated": -441.107177734375, |
|
"logps/oppo_gen": -83.72669982910156, |
|
"logps/oppo_real": -361.6756591796875, |
|
"logps/real": -350.48388671875, |
|
"loss": 0.105, |
|
"loss/gen": 0.029633918777108192, |
|
"loss/real": 0.07531121373176575, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -357.3804931640625, |
|
"rewards/margins": 368.572265625, |
|
"rewards/real": 11.191795349121094, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 6.730574453700564, |
|
"learning_rate": 4.643874643874643e-07, |
|
"logits/generated": 2.131331205368042, |
|
"logits/oppo_generated": -2.7416014671325684, |
|
"logits/oppo_real": -2.8941569328308105, |
|
"logits/real": 0.05423975735902786, |
|
"logps/generated": -409.352783203125, |
|
"logps/oppo_gen": -51.659912109375, |
|
"logps/oppo_real": -267.5926513671875, |
|
"logps/real": -244.4554901123047, |
|
"loss": 0.113, |
|
"loss/gen": 0.00840664841234684, |
|
"loss/real": 0.05236246809363365, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -357.6929016113281, |
|
"rewards/margins": 380.830078125, |
|
"rewards/real": 23.13716697692871, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 8.541901921699957, |
|
"learning_rate": 4.64031339031339e-07, |
|
"logits/generated": 0.6947270035743713, |
|
"logits/oppo_generated": -2.609920024871826, |
|
"logits/oppo_real": -2.5399818420410156, |
|
"logits/real": -0.6192195415496826, |
|
"logps/generated": -489.7707824707031, |
|
"logps/oppo_gen": -81.96345520019531, |
|
"logps/oppo_real": -258.99554443359375, |
|
"logps/real": -241.81118774414062, |
|
"loss": 0.1239, |
|
"loss/gen": 0.009341086260974407, |
|
"loss/real": 0.05701681971549988, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -407.80731201171875, |
|
"rewards/margins": 424.99163818359375, |
|
"rewards/real": 17.18433952331543, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 6.895813940967902, |
|
"learning_rate": 4.6367521367521367e-07, |
|
"logits/generated": 1.7106398344039917, |
|
"logits/oppo_generated": -2.89731502532959, |
|
"logits/oppo_real": -2.861166000366211, |
|
"logits/real": -0.8656010031700134, |
|
"logps/generated": -439.37548828125, |
|
"logps/oppo_gen": -61.10588073730469, |
|
"logps/oppo_real": -297.8720703125, |
|
"logps/real": -310.3854675292969, |
|
"loss": 0.1158, |
|
"loss/gen": 0.008315667510032654, |
|
"loss/real": 0.14091408252716064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -378.26959228515625, |
|
"rewards/margins": 365.7562255859375, |
|
"rewards/real": -12.51338005065918, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 8.371380379629707, |
|
"learning_rate": 4.633190883190883e-07, |
|
"logits/generated": 0.12555718421936035, |
|
"logits/oppo_generated": -2.8648695945739746, |
|
"logits/oppo_real": -2.711393356323242, |
|
"logits/real": -2.393634796142578, |
|
"logps/generated": -473.0018005371094, |
|
"logps/oppo_gen": -111.59371948242188, |
|
"logps/oppo_real": -521.255859375, |
|
"logps/real": -490.30670166015625, |
|
"loss": 0.1191, |
|
"loss/gen": 0.011024970561265945, |
|
"loss/real": 0.04596058279275894, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -361.4081115722656, |
|
"rewards/margins": 392.3572998046875, |
|
"rewards/real": 30.94921112060547, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7.677323964323365, |
|
"learning_rate": 4.6296296296296297e-07, |
|
"logits/generated": 2.04494571685791, |
|
"logits/oppo_generated": -2.8064088821411133, |
|
"logits/oppo_real": -2.845989227294922, |
|
"logits/real": 0.3437741696834564, |
|
"logps/generated": -461.95391845703125, |
|
"logps/oppo_gen": -52.78784942626953, |
|
"logps/oppo_real": -172.55088806152344, |
|
"logps/real": -199.171875, |
|
"loss": 0.0992, |
|
"loss/gen": 0.010190755128860474, |
|
"loss/real": 0.1549648642539978, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -409.16607666015625, |
|
"rewards/margins": 382.5450439453125, |
|
"rewards/real": -26.62099266052246, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7.519952808800201, |
|
"learning_rate": 4.626068376068376e-07, |
|
"logits/generated": 1.723816990852356, |
|
"logits/oppo_generated": -3.0264251232147217, |
|
"logits/oppo_real": -2.836057186126709, |
|
"logits/real": -1.7777621746063232, |
|
"logps/generated": -501.8693542480469, |
|
"logps/oppo_gen": -74.337158203125, |
|
"logps/oppo_real": -371.032470703125, |
|
"logps/real": -327.919677734375, |
|
"loss": 0.0991, |
|
"loss/gen": 0.0021500587463378906, |
|
"loss/real": 0.04081626981496811, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -427.5322265625, |
|
"rewards/margins": 470.64501953125, |
|
"rewards/real": 43.11281967163086, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7.682464439044137, |
|
"learning_rate": 4.622507122507122e-07, |
|
"logits/generated": 0.18839222192764282, |
|
"logits/oppo_generated": -2.876476764678955, |
|
"logits/oppo_real": -2.912707805633545, |
|
"logits/real": -1.0475839376449585, |
|
"logps/generated": -440.72467041015625, |
|
"logps/oppo_gen": -90.53692626953125, |
|
"logps/oppo_real": -383.74615478515625, |
|
"logps/real": -350.449951171875, |
|
"loss": 0.1054, |
|
"loss/gen": 0.011242890730500221, |
|
"loss/real": 0.04423694312572479, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -350.1877746582031, |
|
"rewards/margins": 383.4840087890625, |
|
"rewards/real": 33.296241760253906, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 11.80636259624048, |
|
"learning_rate": 4.618945868945869e-07, |
|
"logits/generated": -1.3398778438568115, |
|
"logits/oppo_generated": -2.9819746017456055, |
|
"logits/oppo_real": -3.1959123611450195, |
|
"logits/real": -1.570970892906189, |
|
"logps/generated": -552.3341674804688, |
|
"logps/oppo_gen": -152.70217895507812, |
|
"logps/oppo_real": -483.54266357421875, |
|
"logps/real": -448.5618896484375, |
|
"loss": 0.1211, |
|
"loss/gen": 0.0028542811051011086, |
|
"loss/real": 0.04466398060321808, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -399.6319885253906, |
|
"rewards/margins": 434.61273193359375, |
|
"rewards/real": 34.98075866699219, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 13.32263370995865, |
|
"learning_rate": 4.6153846153846156e-07, |
|
"logits/generated": 0.6599330306053162, |
|
"logits/oppo_generated": -2.7378830909729004, |
|
"logits/oppo_real": -3.110536813735962, |
|
"logits/real": -0.832703709602356, |
|
"logps/generated": -510.0391845703125, |
|
"logps/oppo_gen": -86.0918960571289, |
|
"logps/oppo_real": -447.7939147949219, |
|
"logps/real": -431.9365234375, |
|
"loss": 0.128, |
|
"loss/gen": 0.001785873668268323, |
|
"loss/real": 0.059112463146448135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -423.947265625, |
|
"rewards/margins": 439.8046569824219, |
|
"rewards/real": 15.857396125793457, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 7.936690161535528, |
|
"learning_rate": 4.6118233618233613e-07, |
|
"logits/generated": -0.5695008039474487, |
|
"logits/oppo_generated": -2.7491419315338135, |
|
"logits/oppo_real": -3.191051483154297, |
|
"logits/real": -0.1943381428718567, |
|
"logps/generated": -525.660888671875, |
|
"logps/oppo_gen": -96.26548767089844, |
|
"logps/oppo_real": -305.7531433105469, |
|
"logps/real": -300.1641540527344, |
|
"loss": 0.106, |
|
"loss/gen": 0.009844229556620121, |
|
"loss/real": 0.07896681129932404, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -429.3954162597656, |
|
"rewards/margins": 434.98443603515625, |
|
"rewards/real": 5.589000701904297, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 10.56822022610991, |
|
"learning_rate": 4.608262108262108e-07, |
|
"logits/generated": 0.07804641127586365, |
|
"logits/oppo_generated": -2.8662476539611816, |
|
"logits/oppo_real": -2.7619881629943848, |
|
"logits/real": -0.7753300666809082, |
|
"logps/generated": -483.155029296875, |
|
"logps/oppo_gen": -76.39656066894531, |
|
"logps/oppo_real": -342.36138916015625, |
|
"logps/real": -317.1158447265625, |
|
"loss": 0.1337, |
|
"loss/gen": 0.002769787795841694, |
|
"loss/real": 0.051119036972522736, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -406.7584533691406, |
|
"rewards/margins": 432.0040283203125, |
|
"rewards/real": 25.24556541442871, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 8.100548205250428, |
|
"learning_rate": 4.6047008547008543e-07, |
|
"logits/generated": 0.029185794293880463, |
|
"logits/oppo_generated": -2.973456859588623, |
|
"logits/oppo_real": -2.9541869163513184, |
|
"logits/real": -1.5546411275863647, |
|
"logps/generated": -418.9149169921875, |
|
"logps/oppo_gen": -58.52758026123047, |
|
"logps/oppo_real": -196.6337127685547, |
|
"logps/real": -197.22256469726562, |
|
"loss": 0.1083, |
|
"loss/gen": 0.00923408754169941, |
|
"loss/real": 0.08144041895866394, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -360.3873291015625, |
|
"rewards/margins": 359.79852294921875, |
|
"rewards/real": -0.5888404846191406, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 11.178794499952486, |
|
"learning_rate": 4.601139601139601e-07, |
|
"logits/generated": -0.6778485774993896, |
|
"logits/oppo_generated": -2.9579458236694336, |
|
"logits/oppo_real": -2.8345115184783936, |
|
"logits/real": -2.318662405014038, |
|
"logps/generated": -453.1796569824219, |
|
"logps/oppo_gen": -86.37559509277344, |
|
"logps/oppo_real": -329.4002685546875, |
|
"logps/real": -332.9542236328125, |
|
"loss": 0.1167, |
|
"loss/gen": 0.00886719860136509, |
|
"loss/real": 0.1160876601934433, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -366.8040771484375, |
|
"rewards/margins": 363.2501220703125, |
|
"rewards/real": -3.5539398193359375, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 5.332568153485923, |
|
"learning_rate": 4.5975783475783473e-07, |
|
"logits/generated": 1.4992669820785522, |
|
"logits/oppo_generated": -2.4297678470611572, |
|
"logits/oppo_real": -2.5349526405334473, |
|
"logits/real": -1.3000285625457764, |
|
"logps/generated": -538.8160400390625, |
|
"logps/oppo_gen": -139.25880432128906, |
|
"logps/oppo_real": -366.9024658203125, |
|
"logps/real": -333.7024841308594, |
|
"loss": 0.1031, |
|
"loss/gen": 0.004812781233340502, |
|
"loss/real": 0.05341378599405289, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -399.5572509765625, |
|
"rewards/margins": 432.75726318359375, |
|
"rewards/real": 33.19999694824219, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 10.509122609429369, |
|
"learning_rate": 4.5940170940170935e-07, |
|
"logits/generated": 1.0690361261367798, |
|
"logits/oppo_generated": -2.59027099609375, |
|
"logits/oppo_real": -2.5751681327819824, |
|
"logits/real": -0.17392590641975403, |
|
"logps/generated": -419.8863525390625, |
|
"logps/oppo_gen": -44.13750076293945, |
|
"logps/oppo_real": -126.39328002929688, |
|
"logps/real": -176.01084899902344, |
|
"loss": 0.1203, |
|
"loss/gen": 0.023745674639940262, |
|
"loss/real": 0.19582872092723846, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -375.74884033203125, |
|
"rewards/margins": 326.13128662109375, |
|
"rewards/real": -49.617576599121094, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 7.353634160316212, |
|
"learning_rate": 4.59045584045584e-07, |
|
"logits/generated": 0.641779899597168, |
|
"logits/oppo_generated": -2.8061888217926025, |
|
"logits/oppo_real": -2.885352611541748, |
|
"logits/real": -2.01800799369812, |
|
"logps/generated": -555.3866577148438, |
|
"logps/oppo_gen": -82.9956283569336, |
|
"logps/oppo_real": -287.7582702636719, |
|
"logps/real": -271.5313415527344, |
|
"loss": 0.0932, |
|
"loss/gen": 0.0082212183624506, |
|
"loss/real": 0.05742044001817703, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -472.39105224609375, |
|
"rewards/margins": 488.6179504394531, |
|
"rewards/real": 16.226917266845703, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 9.072201081242417, |
|
"learning_rate": 4.586894586894587e-07, |
|
"logits/generated": 1.6796048879623413, |
|
"logits/oppo_generated": -2.6804826259613037, |
|
"logits/oppo_real": -2.560675621032715, |
|
"logits/real": 0.42854011058807373, |
|
"logps/generated": -459.411376953125, |
|
"logps/oppo_gen": -125.20469665527344, |
|
"logps/oppo_real": -214.75454711914062, |
|
"logps/real": -226.22039794921875, |
|
"loss": 0.1276, |
|
"loss/gen": 0.03355031833052635, |
|
"loss/real": 0.12927620112895966, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -334.20672607421875, |
|
"rewards/margins": 322.7408447265625, |
|
"rewards/real": -11.465866088867188, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 11.786933155528228, |
|
"learning_rate": 4.5833333333333327e-07, |
|
"logits/generated": -1.28011155128479, |
|
"logits/oppo_generated": -2.8161306381225586, |
|
"logits/oppo_real": -2.873737096786499, |
|
"logits/real": -1.3232629299163818, |
|
"logps/generated": -596.6446533203125, |
|
"logps/oppo_gen": -39.4675178527832, |
|
"logps/oppo_real": -94.7720718383789, |
|
"logps/real": -106.9762191772461, |
|
"loss": 0.105, |
|
"loss/gen": 0.04502476006746292, |
|
"loss/real": 0.10439670085906982, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -557.1771240234375, |
|
"rewards/margins": 544.9729614257812, |
|
"rewards/real": -12.204153060913086, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 15.654533124581928, |
|
"learning_rate": 4.5797720797720794e-07, |
|
"logits/generated": 1.7296359539031982, |
|
"logits/oppo_generated": -2.754338026046753, |
|
"logits/oppo_real": -2.6611428260803223, |
|
"logits/real": -0.2379247546195984, |
|
"logps/generated": -511.6190490722656, |
|
"logps/oppo_gen": -53.64311981201172, |
|
"logps/oppo_real": -189.60964965820312, |
|
"logps/real": -217.4978790283203, |
|
"loss": 0.1257, |
|
"loss/gen": 0.0008055841899476945, |
|
"loss/real": 0.15694613754749298, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -457.9759521484375, |
|
"rewards/margins": 430.08770751953125, |
|
"rewards/real": -27.888227462768555, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 7.173984724247081, |
|
"learning_rate": 4.576210826210826e-07, |
|
"logits/generated": 0.1732943058013916, |
|
"logits/oppo_generated": -2.8700437545776367, |
|
"logits/oppo_real": -3.012883186340332, |
|
"logits/real": -0.7629199028015137, |
|
"logps/generated": -533.4395751953125, |
|
"logps/oppo_gen": -64.43563842773438, |
|
"logps/oppo_real": -366.68572998046875, |
|
"logps/real": -337.18341064453125, |
|
"loss": 0.0966, |
|
"loss/gen": 0.0009057590505108237, |
|
"loss/real": 0.04720592498779297, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -469.0039367675781, |
|
"rewards/margins": 498.50628662109375, |
|
"rewards/real": 29.502349853515625, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 8.298848120489561, |
|
"learning_rate": 4.5726495726495724e-07, |
|
"logits/generated": 0.7261441946029663, |
|
"logits/oppo_generated": -2.896176338195801, |
|
"logits/oppo_real": -2.7520911693573, |
|
"logits/real": -2.145052194595337, |
|
"logps/generated": -551.3854370117188, |
|
"logps/oppo_gen": -94.6259765625, |
|
"logps/oppo_real": -329.9571533203125, |
|
"logps/real": -318.6071472167969, |
|
"loss": 0.0979, |
|
"loss/gen": 0.0014733282150700688, |
|
"loss/real": 0.07271689176559448, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -456.7594299316406, |
|
"rewards/margins": 468.10943603515625, |
|
"rewards/real": 11.350017547607422, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 7.603075368948934, |
|
"learning_rate": 4.569088319088319e-07, |
|
"logits/generated": 0.5782715082168579, |
|
"logits/oppo_generated": -2.72526478767395, |
|
"logits/oppo_real": -2.760162591934204, |
|
"logits/real": -0.5101295709609985, |
|
"logps/generated": -484.132080078125, |
|
"logps/oppo_gen": -70.71673583984375, |
|
"logps/oppo_real": -391.76458740234375, |
|
"logps/real": -390.664306640625, |
|
"loss": 0.0814, |
|
"loss/gen": 0.003090164391323924, |
|
"loss/real": 0.08752277493476868, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -413.41534423828125, |
|
"rewards/margins": 414.515625, |
|
"rewards/real": 1.1003141403198242, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 8.720307898028933, |
|
"learning_rate": 4.5655270655270654e-07, |
|
"logits/generated": -0.9746605157852173, |
|
"logits/oppo_generated": -2.979785919189453, |
|
"logits/oppo_real": -3.2641677856445312, |
|
"logits/real": -2.1320035457611084, |
|
"logps/generated": -547.2476196289062, |
|
"logps/oppo_gen": -92.89317321777344, |
|
"logps/oppo_real": -330.3245849609375, |
|
"logps/real": -331.6492919921875, |
|
"loss": 0.1021, |
|
"loss/gen": 0.006980424281209707, |
|
"loss/real": 0.09580740332603455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -454.35443115234375, |
|
"rewards/margins": 453.02972412109375, |
|
"rewards/real": -1.3247241973876953, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 6.378341696542414, |
|
"learning_rate": 4.5619658119658116e-07, |
|
"logits/generated": 0.2979557514190674, |
|
"logits/oppo_generated": -2.775574207305908, |
|
"logits/oppo_real": -2.598371744155884, |
|
"logits/real": -1.701836109161377, |
|
"logps/generated": -490.8515319824219, |
|
"logps/oppo_gen": -65.71693420410156, |
|
"logps/oppo_real": -220.19737243652344, |
|
"logps/real": -198.3828125, |
|
"loss": 0.0869, |
|
"loss/gen": 0.005930028390139341, |
|
"loss/real": 0.05311470851302147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -425.1346435546875, |
|
"rewards/margins": 446.94921875, |
|
"rewards/real": 21.814559936523438, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 11.739804003767482, |
|
"learning_rate": 4.5584045584045584e-07, |
|
"logits/generated": 2.0976781845092773, |
|
"logits/oppo_generated": -2.6892812252044678, |
|
"logits/oppo_real": -2.527797222137451, |
|
"logits/real": -0.9774438142776489, |
|
"logps/generated": -316.81829833984375, |
|
"logps/oppo_gen": -56.507102966308594, |
|
"logps/oppo_real": -203.99942016601562, |
|
"logps/real": -194.0773468017578, |
|
"loss": 0.1095, |
|
"loss/gen": 0.1823972463607788, |
|
"loss/real": 0.06337600946426392, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -260.31121826171875, |
|
"rewards/margins": 270.2332763671875, |
|
"rewards/real": 9.922063827514648, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 9.327009329219294, |
|
"learning_rate": 4.5548433048433046e-07, |
|
"logits/generated": 0.6838961243629456, |
|
"logits/oppo_generated": -2.892515182495117, |
|
"logits/oppo_real": -2.87583589553833, |
|
"logits/real": -1.6009001731872559, |
|
"logps/generated": -449.57476806640625, |
|
"logps/oppo_gen": -70.63409423828125, |
|
"logps/oppo_real": -236.45480346679688, |
|
"logps/real": -215.04605102539062, |
|
"loss": 0.0981, |
|
"loss/gen": 0.034911513328552246, |
|
"loss/real": 0.05380266159772873, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -378.940673828125, |
|
"rewards/margins": 400.34942626953125, |
|
"rewards/real": 21.408771514892578, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 11.752851424295404, |
|
"learning_rate": 4.551282051282051e-07, |
|
"logits/generated": 1.7187445163726807, |
|
"logits/oppo_generated": -2.2372124195098877, |
|
"logits/oppo_real": -2.6531500816345215, |
|
"logits/real": 0.8881663084030151, |
|
"logps/generated": -376.4776611328125, |
|
"logps/oppo_gen": -49.9699821472168, |
|
"logps/oppo_real": -257.7629699707031, |
|
"logps/real": -249.66366577148438, |
|
"loss": 0.1182, |
|
"loss/gen": 0.12994062900543213, |
|
"loss/real": 0.10034769773483276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -326.5076904296875, |
|
"rewards/margins": 334.60699462890625, |
|
"rewards/real": 8.099308967590332, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 18.73650393189305, |
|
"learning_rate": 4.5477207977207976e-07, |
|
"logits/generated": 2.266127109527588, |
|
"logits/oppo_generated": -2.6594979763031006, |
|
"logits/oppo_real": -2.72336483001709, |
|
"logits/real": -1.0872607231140137, |
|
"logps/generated": -446.92950439453125, |
|
"logps/oppo_gen": -69.47285461425781, |
|
"logps/oppo_real": -203.925048828125, |
|
"logps/real": -173.6673126220703, |
|
"loss": 0.108, |
|
"loss/gen": 0.039231862872838974, |
|
"loss/real": 0.04621148854494095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -377.4566650390625, |
|
"rewards/margins": 407.71441650390625, |
|
"rewards/real": 30.25775146484375, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 10.004324550142796, |
|
"learning_rate": 4.544159544159544e-07, |
|
"logits/generated": -0.5070485472679138, |
|
"logits/oppo_generated": -2.84741473197937, |
|
"logits/oppo_real": -2.9322423934936523, |
|
"logits/real": -0.8364191055297852, |
|
"logps/generated": -565.1874389648438, |
|
"logps/oppo_gen": -72.28129577636719, |
|
"logps/oppo_real": -342.0706787109375, |
|
"logps/real": -389.3838195800781, |
|
"loss": 0.1139, |
|
"loss/gen": 0.0009682751260697842, |
|
"loss/real": 0.20359663665294647, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -492.9061584472656, |
|
"rewards/margins": 445.59307861328125, |
|
"rewards/real": -47.313148498535156, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 11.552963365492598, |
|
"learning_rate": 4.5405982905982905e-07, |
|
"logits/generated": 1.1087026596069336, |
|
"logits/oppo_generated": -2.8123486042022705, |
|
"logits/oppo_real": -2.9484448432922363, |
|
"logits/real": -0.608461856842041, |
|
"logps/generated": -495.27496337890625, |
|
"logps/oppo_gen": -78.67784118652344, |
|
"logps/oppo_real": -224.94638061523438, |
|
"logps/real": -219.1400604248047, |
|
"loss": 0.1101, |
|
"loss/gen": 0.0022095751482993364, |
|
"loss/real": 0.0847286581993103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -416.59716796875, |
|
"rewards/margins": 422.4034729003906, |
|
"rewards/real": 5.806319236755371, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 13.91452801807594, |
|
"learning_rate": 4.537037037037037e-07, |
|
"logits/generated": -0.4531553387641907, |
|
"logits/oppo_generated": -2.6430654525756836, |
|
"logits/oppo_real": -2.7417783737182617, |
|
"logits/real": -0.06139189004898071, |
|
"logps/generated": -527.9544677734375, |
|
"logps/oppo_gen": -63.871150970458984, |
|
"logps/oppo_real": -224.14703369140625, |
|
"logps/real": -196.36013793945312, |
|
"loss": 0.0982, |
|
"loss/gen": 0.03309467062354088, |
|
"loss/real": 0.06330372393131256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -464.08331298828125, |
|
"rewards/margins": 491.8702087402344, |
|
"rewards/real": 27.78690528869629, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 6.456378402984101, |
|
"learning_rate": 4.533475783475783e-07, |
|
"logits/generated": 0.31468260288238525, |
|
"logits/oppo_generated": -2.757966995239258, |
|
"logits/oppo_real": -2.906935691833496, |
|
"logits/real": 0.27933990955352783, |
|
"logps/generated": -367.7430419921875, |
|
"logps/oppo_gen": -53.980133056640625, |
|
"logps/oppo_real": -168.99293518066406, |
|
"logps/real": -186.01309204101562, |
|
"loss": 0.0919, |
|
"loss/gen": 0.08386404067277908, |
|
"loss/real": 0.1370202898979187, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -313.7629089355469, |
|
"rewards/margins": 296.7427673339844, |
|
"rewards/real": -17.020137786865234, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 9.117984112584521, |
|
"learning_rate": 4.5299145299145297e-07, |
|
"logits/generated": 0.7232341766357422, |
|
"logits/oppo_generated": -2.34848690032959, |
|
"logits/oppo_real": -2.549453020095825, |
|
"logits/real": 0.08811542391777039, |
|
"logps/generated": -472.3197937011719, |
|
"logps/oppo_gen": -41.99907684326172, |
|
"logps/oppo_real": -137.05735778808594, |
|
"logps/real": -114.59136962890625, |
|
"loss": 0.0985, |
|
"loss/gen": 0.012792231515049934, |
|
"loss/real": 0.07953563332557678, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -430.3207092285156, |
|
"rewards/margins": 452.78668212890625, |
|
"rewards/real": 22.46598243713379, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 9.673804662234318, |
|
"learning_rate": 4.5263532763532765e-07, |
|
"logits/generated": -0.14821961522102356, |
|
"logits/oppo_generated": -2.5094847679138184, |
|
"logits/oppo_real": -2.6891722679138184, |
|
"logits/real": -0.5701528191566467, |
|
"logps/generated": -598.5277099609375, |
|
"logps/oppo_gen": -68.40258026123047, |
|
"logps/oppo_real": -223.42794799804688, |
|
"logps/real": -208.8800048828125, |
|
"loss": 0.1145, |
|
"loss/gen": 0.06975440680980682, |
|
"loss/real": 0.0920247808098793, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -530.1251220703125, |
|
"rewards/margins": 544.673095703125, |
|
"rewards/real": 14.547938346862793, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 18.323394763370047, |
|
"learning_rate": 4.522792022792022e-07, |
|
"logits/generated": -0.09898993372917175, |
|
"logits/oppo_generated": -2.8935999870300293, |
|
"logits/oppo_real": -2.775484561920166, |
|
"logits/real": -1.8450055122375488, |
|
"logps/generated": -366.33251953125, |
|
"logps/oppo_gen": -50.93283462524414, |
|
"logps/oppo_real": -316.0002136230469, |
|
"logps/real": -285.7142028808594, |
|
"loss": 0.1016, |
|
"loss/gen": 0.04077897593379021, |
|
"loss/real": 0.04620899260044098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -315.39971923828125, |
|
"rewards/margins": 345.6856994628906, |
|
"rewards/real": 30.286012649536133, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 9.95871911571871, |
|
"learning_rate": 4.519230769230769e-07, |
|
"logits/generated": -0.8768476247787476, |
|
"logits/oppo_generated": -2.8526816368103027, |
|
"logits/oppo_real": -3.2386014461517334, |
|
"logits/real": -1.6847550868988037, |
|
"logps/generated": -509.8077392578125, |
|
"logps/oppo_gen": -113.54923248291016, |
|
"logps/oppo_real": -351.7125549316406, |
|
"logps/real": -331.41754150390625, |
|
"loss": 0.0994, |
|
"loss/gen": 0.06542319059371948, |
|
"loss/real": 0.05489509552717209, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -396.25848388671875, |
|
"rewards/margins": 416.5534973144531, |
|
"rewards/real": 20.295007705688477, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 8.799185576465701, |
|
"learning_rate": 4.5156695156695157e-07, |
|
"logits/generated": 0.07430555671453476, |
|
"logits/oppo_generated": -2.9850940704345703, |
|
"logits/oppo_real": -3.0315611362457275, |
|
"logits/real": -0.40749257802963257, |
|
"logps/generated": -522.1142578125, |
|
"logps/oppo_gen": -61.65489196777344, |
|
"logps/oppo_real": -151.10653686523438, |
|
"logps/real": -150.37619018554688, |
|
"loss": 0.1072, |
|
"loss/gen": 0.0012253040913492441, |
|
"loss/real": 0.08858918398618698, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -460.45941162109375, |
|
"rewards/margins": 461.1897277832031, |
|
"rewards/real": 0.7303409576416016, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 10.122576388171886, |
|
"learning_rate": 4.512108262108262e-07, |
|
"logits/generated": -0.8268670439720154, |
|
"logits/oppo_generated": -2.891350746154785, |
|
"logits/oppo_real": -3.0990657806396484, |
|
"logits/real": -2.160102367401123, |
|
"logps/generated": -657.8355102539062, |
|
"logps/oppo_gen": -212.02532958984375, |
|
"logps/oppo_real": -549.8078002929688, |
|
"logps/real": -522.5389404296875, |
|
"loss": 0.0979, |
|
"loss/gen": 0.0014306737575680017, |
|
"loss/real": 0.05242353677749634, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -445.8101806640625, |
|
"rewards/margins": 473.0790100097656, |
|
"rewards/real": 27.26885986328125, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 8.222653930373783, |
|
"learning_rate": 4.5085470085470087e-07, |
|
"logits/generated": 4.044073104858398, |
|
"logits/oppo_generated": -2.861656904220581, |
|
"logits/oppo_real": -2.749734878540039, |
|
"logits/real": 0.019296986982226372, |
|
"logps/generated": -477.9574279785156, |
|
"logps/oppo_gen": -52.08341598510742, |
|
"logps/oppo_real": -268.2560119628906, |
|
"logps/real": -233.78384399414062, |
|
"loss": 0.0869, |
|
"loss/gen": 0.002855573780834675, |
|
"loss/real": 0.04411199688911438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -425.8740234375, |
|
"rewards/margins": 460.34619140625, |
|
"rewards/real": 34.47218704223633, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 11.325558033514632, |
|
"learning_rate": 4.5049857549857543e-07, |
|
"logits/generated": 1.9206124544143677, |
|
"logits/oppo_generated": -2.8331031799316406, |
|
"logits/oppo_real": -2.8462958335876465, |
|
"logits/real": -1.0948928594589233, |
|
"logps/generated": -606.4732666015625, |
|
"logps/oppo_gen": -78.92254638671875, |
|
"logps/oppo_real": -224.86373901367188, |
|
"logps/real": -214.3316650390625, |
|
"loss": 0.1087, |
|
"loss/gen": 0.00091104197781533, |
|
"loss/real": 0.07179263234138489, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -527.5506591796875, |
|
"rewards/margins": 538.082763671875, |
|
"rewards/real": 10.532065391540527, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 7.108887238590587, |
|
"learning_rate": 4.501424501424501e-07, |
|
"logits/generated": -0.11697453260421753, |
|
"logits/oppo_generated": -2.879185199737549, |
|
"logits/oppo_real": -2.873112678527832, |
|
"logits/real": -2.0221967697143555, |
|
"logps/generated": -491.9667663574219, |
|
"logps/oppo_gen": -49.27460479736328, |
|
"logps/oppo_real": -375.43463134765625, |
|
"logps/real": -347.04376220703125, |
|
"loss": 0.086, |
|
"loss/gen": 0.010025402531027794, |
|
"loss/real": 0.04785071685910225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -442.692138671875, |
|
"rewards/margins": 471.0830078125, |
|
"rewards/real": 28.390844345092773, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 7.113051451103452, |
|
"learning_rate": 4.497863247863248e-07, |
|
"logits/generated": -1.336794376373291, |
|
"logits/oppo_generated": -3.0462043285369873, |
|
"logits/oppo_real": -3.1089582443237305, |
|
"logits/real": -2.504794120788574, |
|
"logps/generated": -515.6500854492188, |
|
"logps/oppo_gen": -77.79332733154297, |
|
"logps/oppo_real": -319.2231750488281, |
|
"logps/real": -284.25970458984375, |
|
"loss": 0.1016, |
|
"loss/gen": 0.0012869073543697596, |
|
"loss/real": 0.04417861998081207, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -437.85675048828125, |
|
"rewards/margins": 472.82025146484375, |
|
"rewards/real": 34.9635009765625, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 7.25392965170229, |
|
"learning_rate": 4.494301994301994e-07, |
|
"logits/generated": 0.08211880922317505, |
|
"logits/oppo_generated": -2.815687656402588, |
|
"logits/oppo_real": -2.9501237869262695, |
|
"logits/real": -2.270163059234619, |
|
"logps/generated": -488.885009765625, |
|
"logps/oppo_gen": -103.51431274414062, |
|
"logps/oppo_real": -308.8333435058594, |
|
"logps/real": -294.90545654296875, |
|
"loss": 0.1108, |
|
"loss/gen": 0.010138665325939655, |
|
"loss/real": 0.06405410915613174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -385.3706970214844, |
|
"rewards/margins": 399.298583984375, |
|
"rewards/real": 13.927886962890625, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 6.725902352188283, |
|
"learning_rate": 4.4907407407407403e-07, |
|
"logits/generated": 1.6352074146270752, |
|
"logits/oppo_generated": -2.779146194458008, |
|
"logits/oppo_real": -2.8336267471313477, |
|
"logits/real": -0.30210697650909424, |
|
"logps/generated": -425.0787658691406, |
|
"logps/oppo_gen": -72.71639251708984, |
|
"logps/oppo_real": -196.57557678222656, |
|
"logps/real": -188.48077392578125, |
|
"loss": 0.0918, |
|
"loss/gen": 0.02201060950756073, |
|
"loss/real": 0.07751898467540741, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -352.36236572265625, |
|
"rewards/margins": 360.4571838378906, |
|
"rewards/real": 8.094803810119629, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 8.467618409177252, |
|
"learning_rate": 4.487179487179487e-07, |
|
"logits/generated": 0.2313031107187271, |
|
"logits/oppo_generated": -2.8425636291503906, |
|
"logits/oppo_real": -2.9093685150146484, |
|
"logits/real": -1.3410108089447021, |
|
"logps/generated": -495.5323486328125, |
|
"logps/oppo_gen": -95.93893432617188, |
|
"logps/oppo_real": -207.11392211914062, |
|
"logps/real": -204.73190307617188, |
|
"loss": 0.1089, |
|
"loss/gen": 0.06844402849674225, |
|
"loss/real": 0.08963720500469208, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -399.5934143066406, |
|
"rewards/margins": 401.97540283203125, |
|
"rewards/real": 2.382023811340332, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 9.123527094444691, |
|
"learning_rate": 4.4836182336182333e-07, |
|
"logits/generated": 0.14070743322372437, |
|
"logits/oppo_generated": -2.8224010467529297, |
|
"logits/oppo_real": -2.778409957885742, |
|
"logits/real": -1.2655444145202637, |
|
"logps/generated": -539.6663818359375, |
|
"logps/oppo_gen": -88.16463470458984, |
|
"logps/oppo_real": -239.9169921875, |
|
"logps/real": -248.80075073242188, |
|
"loss": 0.0975, |
|
"loss/gen": 0.0023253695107996464, |
|
"loss/real": 0.12179554253816605, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -451.50177001953125, |
|
"rewards/margins": 442.61798095703125, |
|
"rewards/real": -8.883747100830078, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 9.945805371845404, |
|
"learning_rate": 4.48005698005698e-07, |
|
"logits/generated": 0.5030673742294312, |
|
"logits/oppo_generated": -2.9657952785491943, |
|
"logits/oppo_real": -2.9425137042999268, |
|
"logits/real": -1.8194687366485596, |
|
"logps/generated": -504.91021728515625, |
|
"logps/oppo_gen": -76.42547607421875, |
|
"logps/oppo_real": -261.8043518066406, |
|
"logps/real": -240.3399658203125, |
|
"loss": 0.1157, |
|
"loss/gen": 0.0015581045299768448, |
|
"loss/real": 0.05328588932752609, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -428.4847412109375, |
|
"rewards/margins": 449.94915771484375, |
|
"rewards/real": 21.464385986328125, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 6.99499114531583, |
|
"learning_rate": 4.476495726495726e-07, |
|
"logits/generated": 0.15306584537029266, |
|
"logits/oppo_generated": -2.6656646728515625, |
|
"logits/oppo_real": -2.512063980102539, |
|
"logits/real": -0.21824151277542114, |
|
"logps/generated": -410.4168701171875, |
|
"logps/oppo_gen": -61.16596603393555, |
|
"logps/oppo_real": -89.70797729492188, |
|
"logps/real": -68.23112487792969, |
|
"loss": 0.0792, |
|
"loss/gen": 0.07100537419319153, |
|
"loss/real": 0.05325597524642944, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -349.25091552734375, |
|
"rewards/margins": 370.7277526855469, |
|
"rewards/real": 21.47686195373535, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 14.794789732289413, |
|
"learning_rate": 4.4729344729344725e-07, |
|
"logits/generated": -0.05492660403251648, |
|
"logits/oppo_generated": -2.679591655731201, |
|
"logits/oppo_real": -2.5152084827423096, |
|
"logits/real": -1.319637417793274, |
|
"logps/generated": -771.938720703125, |
|
"logps/oppo_gen": -134.39280700683594, |
|
"logps/oppo_real": -353.8466491699219, |
|
"logps/real": -338.97027587890625, |
|
"loss": 0.0906, |
|
"loss/gen": 0.008226570673286915, |
|
"loss/real": 0.0589267835021019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -637.5459594726562, |
|
"rewards/margins": 652.4222412109375, |
|
"rewards/real": 14.876349449157715, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 6.076222099800461, |
|
"learning_rate": 4.469373219373219e-07, |
|
"logits/generated": 0.17128604650497437, |
|
"logits/oppo_generated": -2.8852622509002686, |
|
"logits/oppo_real": -2.9888343811035156, |
|
"logits/real": -1.4829645156860352, |
|
"logps/generated": -563.3876342773438, |
|
"logps/oppo_gen": -86.57408142089844, |
|
"logps/oppo_real": -353.78594970703125, |
|
"logps/real": -333.20477294921875, |
|
"loss": 0.0819, |
|
"loss/gen": 0.0012016872642561793, |
|
"loss/real": 0.06735072284936905, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -476.81353759765625, |
|
"rewards/margins": 497.39471435546875, |
|
"rewards/real": 20.581186294555664, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 12.005749278869283, |
|
"learning_rate": 4.465811965811966e-07, |
|
"logits/generated": 0.22106623649597168, |
|
"logits/oppo_generated": -2.894904136657715, |
|
"logits/oppo_real": -2.8833250999450684, |
|
"logits/real": -2.006408929824829, |
|
"logps/generated": -479.835693359375, |
|
"logps/oppo_gen": -97.552490234375, |
|
"logps/oppo_real": -446.60357666015625, |
|
"logps/real": -415.96783447265625, |
|
"loss": 0.0906, |
|
"loss/gen": 0.048683419823646545, |
|
"loss/real": 0.04597897082567215, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -382.2832336425781, |
|
"rewards/margins": 412.9189147949219, |
|
"rewards/real": 30.63570785522461, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 13.103903827952438, |
|
"learning_rate": 4.4622507122507117e-07, |
|
"logits/generated": -0.44371479749679565, |
|
"logits/oppo_generated": -2.9238195419311523, |
|
"logits/oppo_real": -2.928109645843506, |
|
"logits/real": -2.34285306930542, |
|
"logps/generated": -654.0284423828125, |
|
"logps/oppo_gen": -99.34373474121094, |
|
"logps/oppo_real": -381.1275634765625, |
|
"logps/real": -357.4264831542969, |
|
"loss": 0.0913, |
|
"loss/gen": 0.000881514570210129, |
|
"loss/real": 0.053087156265974045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -554.6846923828125, |
|
"rewards/margins": 578.3857421875, |
|
"rewards/real": 23.701074600219727, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 6.461617043125639, |
|
"learning_rate": 4.4586894586894584e-07, |
|
"logits/generated": 3.6788649559020996, |
|
"logits/oppo_generated": -2.7080626487731934, |
|
"logits/oppo_real": -2.5767087936401367, |
|
"logits/real": 0.09816145896911621, |
|
"logps/generated": -560.853515625, |
|
"logps/oppo_gen": -46.502037048339844, |
|
"logps/oppo_real": -149.05059814453125, |
|
"logps/real": -167.17193603515625, |
|
"loss": 0.0963, |
|
"loss/gen": 0.0628194808959961, |
|
"loss/real": 0.14349111914634705, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -514.3514404296875, |
|
"rewards/margins": 496.2301025390625, |
|
"rewards/real": -18.121322631835938, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 9.013192574269086, |
|
"learning_rate": 4.455128205128205e-07, |
|
"logits/generated": -0.43150991201400757, |
|
"logits/oppo_generated": -2.9217922687530518, |
|
"logits/oppo_real": -3.0358145236968994, |
|
"logits/real": -1.1901543140411377, |
|
"logps/generated": -541.3538818359375, |
|
"logps/oppo_gen": -72.13301849365234, |
|
"logps/oppo_real": -295.51861572265625, |
|
"logps/real": -310.5357971191406, |
|
"loss": 0.1032, |
|
"loss/gen": 0.0007504450622946024, |
|
"loss/real": 0.14646711945533752, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -469.2208251953125, |
|
"rewards/margins": 454.20361328125, |
|
"rewards/real": -15.017206192016602, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 11.693687392311356, |
|
"learning_rate": 4.4515669515669514e-07, |
|
"logits/generated": 1.8059457540512085, |
|
"logits/oppo_generated": -2.7406344413757324, |
|
"logits/oppo_real": -2.799593925476074, |
|
"logits/real": -0.5050146579742432, |
|
"logps/generated": -544.3583374023438, |
|
"logps/oppo_gen": -102.60955810546875, |
|
"logps/oppo_real": -305.8299255371094, |
|
"logps/real": -270.0591125488281, |
|
"loss": 0.0802, |
|
"loss/gen": 0.002116965129971504, |
|
"loss/real": 0.044044435024261475, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -441.748779296875, |
|
"rewards/margins": 477.51959228515625, |
|
"rewards/real": 35.77080535888672, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 9.874561541517023, |
|
"learning_rate": 4.448005698005698e-07, |
|
"logits/generated": 0.07804876565933228, |
|
"logits/oppo_generated": -2.8220396041870117, |
|
"logits/oppo_real": -3.0663821697235107, |
|
"logits/real": -1.2566304206848145, |
|
"logps/generated": -471.7215576171875, |
|
"logps/oppo_gen": -80.95722961425781, |
|
"logps/oppo_real": -339.0364074707031, |
|
"logps/real": -320.71282958984375, |
|
"loss": 0.0917, |
|
"loss/gen": 0.07263980060815811, |
|
"loss/real": 0.05610188841819763, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -390.7643127441406, |
|
"rewards/margins": 409.0878601074219, |
|
"rewards/real": 18.32356071472168, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 9.420661101579883, |
|
"learning_rate": 4.444444444444444e-07, |
|
"logits/generated": 0.3896804749965668, |
|
"logits/oppo_generated": -2.8528313636779785, |
|
"logits/oppo_real": -2.9469070434570312, |
|
"logits/real": -0.7403790950775146, |
|
"logps/generated": -495.3824462890625, |
|
"logps/oppo_gen": -55.95906066894531, |
|
"logps/oppo_real": -228.37322998046875, |
|
"logps/real": -198.60240173339844, |
|
"loss": 0.0883, |
|
"loss/gen": 0.0015880331629887223, |
|
"loss/real": 0.04681776836514473, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -439.42340087890625, |
|
"rewards/margins": 469.19427490234375, |
|
"rewards/real": 29.770835876464844, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 5.275442508017266, |
|
"learning_rate": 4.4408831908831906e-07, |
|
"logits/generated": -0.6961678266525269, |
|
"logits/oppo_generated": -2.759657859802246, |
|
"logits/oppo_real": -2.7739434242248535, |
|
"logits/real": -1.9008269309997559, |
|
"logps/generated": -501.955322265625, |
|
"logps/oppo_gen": -55.900001525878906, |
|
"logps/oppo_real": -240.51673889160156, |
|
"logps/real": -244.97976684570312, |
|
"loss": 0.0944, |
|
"loss/gen": 0.02463957481086254, |
|
"loss/real": 0.12993966042995453, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -446.0553283691406, |
|
"rewards/margins": 441.59228515625, |
|
"rewards/real": -4.463043212890625, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 12.026972656868498, |
|
"learning_rate": 4.4373219373219373e-07, |
|
"logits/generated": -0.3736618459224701, |
|
"logits/oppo_generated": -2.714049816131592, |
|
"logits/oppo_real": -2.821863889694214, |
|
"logits/real": -1.2542011737823486, |
|
"logps/generated": -461.4123840332031, |
|
"logps/oppo_gen": -61.66150665283203, |
|
"logps/oppo_real": -281.81561279296875, |
|
"logps/real": -303.64080810546875, |
|
"loss": 0.1096, |
|
"loss/gen": 0.06007068231701851, |
|
"loss/real": 0.15294288098812103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -399.7508850097656, |
|
"rewards/margins": 377.92572021484375, |
|
"rewards/real": -21.825159072875977, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 7.3867804627082325, |
|
"learning_rate": 4.4337606837606836e-07, |
|
"logits/generated": -0.8853031992912292, |
|
"logits/oppo_generated": -2.7336645126342773, |
|
"logits/oppo_real": -2.6636435985565186, |
|
"logits/real": -2.0228090286254883, |
|
"logps/generated": -461.8339538574219, |
|
"logps/oppo_gen": -66.04891204833984, |
|
"logps/oppo_real": -343.6158447265625, |
|
"logps/real": -303.4918518066406, |
|
"loss": 0.0875, |
|
"loss/gen": 0.013448844663798809, |
|
"loss/real": 0.04182068258523941, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -395.7850341796875, |
|
"rewards/margins": 435.90899658203125, |
|
"rewards/real": 40.123992919921875, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 9.061123447447496, |
|
"learning_rate": 4.43019943019943e-07, |
|
"logits/generated": 0.5346022248268127, |
|
"logits/oppo_generated": -3.0542874336242676, |
|
"logits/oppo_real": -2.803119659423828, |
|
"logits/real": -2.6106531620025635, |
|
"logps/generated": -457.6131591796875, |
|
"logps/oppo_gen": -81.553955078125, |
|
"logps/oppo_real": -376.17071533203125, |
|
"logps/real": -327.69122314453125, |
|
"loss": 0.1071, |
|
"loss/gen": 0.07322467863559723, |
|
"loss/real": 0.039743319153785706, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -376.0592041015625, |
|
"rewards/margins": 424.5386657714844, |
|
"rewards/real": 48.47947311401367, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 7.112731543662184, |
|
"learning_rate": 4.4266381766381765e-07, |
|
"logits/generated": -0.425476610660553, |
|
"logits/oppo_generated": -2.791293144226074, |
|
"logits/oppo_real": -2.8689441680908203, |
|
"logits/real": -2.340095043182373, |
|
"logps/generated": -594.830078125, |
|
"logps/oppo_gen": -90.10079956054688, |
|
"logps/oppo_real": -387.6597900390625, |
|
"logps/real": -358.162841796875, |
|
"loss": 0.1084, |
|
"loss/gen": 0.07199215888977051, |
|
"loss/real": 0.04830511659383774, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -504.729248046875, |
|
"rewards/margins": 534.2261962890625, |
|
"rewards/real": 29.496944427490234, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 8.225771381416818, |
|
"learning_rate": 4.423076923076923e-07, |
|
"logits/generated": 1.6258618831634521, |
|
"logits/oppo_generated": -2.8356850147247314, |
|
"logits/oppo_real": -2.917833089828491, |
|
"logits/real": -1.8351335525512695, |
|
"logps/generated": -565.7457275390625, |
|
"logps/oppo_gen": -76.40264892578125, |
|
"logps/oppo_real": -278.172607421875, |
|
"logps/real": -244.8758087158203, |
|
"loss": 0.0797, |
|
"loss/gen": 0.0172113087028265, |
|
"loss/real": 0.04479437321424484, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -489.3431396484375, |
|
"rewards/margins": 522.639892578125, |
|
"rewards/real": 33.29682159423828, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 8.413846238951294, |
|
"learning_rate": 4.4195156695156695e-07, |
|
"logits/generated": -0.260977566242218, |
|
"logits/oppo_generated": -3.0011539459228516, |
|
"logits/oppo_real": -3.069876194000244, |
|
"logits/real": -2.041016101837158, |
|
"logps/generated": -546.556396484375, |
|
"logps/oppo_gen": -69.13575744628906, |
|
"logps/oppo_real": -340.70343017578125, |
|
"logps/real": -326.86199951171875, |
|
"loss": 0.0869, |
|
"loss/gen": 0.005083143711090088, |
|
"loss/real": 0.06998279690742493, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -477.4206848144531, |
|
"rewards/margins": 491.2621154785156, |
|
"rewards/real": 13.841464042663574, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 9.29984934695055, |
|
"learning_rate": 4.4159544159544157e-07, |
|
"logits/generated": -0.5148028135299683, |
|
"logits/oppo_generated": -2.821411609649658, |
|
"logits/oppo_real": -2.9697532653808594, |
|
"logits/real": -2.523200511932373, |
|
"logps/generated": -538.35546875, |
|
"logps/oppo_gen": -94.25292205810547, |
|
"logps/oppo_real": -449.1705322265625, |
|
"logps/real": -411.50274658203125, |
|
"loss": 0.0751, |
|
"loss/gen": 0.04956042394042015, |
|
"loss/real": 0.04226259887218475, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -444.1025085449219, |
|
"rewards/margins": 481.770263671875, |
|
"rewards/real": 37.66777801513672, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 8.666425212528836, |
|
"learning_rate": 4.412393162393162e-07, |
|
"logits/generated": 0.49625787138938904, |
|
"logits/oppo_generated": -2.9498441219329834, |
|
"logits/oppo_real": -2.889374017715454, |
|
"logits/real": -2.204894542694092, |
|
"logps/generated": -593.1181640625, |
|
"logps/oppo_gen": -93.28401184082031, |
|
"logps/oppo_real": -446.9027099609375, |
|
"logps/real": -414.73431396484375, |
|
"loss": 0.1, |
|
"loss/gen": 0.03768323361873627, |
|
"loss/real": 0.04522031173110008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -499.83416748046875, |
|
"rewards/margins": 532.0025634765625, |
|
"rewards/real": 32.168392181396484, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 7.098388231036369, |
|
"learning_rate": 4.4088319088319087e-07, |
|
"logits/generated": 0.929095983505249, |
|
"logits/oppo_generated": -2.5877699851989746, |
|
"logits/oppo_real": -2.4145617485046387, |
|
"logits/real": -1.5228768587112427, |
|
"logps/generated": -528.0653076171875, |
|
"logps/oppo_gen": -58.147544860839844, |
|
"logps/oppo_real": -256.63494873046875, |
|
"logps/real": -238.59326171875, |
|
"loss": 0.1044, |
|
"loss/gen": 0.026826368644833565, |
|
"loss/real": 0.0565837100148201, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -469.9177551269531, |
|
"rewards/margins": 487.95947265625, |
|
"rewards/real": 18.041690826416016, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 7.642931506173311, |
|
"learning_rate": 4.4052706552706555e-07, |
|
"logits/generated": -1.4258209466934204, |
|
"logits/oppo_generated": -2.825096607208252, |
|
"logits/oppo_real": -2.919394016265869, |
|
"logits/real": -2.0324389934539795, |
|
"logps/generated": -391.3765869140625, |
|
"logps/oppo_gen": -62.71122360229492, |
|
"logps/oppo_real": -234.44354248046875, |
|
"logps/real": -210.38009643554688, |
|
"loss": 0.0938, |
|
"loss/gen": 0.07887871563434601, |
|
"loss/real": 0.05352405831217766, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -328.66534423828125, |
|
"rewards/margins": 352.72882080078125, |
|
"rewards/real": 24.063447952270508, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 13.57412994575268, |
|
"learning_rate": 4.4017094017094017e-07, |
|
"logits/generated": -0.5292628407478333, |
|
"logits/oppo_generated": -2.681910276412964, |
|
"logits/oppo_real": -2.8930723667144775, |
|
"logits/real": -2.0319461822509766, |
|
"logps/generated": -433.7325439453125, |
|
"logps/oppo_gen": -69.35714721679688, |
|
"logps/oppo_real": -321.68878173828125, |
|
"logps/real": -286.9642333984375, |
|
"loss": 0.0964, |
|
"loss/gen": 0.10028743743896484, |
|
"loss/real": 0.04317962005734444, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -364.3753967285156, |
|
"rewards/margins": 399.0999755859375, |
|
"rewards/real": 34.72455978393555, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 7.212487877582696, |
|
"learning_rate": 4.398148148148148e-07, |
|
"logits/generated": -0.8619464039802551, |
|
"logits/oppo_generated": -2.910146951675415, |
|
"logits/oppo_real": -2.842686653137207, |
|
"logits/real": -2.131901502609253, |
|
"logps/generated": -601.9418334960938, |
|
"logps/oppo_gen": -55.29602813720703, |
|
"logps/oppo_real": -188.457763671875, |
|
"logps/real": -160.12319946289062, |
|
"loss": 0.0815, |
|
"loss/gen": 0.0017280648462474346, |
|
"loss/real": 0.04960310831665993, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -546.6458740234375, |
|
"rewards/margins": 574.9803466796875, |
|
"rewards/real": 28.334548950195312, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 6.245616443711772, |
|
"learning_rate": 4.394586894586894e-07, |
|
"logits/generated": -0.061074838042259216, |
|
"logits/oppo_generated": -2.9482345581054688, |
|
"logits/oppo_real": -3.0109448432922363, |
|
"logits/real": -2.1488213539123535, |
|
"logps/generated": -496.9813537597656, |
|
"logps/oppo_gen": -70.6409912109375, |
|
"logps/oppo_real": -375.189697265625, |
|
"logps/real": -349.4216003417969, |
|
"loss": 0.0816, |
|
"loss/gen": 0.003311349079012871, |
|
"loss/real": 0.049754396080970764, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -426.34039306640625, |
|
"rewards/margins": 452.10845947265625, |
|
"rewards/real": 25.768083572387695, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 6.656857171819587, |
|
"learning_rate": 4.391025641025641e-07, |
|
"logits/generated": 0.6247938275337219, |
|
"logits/oppo_generated": -2.7811834812164307, |
|
"logits/oppo_real": -2.923962116241455, |
|
"logits/real": 0.7597999572753906, |
|
"logps/generated": -516.4414672851562, |
|
"logps/oppo_gen": -71.71026611328125, |
|
"logps/oppo_real": -353.846923828125, |
|
"logps/real": -375.318359375, |
|
"loss": 0.0922, |
|
"loss/gen": 0.0018027722835540771, |
|
"loss/real": 0.15209785103797913, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -444.731201171875, |
|
"rewards/margins": 423.2597961425781, |
|
"rewards/real": -21.471416473388672, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 8.481192481928902, |
|
"learning_rate": 4.3874643874643876e-07, |
|
"logits/generated": -0.6601736545562744, |
|
"logits/oppo_generated": -2.8043360710144043, |
|
"logits/oppo_real": -3.0211949348449707, |
|
"logits/real": -2.2159879207611084, |
|
"logps/generated": -507.84197998046875, |
|
"logps/oppo_gen": -77.71004486083984, |
|
"logps/oppo_real": -389.77301025390625, |
|
"logps/real": -357.7090148925781, |
|
"loss": 0.0979, |
|
"loss/gen": 0.012667636387050152, |
|
"loss/real": 0.044871166348457336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -430.1319580078125, |
|
"rewards/margins": 462.1959533691406, |
|
"rewards/real": 32.063995361328125, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 6.837551706735764, |
|
"learning_rate": 4.3839031339031333e-07, |
|
"logits/generated": -0.5177347660064697, |
|
"logits/oppo_generated": -2.7760987281799316, |
|
"logits/oppo_real": -2.740163803100586, |
|
"logits/real": -2.0827713012695312, |
|
"logps/generated": -612.817626953125, |
|
"logps/oppo_gen": -88.69313049316406, |
|
"logps/oppo_real": -338.8006591796875, |
|
"logps/real": -305.4683837890625, |
|
"loss": 0.0934, |
|
"loss/gen": 0.0003631175495684147, |
|
"loss/real": 0.04421408474445343, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -524.12451171875, |
|
"rewards/margins": 557.456787109375, |
|
"rewards/real": 33.332298278808594, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 8.198324488214801, |
|
"learning_rate": 4.38034188034188e-07, |
|
"logits/generated": -0.3121938109397888, |
|
"logits/oppo_generated": -2.7127938270568848, |
|
"logits/oppo_real": -2.803234577178955, |
|
"logits/real": -1.109214186668396, |
|
"logps/generated": -610.0167236328125, |
|
"logps/oppo_gen": -85.75541687011719, |
|
"logps/oppo_real": -242.4071807861328, |
|
"logps/real": -220.00424194335938, |
|
"loss": 0.0914, |
|
"loss/gen": 0.0030445237644016743, |
|
"loss/real": 0.05265495926141739, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -524.2613525390625, |
|
"rewards/margins": 546.664306640625, |
|
"rewards/real": 22.402935028076172, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 6.0753321176175605, |
|
"learning_rate": 4.376780626780627e-07, |
|
"logits/generated": 0.4881715774536133, |
|
"logits/oppo_generated": -2.995426654815674, |
|
"logits/oppo_real": -2.8803281784057617, |
|
"logits/real": -2.568976402282715, |
|
"logps/generated": -573.0343017578125, |
|
"logps/oppo_gen": -68.82854461669922, |
|
"logps/oppo_real": -337.844482421875, |
|
"logps/real": -298.8654479980469, |
|
"loss": 0.0893, |
|
"loss/gen": 0.001983725931495428, |
|
"loss/real": 0.04183054342865944, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -504.2057189941406, |
|
"rewards/margins": 543.1847534179688, |
|
"rewards/real": 38.97906494140625, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 7.825779972874135, |
|
"learning_rate": 4.373219373219373e-07, |
|
"logits/generated": -0.09599490463733673, |
|
"logits/oppo_generated": -2.6126418113708496, |
|
"logits/oppo_real": -3.0222294330596924, |
|
"logits/real": -1.5339518785476685, |
|
"logps/generated": -456.8564758300781, |
|
"logps/oppo_gen": -56.36054992675781, |
|
"logps/oppo_real": -325.3075256347656, |
|
"logps/real": -323.93658447265625, |
|
"loss": 0.1071, |
|
"loss/gen": 0.0157207902520895, |
|
"loss/real": 0.12539535760879517, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -400.49591064453125, |
|
"rewards/margins": 401.8668212890625, |
|
"rewards/real": 1.370926856994629, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 7.68849113712022, |
|
"learning_rate": 4.3696581196581193e-07, |
|
"logits/generated": -0.9831919074058533, |
|
"logits/oppo_generated": -3.026592254638672, |
|
"logits/oppo_real": -2.9974026679992676, |
|
"logits/real": -1.871561050415039, |
|
"logps/generated": -548.5293579101562, |
|
"logps/oppo_gen": -81.62860107421875, |
|
"logps/oppo_real": -354.01513671875, |
|
"logps/real": -329.4884338378906, |
|
"loss": 0.0956, |
|
"loss/gen": 0.0023166935425251722, |
|
"loss/real": 0.050665199756622314, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -466.90069580078125, |
|
"rewards/margins": 491.42742919921875, |
|
"rewards/real": 24.526710510253906, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 9.401834966240466, |
|
"learning_rate": 4.366096866096866e-07, |
|
"logits/generated": -0.019820451736450195, |
|
"logits/oppo_generated": -2.86299991607666, |
|
"logits/oppo_real": -2.897392749786377, |
|
"logits/real": -1.4564645290374756, |
|
"logps/generated": -499.983642578125, |
|
"logps/oppo_gen": -55.654396057128906, |
|
"logps/oppo_real": -286.4037170410156, |
|
"logps/real": -321.74761962890625, |
|
"loss": 0.0931, |
|
"loss/gen": 0.0018793029012158513, |
|
"loss/real": 0.16421890258789062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -444.3292541503906, |
|
"rewards/margins": 408.9853820800781, |
|
"rewards/real": -35.343902587890625, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 9.270704449961457, |
|
"learning_rate": 4.362535612535612e-07, |
|
"logits/generated": -0.800953209400177, |
|
"logits/oppo_generated": -2.8678367137908936, |
|
"logits/oppo_real": -2.797013759613037, |
|
"logits/real": -2.207515239715576, |
|
"logps/generated": -608.06298828125, |
|
"logps/oppo_gen": -154.916748046875, |
|
"logps/oppo_real": -268.4582824707031, |
|
"logps/real": -248.12521362304688, |
|
"loss": 0.0937, |
|
"loss/gen": 0.004351920913904905, |
|
"loss/real": 0.05765657126903534, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -453.146240234375, |
|
"rewards/margins": 473.47930908203125, |
|
"rewards/real": 20.33307456970215, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 7.696353187482667, |
|
"learning_rate": 4.358974358974359e-07, |
|
"logits/generated": -1.0136319398880005, |
|
"logits/oppo_generated": -2.879833221435547, |
|
"logits/oppo_real": -3.0112786293029785, |
|
"logits/real": -1.7318873405456543, |
|
"logps/generated": -518.8911743164062, |
|
"logps/oppo_gen": -96.10844421386719, |
|
"logps/oppo_real": -492.59039306640625, |
|
"logps/real": -477.6430358886719, |
|
"loss": 0.0977, |
|
"loss/gen": 0.07185956090688705, |
|
"loss/real": 0.065572589635849, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -422.78271484375, |
|
"rewards/margins": 437.7301025390625, |
|
"rewards/real": 14.94738483428955, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 7.6049576300068376, |
|
"learning_rate": 4.355413105413105e-07, |
|
"logits/generated": -1.0468111038208008, |
|
"logits/oppo_generated": -2.855457305908203, |
|
"logits/oppo_real": -3.161579132080078, |
|
"logits/real": -1.6303105354309082, |
|
"logps/generated": -431.0528259277344, |
|
"logps/oppo_gen": -79.04156494140625, |
|
"logps/oppo_real": -508.73779296875, |
|
"logps/real": -501.22515869140625, |
|
"loss": 0.0956, |
|
"loss/gen": 0.08708032220602036, |
|
"loss/real": 0.10645326226949692, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -352.0112609863281, |
|
"rewards/margins": 359.5238952636719, |
|
"rewards/real": 7.512636184692383, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 6.853238708693857, |
|
"learning_rate": 4.3518518518518514e-07, |
|
"logits/generated": -1.0127158164978027, |
|
"logits/oppo_generated": -2.8270015716552734, |
|
"logits/oppo_real": -2.9884450435638428, |
|
"logits/real": -1.9456806182861328, |
|
"logps/generated": -520.1721801757812, |
|
"logps/oppo_gen": -79.96229553222656, |
|
"logps/oppo_real": -295.296630859375, |
|
"logps/real": -267.6679382324219, |
|
"loss": 0.0834, |
|
"loss/gen": 0.07029302418231964, |
|
"loss/real": 0.04932165890932083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -440.2098693847656, |
|
"rewards/margins": 467.83856201171875, |
|
"rewards/real": 27.628707885742188, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 7.743734719028128, |
|
"learning_rate": 4.348290598290598e-07, |
|
"logits/generated": 1.6780352592468262, |
|
"logits/oppo_generated": -2.7040886878967285, |
|
"logits/oppo_real": -2.816561698913574, |
|
"logits/real": -1.4537031650543213, |
|
"logps/generated": -591.6678466796875, |
|
"logps/oppo_gen": -55.71031188964844, |
|
"logps/oppo_real": -202.95962524414062, |
|
"logps/real": -165.92742919921875, |
|
"loss": 0.1053, |
|
"loss/gen": 0.006178971379995346, |
|
"loss/real": 0.04278302937746048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -535.95751953125, |
|
"rewards/margins": 572.9896850585938, |
|
"rewards/real": 37.03219223022461, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 14.92835214053567, |
|
"learning_rate": 4.3447293447293444e-07, |
|
"logits/generated": -0.7193632125854492, |
|
"logits/oppo_generated": -2.385345458984375, |
|
"logits/oppo_real": -2.4835422039031982, |
|
"logits/real": -1.4402399063110352, |
|
"logps/generated": -515.0576171875, |
|
"logps/oppo_gen": -75.58077239990234, |
|
"logps/oppo_real": -339.3034973144531, |
|
"logps/real": -278.0953369140625, |
|
"loss": 0.0834, |
|
"loss/gen": 0.07033772766590118, |
|
"loss/real": 0.03465234115719795, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -439.476806640625, |
|
"rewards/margins": 500.6849670410156, |
|
"rewards/real": 61.20813751220703, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 5.571790159459946, |
|
"learning_rate": 4.341168091168091e-07, |
|
"logits/generated": -1.4496867656707764, |
|
"logits/oppo_generated": -3.011491060256958, |
|
"logits/oppo_real": -3.0487937927246094, |
|
"logits/real": -2.6563940048217773, |
|
"logps/generated": -565.4811401367188, |
|
"logps/oppo_gen": -131.22396850585938, |
|
"logps/oppo_real": -400.33868408203125, |
|
"logps/real": -371.1045837402344, |
|
"loss": 0.1007, |
|
"loss/gen": 0.019953308627009392, |
|
"loss/real": 0.048318050801754, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -434.2572021484375, |
|
"rewards/margins": 463.4913024902344, |
|
"rewards/real": 29.234098434448242, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 12.491345881202239, |
|
"learning_rate": 4.3376068376068374e-07, |
|
"logits/generated": -0.78708416223526, |
|
"logits/oppo_generated": -2.755108118057251, |
|
"logits/oppo_real": -2.8694067001342773, |
|
"logits/real": -1.9260311126708984, |
|
"logps/generated": -515.32763671875, |
|
"logps/oppo_gen": -61.73572540283203, |
|
"logps/oppo_real": -230.838134765625, |
|
"logps/real": -231.46530151367188, |
|
"loss": 0.0894, |
|
"loss/gen": 0.02343502640724182, |
|
"loss/real": 0.12424956262111664, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -453.5919494628906, |
|
"rewards/margins": 452.96478271484375, |
|
"rewards/real": -0.6271572113037109, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 9.761894600006734, |
|
"learning_rate": 4.3340455840455836e-07, |
|
"logits/generated": -0.5808227062225342, |
|
"logits/oppo_generated": -2.8574419021606445, |
|
"logits/oppo_real": -2.923137903213501, |
|
"logits/real": -1.3926044702529907, |
|
"logps/generated": -549.8450927734375, |
|
"logps/oppo_gen": -82.77210998535156, |
|
"logps/oppo_real": -252.58892822265625, |
|
"logps/real": -293.86065673828125, |
|
"loss": 0.1027, |
|
"loss/gen": 0.002889402210712433, |
|
"loss/real": 0.16547296941280365, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -467.072998046875, |
|
"rewards/margins": 425.80126953125, |
|
"rewards/real": -41.27172088623047, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 12.957305049677213, |
|
"learning_rate": 4.3304843304843304e-07, |
|
"logits/generated": -1.3726850748062134, |
|
"logits/oppo_generated": -2.994565010070801, |
|
"logits/oppo_real": -2.8149280548095703, |
|
"logits/real": -2.1286659240722656, |
|
"logps/generated": -489.687255859375, |
|
"logps/oppo_gen": -48.2861213684082, |
|
"logps/oppo_real": -137.37625122070312, |
|
"logps/real": -163.36282348632812, |
|
"loss": 0.1199, |
|
"loss/gen": 0.11190799623727798, |
|
"loss/real": 0.14364519715309143, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -441.401123046875, |
|
"rewards/margins": 415.41455078125, |
|
"rewards/real": -25.98657989501953, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 14.633519487058642, |
|
"learning_rate": 4.326923076923077e-07, |
|
"logits/generated": -0.8933599591255188, |
|
"logits/oppo_generated": -2.816603422164917, |
|
"logits/oppo_real": -2.9343314170837402, |
|
"logits/real": -2.3044919967651367, |
|
"logps/generated": -400.06109619140625, |
|
"logps/oppo_gen": -30.44548988342285, |
|
"logps/oppo_real": -174.9966278076172, |
|
"logps/real": -154.96810913085938, |
|
"loss": 0.1069, |
|
"loss/gen": 0.006913540884852409, |
|
"loss/real": 0.054345495998859406, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -369.6155700683594, |
|
"rewards/margins": 389.64410400390625, |
|
"rewards/real": 20.028532028198242, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 18.231897023946708, |
|
"learning_rate": 4.323361823361823e-07, |
|
"logits/generated": -1.5220329761505127, |
|
"logits/oppo_generated": -2.6415185928344727, |
|
"logits/oppo_real": -3.0115818977355957, |
|
"logits/real": -2.152147054672241, |
|
"logps/generated": -491.63330078125, |
|
"logps/oppo_gen": -93.466064453125, |
|
"logps/oppo_real": -340.529296875, |
|
"logps/real": -298.77935791015625, |
|
"loss": 0.1062, |
|
"loss/gen": 0.060000915080308914, |
|
"loss/real": 0.04492133855819702, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -398.167236328125, |
|
"rewards/margins": 439.9171447753906, |
|
"rewards/real": 41.749935150146484, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 10.778284168035912, |
|
"learning_rate": 4.3198005698005696e-07, |
|
"logits/generated": -1.0498695373535156, |
|
"logits/oppo_generated": -2.7984108924865723, |
|
"logits/oppo_real": -2.9754528999328613, |
|
"logits/real": -1.8841339349746704, |
|
"logps/generated": -482.349853515625, |
|
"logps/oppo_gen": -69.67858123779297, |
|
"logps/oppo_real": -268.7974853515625, |
|
"logps/real": -248.0356903076172, |
|
"loss": 0.0892, |
|
"loss/gen": 0.006701639387756586, |
|
"loss/real": 0.05421861633658409, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -412.6712646484375, |
|
"rewards/margins": 433.43304443359375, |
|
"rewards/real": 20.761764526367188, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 5.662894882344747, |
|
"learning_rate": 4.3162393162393163e-07, |
|
"logits/generated": -0.40803262591362, |
|
"logits/oppo_generated": -2.7994847297668457, |
|
"logits/oppo_real": -2.687981605529785, |
|
"logits/real": -1.8741077184677124, |
|
"logps/generated": -422.8677978515625, |
|
"logps/oppo_gen": -76.17577362060547, |
|
"logps/oppo_real": -381.5020751953125, |
|
"logps/real": -373.92547607421875, |
|
"loss": 0.0924, |
|
"loss/gen": 0.12070396542549133, |
|
"loss/real": 0.1276516318321228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -346.6919860839844, |
|
"rewards/margins": 354.26861572265625, |
|
"rewards/real": 7.576608657836914, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 13.117954652038163, |
|
"learning_rate": 4.3126780626780625e-07, |
|
"logits/generated": -0.690459668636322, |
|
"logits/oppo_generated": -2.8429031372070312, |
|
"logits/oppo_real": -3.0224597454071045, |
|
"logits/real": -2.0877585411071777, |
|
"logps/generated": -508.1094970703125, |
|
"logps/oppo_gen": -78.5534439086914, |
|
"logps/oppo_real": -246.5026397705078, |
|
"logps/real": -224.82470703125, |
|
"loss": 0.095, |
|
"loss/gen": 0.0401313453912735, |
|
"loss/real": 0.05716457962989807, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -429.5560607910156, |
|
"rewards/margins": 451.2340087890625, |
|
"rewards/real": 21.677928924560547, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 8.104270901700197, |
|
"learning_rate": 4.309116809116809e-07, |
|
"logits/generated": -0.37960249185562134, |
|
"logits/oppo_generated": -2.5529236793518066, |
|
"logits/oppo_real": -2.7146146297454834, |
|
"logits/real": 1.159952163696289, |
|
"logps/generated": -590.0205078125, |
|
"logps/oppo_gen": -79.70944213867188, |
|
"logps/oppo_real": -106.01055145263672, |
|
"logps/real": -109.36273956298828, |
|
"loss": 0.0941, |
|
"loss/gen": 0.0003832872025668621, |
|
"loss/real": 0.10172198712825775, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -510.3110656738281, |
|
"rewards/margins": 506.9588623046875, |
|
"rewards/real": -3.3521909713745117, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 15.215718243482266, |
|
"learning_rate": 4.3055555555555555e-07, |
|
"logits/generated": 0.5861719250679016, |
|
"logits/oppo_generated": -2.5894346237182617, |
|
"logits/oppo_real": -2.6849865913391113, |
|
"logits/real": -0.9694942235946655, |
|
"logps/generated": -623.01904296875, |
|
"logps/oppo_gen": -67.09019470214844, |
|
"logps/oppo_real": -256.4427185058594, |
|
"logps/real": -233.60354614257812, |
|
"loss": 0.0965, |
|
"loss/gen": 0.011839567683637142, |
|
"loss/real": 0.06257271021604538, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -555.9288330078125, |
|
"rewards/margins": 578.7680053710938, |
|
"rewards/real": 22.839157104492188, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 7.629829003254036, |
|
"learning_rate": 4.3019943019943017e-07, |
|
"logits/generated": 0.5100865364074707, |
|
"logits/oppo_generated": -2.959817886352539, |
|
"logits/oppo_real": -2.9362192153930664, |
|
"logits/real": -1.9117169380187988, |
|
"logps/generated": -508.62567138671875, |
|
"logps/oppo_gen": -82.48292541503906, |
|
"logps/oppo_real": -458.88818359375, |
|
"logps/real": -433.4519348144531, |
|
"loss": 0.0975, |
|
"loss/gen": 0.009979079477488995, |
|
"loss/real": 0.05191000550985336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -426.14276123046875, |
|
"rewards/margins": 451.57891845703125, |
|
"rewards/real": 25.436199188232422, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 6.749905620904138, |
|
"learning_rate": 4.2984330484330485e-07, |
|
"logits/generated": 0.30616289377212524, |
|
"logits/oppo_generated": -2.7284858226776123, |
|
"logits/oppo_real": -2.8326492309570312, |
|
"logits/real": -0.5457709431648254, |
|
"logps/generated": -536.1026611328125, |
|
"logps/oppo_gen": -60.89936828613281, |
|
"logps/oppo_real": -245.58233642578125, |
|
"logps/real": -231.62322998046875, |
|
"loss": 0.0886, |
|
"loss/gen": 0.0017942792037501931, |
|
"loss/real": 0.059879861772060394, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -475.2032775878906, |
|
"rewards/margins": 489.16241455078125, |
|
"rewards/real": 13.959126472473145, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 12.672164593831791, |
|
"learning_rate": 4.294871794871794e-07, |
|
"logits/generated": -0.04702004790306091, |
|
"logits/oppo_generated": -2.884782075881958, |
|
"logits/oppo_real": -3.007986545562744, |
|
"logits/real": -2.3009767532348633, |
|
"logps/generated": -544.8890380859375, |
|
"logps/oppo_gen": -64.29571533203125, |
|
"logps/oppo_real": -445.2386169433594, |
|
"logps/real": -402.3214111328125, |
|
"loss": 0.0975, |
|
"loss/gen": 0.000552927260287106, |
|
"loss/real": 0.03968076407909393, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -480.59332275390625, |
|
"rewards/margins": 523.510498046875, |
|
"rewards/real": 42.91718292236328, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 9.12294003998407, |
|
"learning_rate": 4.291310541310541e-07, |
|
"logits/generated": 0.24063043296337128, |
|
"logits/oppo_generated": -2.8430304527282715, |
|
"logits/oppo_real": -2.873483657836914, |
|
"logits/real": -1.1980528831481934, |
|
"logps/generated": -557.898193359375, |
|
"logps/oppo_gen": -68.79239654541016, |
|
"logps/oppo_real": -391.89910888671875, |
|
"logps/real": -367.91357421875, |
|
"loss": 0.0969, |
|
"loss/gen": 0.010371391661465168, |
|
"loss/real": 0.053776565939188004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -489.1058044433594, |
|
"rewards/margins": 513.09130859375, |
|
"rewards/real": 23.985549926757812, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 9.653372425330378, |
|
"learning_rate": 4.2877492877492877e-07, |
|
"logits/generated": 0.6971580386161804, |
|
"logits/oppo_generated": -2.8508265018463135, |
|
"logits/oppo_real": -2.9677348136901855, |
|
"logits/real": -2.4330368041992188, |
|
"logps/generated": -542.3233642578125, |
|
"logps/oppo_gen": -88.43344116210938, |
|
"logps/oppo_real": -438.55322265625, |
|
"logps/real": -396.9981689453125, |
|
"loss": 0.1012, |
|
"loss/gen": 0.003461389569565654, |
|
"loss/real": 0.03977859392762184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -453.889892578125, |
|
"rewards/margins": 495.4449768066406, |
|
"rewards/real": 41.55507278442383, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 10.285206691387895, |
|
"learning_rate": 4.284188034188034e-07, |
|
"logits/generated": -0.6367926597595215, |
|
"logits/oppo_generated": -2.816070079803467, |
|
"logits/oppo_real": -3.012850761413574, |
|
"logits/real": -1.2840546369552612, |
|
"logps/generated": -558.1781005859375, |
|
"logps/oppo_gen": -55.2912483215332, |
|
"logps/oppo_real": -255.20977783203125, |
|
"logps/real": -234.92495727539062, |
|
"loss": 0.0878, |
|
"loss/gen": 0.002074107062071562, |
|
"loss/real": 0.0539107508957386, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -502.8868408203125, |
|
"rewards/margins": 523.1716918945312, |
|
"rewards/real": 20.284809112548828, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 9.204867808373399, |
|
"learning_rate": 4.2806267806267807e-07, |
|
"logits/generated": -0.493258535861969, |
|
"logits/oppo_generated": -2.701869487762451, |
|
"logits/oppo_real": -2.963564872741699, |
|
"logits/real": -1.1260539293289185, |
|
"logps/generated": -546.3982543945312, |
|
"logps/oppo_gen": -83.03327941894531, |
|
"logps/oppo_real": -312.4057312011719, |
|
"logps/real": -280.11279296875, |
|
"loss": 0.082, |
|
"loss/gen": 0.004817273002117872, |
|
"loss/real": 0.04604298248887062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -463.3650207519531, |
|
"rewards/margins": 495.6579284667969, |
|
"rewards/real": 32.292930603027344, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 9.79890501171998, |
|
"learning_rate": 4.277065527065527e-07, |
|
"logits/generated": -0.6599729657173157, |
|
"logits/oppo_generated": -2.8546152114868164, |
|
"logits/oppo_real": -3.036848545074463, |
|
"logits/real": -1.591578483581543, |
|
"logps/generated": -535.380615234375, |
|
"logps/oppo_gen": -75.19477844238281, |
|
"logps/oppo_real": -314.191162109375, |
|
"logps/real": -302.3117370605469, |
|
"loss": 0.077, |
|
"loss/gen": 0.01566830277442932, |
|
"loss/real": 0.09632173180580139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -460.18585205078125, |
|
"rewards/margins": 472.06524658203125, |
|
"rewards/real": 11.879388809204102, |
|
"step": 239 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1434, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|